1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
/*
 * Per-request state for a VL3 lookup issued while processing a shootdown log
 * entry. One of these is allocated in svp_remote_shootdown_vl3() and freed in
 * svp_remote_shootdown_vl3_cb().
 */
typedef struct svp_shoot_vl3 {
        svp_query_t             ssv_query;      /* query handed to the remote */
        struct sockaddr_in6     ssv_sock;       /* address being looked up */
        svp_log_vl3_t           *ssv_vl3;       /* log entry being processed */
        svp_sdlog_t             *ssv_log;       /* log passed to completion cb */
} svp_shoot_vl3_t;
  44 
/*
 * File-scope state:
 *
 *  svp_remote_lock     Held in this file whenever svp_remote_tree is searched,
 *                      modified or walked.
 *  svp_remote_tree     AVL tree of every svp_remote_t, ordered by
 *                      svp_remote_comparator().
 *  svp_dns_timer       Recurring timer that runs svp_remote_dns_timer().
 *  svp_idspace         ID space from which query identifiers are allocated.
 *  svp_dns_timer_rate  Value assigned to the DNS timer's st_value.
 */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
static avl_tree_t svp_remote_tree;
static svp_timer_t svp_dns_timer;
static id_space_t *svp_idspace;
static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 id_t
  52 svp_id_alloc(void)
  53 {
  54         return (id_alloc(svp_idspace));
  55 }
  56 
  57 static void
  58 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  59     size_t buflen)
  60 {
  61         switch (state) {
  62         case SVP_RD_DNS_FAIL:
  63                 (void) snprintf(buf, buflen, "failed to resolve or find "
  64                     "entries for hostname %s", srp->sr_hostname);
  65                 break;
  66         case SVP_RD_REMOTE_FAIL:
  67                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  68                 break;
  69         default:
  70                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  71         }
  72 }
  73 
  74 static int
  75 svp_remote_comparator(const void *l, const void *r)
  76 {
  77         int ret;
  78         const svp_remote_t *lr = l, *rr = r;
  79 
  80         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  81         if (ret > 0)
  82                 return (1);
  83         else if (ret < 0)
  84                 return (-1);
  85 
  86         if (lr->sr_rport > rr->sr_rport)
  87                 return (1);
  88         else if (lr->sr_rport < rr->sr_rport)
  89                 return (-1);
  90 
  91         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  92 }
  93 
  94 void
  95 svp_query_release(svp_query_t *sqp)
  96 {
  97         id_free(svp_idspace, sqp->sq_header.svp_id);
  98 }
  99 
 100 static void
 101 svp_remote_destroy(svp_remote_t *srp)
 102 {
 103         size_t len;
 104 
 105         /*
 106          * Clean up any unrelated DNS information. At this point we know that
 107          * we're not in the remote tree. That means, that svp_remote_dns_timer
 108          * cannot queue us. However, if any of our DNS related state flags are
 109          * set, we have to hang out.
 110          */
 111         mutex_enter(&srp->sr_lock);
 112         while (srp->sr_state &
 113             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 114                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 115         }
 116         mutex_exit(&srp->sr_lock);
 117         svp_shootdown_fini(srp);
 118 
 119         if (cond_destroy(&srp->sr_cond) != 0)
 120                 libvarpd_panic("failed to destroy cond sr_cond");
 121 
 122         if (mutex_destroy(&srp->sr_lock) != 0)
 123                 libvarpd_panic("failed to destroy mutex sr_lock");
 124 
 125         if (srp->sr_addrinfo != NULL)
 126                 freeaddrinfo(srp->sr_addrinfo);
 127         len = strlen(srp->sr_hostname) + 1;
 128         umem_free(srp->sr_hostname, len);
 129         umem_free(srp, sizeof (svp_remote_t));
 130 }
 131 
 132 static int
 133 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 134     svp_remote_t **outp)
 135 {
 136         size_t hlen;
 137         svp_remote_t *remote;
 138 
 139         assert(MUTEX_HELD(&svp_remote_lock));
 140 
 141         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 142         if (remote == NULL) {
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         if (svp_shootdown_init(remote) != 0) {
 148                 umem_free(remote, sizeof (svp_remote_t));
 149                 mutex_exit(&svp_remote_lock);
 150                 return (ENOMEM);
 151         }
 152 
 153         hlen = strlen(host) + 1;
 154         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 155         if (remote->sr_hostname == NULL) {
 156                 svp_shootdown_fini(remote);
 157                 umem_free(remote, sizeof (svp_remote_t));
 158                 mutex_exit(&svp_remote_lock);
 159                 return (ENOMEM);
 160         }
 161         remote->sr_rport = port;
 162         if (mutex_init(&remote->sr_lock,
 163             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 164                 libvarpd_panic("failed to create mutex sr_lock");
 165         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 166                 libvarpd_panic("failed to create cond sr_cond");
 167         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 168             offsetof(svp_conn_t, sc_rlist));
 169         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 170             offsetof(svp_t, svp_rlink));
 171         (void) strlcpy(remote->sr_hostname, host, hlen);
 172         remote->sr_count = 1;
 173         remote->sr_uip = *uip;
 174 
 175         svp_shootdown_start(remote);
 176 
 177         *outp = remote;
 178         return (0);
 179 }
 180 
 181 int
 182 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 183     svp_remote_t **outp)
 184 {
 185         int ret;
 186         svp_remote_t lookup, *remote;
 187 
 188         lookup.sr_hostname = host;
 189         lookup.sr_rport = port;
 190         lookup.sr_uip = *uip;
 191         mutex_enter(&svp_remote_lock);
 192         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 193         if (remote != NULL) {
 194                 assert(remote->sr_count > 0);
 195                 remote->sr_count++;
 196                 *outp = remote;
 197                 mutex_exit(&svp_remote_lock);
 198                 return (0);
 199         }
 200 
 201         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 202                 mutex_exit(&svp_remote_lock);
 203                 return (ret);
 204         }
 205 
 206         avl_add(&svp_remote_tree, *outp);
 207         mutex_exit(&svp_remote_lock);
 208 
 209         /* Make sure DNS is up to date */
 210         svp_host_queue(*outp);
 211 
 212         return (0);
 213 }
 214 
 215 void
 216 svp_remote_release(svp_remote_t *srp)
 217 {
 218         mutex_enter(&svp_remote_lock);
 219         mutex_enter(&srp->sr_lock);
 220         srp->sr_count--;
 221         if (srp->sr_count != 0) {
 222                 mutex_exit(&srp->sr_lock);
 223                 mutex_exit(&svp_remote_lock);
 224                 return;
 225         }
 226         mutex_exit(&srp->sr_lock);
 227 
 228         avl_remove(&svp_remote_tree, srp);
 229         mutex_exit(&svp_remote_lock);
 230         svp_remote_destroy(srp);
 231 }
 232 
 233 int
 234 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 235 {
 236         svp_t check;
 237         avl_index_t where;
 238 
 239         mutex_enter(&srp->sr_lock);
 240         if (svp->svp_remote != NULL)
 241                 libvarpd_panic("failed to create mutex sr_lock");
 242 
 243         /*
 244          * We require everything except shootdowns
 245          */
 246         if (svp->svp_cb.scb_vl2_lookup == NULL)
 247                 libvarpd_panic("missing callback scb_vl2_lookup");
 248         if (svp->svp_cb.scb_vl3_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_vl3_lookup");
 250         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 251                 libvarpd_panic("missing callback scb_vl2_invalidate");
 252         if (svp->svp_cb.scb_vl3_inject == NULL)
 253                 libvarpd_panic("missing callback scb_vl3_inject");
 254         if (svp->svp_cb.scb_route_lookup == NULL)
 255                 libvarpd_panic("missing callback scb_route_lookup");
 256 
 257         check.svp_vid = svp->svp_vid;
 258         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 259                 libvarpd_panic("found duplicate entry with vid %ld",
 260                     svp->svp_vid);
 261         avl_insert(&srp->sr_tree, svp, where);
 262         svp->svp_remote = srp;
 263         mutex_exit(&srp->sr_lock);
 264 
 265         return (0);
 266 }
 267 
 268 void
 269 svp_remote_detach(svp_t *svp)
 270 {
 271         svp_t *lookup;
 272         svp_remote_t *srp = svp->svp_remote;
 273 
 274         if (srp == NULL)
 275                 libvarpd_panic("trying to detach remote when none exists");
 276 
 277         mutex_enter(&srp->sr_lock);
 278         lookup = avl_find(&srp->sr_tree, svp, NULL);
 279         if (lookup == NULL || lookup != svp)
 280                 libvarpd_panic("inconsitent remote avl tree...");
 281         avl_remove(&srp->sr_tree, svp);
 282         svp->svp_remote = NULL;
 283         mutex_exit(&srp->sr_lock);
 284         svp_remote_release(srp);
 285 }
 286 
 287 /*
 288  * See if the request can be sent over the connection's supported version.
 289  * Scribble the version in the request itself.  NOTE that we do not check the
 290  * version that already exists in sqp->sq_header.svp_ver, as we may be called
 291  * from svp_remote_reassign() (and change versions when arriving at a new
 292  * connection).
 293  */
 294 static boolean_t
 295 svp_outbound_version_check(int version, svp_query_t *sqp)
 296 {
 297         uint16_t op = htons(sqp->sq_header.svp_op);
 298 
 299         /*
 300          * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
 301          * as v2-only.  Reflect that here.
 302          *
 303          * NOTE that if any message semantics change between versions,
 304          * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
 305          * need to more-deeply inspect the query.  It's possible that the
 306          * svp_op space is big enough to just continue op-only inspections.
 307          */
 308 
 309         assert(version > 0 && version <= SVP_CURRENT_VERSION);
 310 
 311         if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
 312                 sqp->sq_header.svp_ver = htons(version);
 313                 return (B_TRUE);
 314         }
 315 
 316         return (B_FALSE);
 317 }
 318 
 319 /*
 320  * Walk the list of connections and find the first one that's available AND
 321  * version-appropriate for the message, then move the matched connection to
 322  * the back of the list so it's less likely to be used again.
 323  */
 324 static boolean_t
 325 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 326 {
 327         svp_conn_t *scp;
 328 
 329         assert(MUTEX_HELD(&srp->sr_lock));
 330         for (scp = list_head(&srp->sr_conns); scp != NULL;
 331             scp = list_next(&srp->sr_conns, scp)) {
 332                 mutex_enter(&scp->sc_lock);
 333                 if (scp->sc_cstate != SVP_CS_ACTIVE ||
 334                     !svp_outbound_version_check(scp->sc_version, sqp)) {
 335                         mutex_exit(&scp->sc_lock);
 336                         continue;
 337                 }
 338                 svp_conn_queue(scp, sqp);
 339                 mutex_exit(&scp->sc_lock);
 340                 list_remove(&srp->sr_conns, scp);
 341                 list_insert_tail(&srp->sr_conns, scp);
 342                 return (B_TRUE);
 343         }
 344 
 345         return (B_FALSE);
 346 }
 347 
 348 static void
 349 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 350 {
 351         svp_t *svp = sqp->sq_svp;
 352         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 353 
 354         if (sqp->sq_status == SVP_S_OK)
 355                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 356                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 357                     arg);
 358         else
 359                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 360 }
 361 
 362 void
 363 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 364     void *arg)
 365 {
 366         svp_remote_t *srp;
 367         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 368 
 369         srp = svp->svp_remote;
 370         sqp->sq_func = svp_remote_vl2_lookup_cb;
 371         sqp->sq_arg = arg;
 372         sqp->sq_svp = svp;
 373         sqp->sq_state = SVP_QUERY_INIT;
 374         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 375         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 376         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 377         if (sqp->sq_header.svp_id == (id_t)-1)
 378                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 379                     errno);
 380         sqp->sq_header.svp_crc32 = 0;
 381         sqp->sq_rdata = vl2r;
 382         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 383         sqp->sq_wdata = NULL;
 384         sqp->sq_wsize = 0;
 385 
 386         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 387         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 388 
 389         mutex_enter(&srp->sr_lock);
 390         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 391                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 392         mutex_exit(&srp->sr_lock);
 393 }
 394 
 395 static void
 396 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
 397 {
 398         svp_t *svp = sqp->sq_svp;
 399         svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
 400 
 401         /*
 402          * Do the ntoh*()-ing here.
 403          */
 404         if (sqp->sq_status == SVP_S_OK) {
 405                 svp->svp_cb.scb_route_lookup(svp, ntohl(sqp->sq_status),
 406                     ntohl(sra->sra_dcid), ntohl(sra->sra_vnetid),
 407                     ntohs(sra->sra_vlan), sra->sra_srcmac, sra->sra_dstmac,
 408                     ntohs(sra->sra_port), sra->sra_ip, sra->sra_src_pfx,
 409                     sra->sra_dst_pfx, arg);
 410         } else {
 411                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 412                     0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
 413         }
 414 }
 415 
 416 void
 417 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
 418     const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
 419     uint16_t vlan, void *arg)
 420 {
 421         svp_remote_t *srp;
 422         svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
 423 
 424         srp = svp->svp_remote;
 425         sqp->sq_func = svp_remote_route_lookup_cb;
 426         sqp->sq_arg = arg;
 427         sqp->sq_svp = svp;
 428         sqp->sq_state = SVP_QUERY_INIT;
 429         sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
 430         sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
 431         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 432         if (sqp->sq_header.svp_id == (id_t)-1)
 433                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 434                     errno);
 435         sqp->sq_header.svp_crc32 = 0;
 436         sqp->sq_rdata = srr;
 437         sqp->sq_rsize = sizeof (svp_route_req_t);
 438         sqp->sq_wdata = NULL;
 439         sqp->sq_wsize = 0;
 440 
 441         bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
 442         bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
 443         /* Caller should've checked both are the same type... */
 444         srr->srr_vnetid = htonl(vnetid);
 445         srr->srr_vlan = htons(vlan);
 446         srr->srr_pad = 0;
 447 
 448         mutex_enter(&srp->sr_lock);
 449         if (!svp_remote_conn_queue(srp, sqp)) {
 450                 sqp->sq_status = SVP_S_FATAL;
 451                 sqp->sq_func(sqp, arg);
 452         }
 453         mutex_exit(&srp->sr_lock);
 454 }
 455 
 456 static void
 457 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 458 {
 459         svp_t *svp = sqp->sq_svp;
 460         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 461 
 462         if (sqp->sq_status == SVP_S_OK)
 463                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 464                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 465                     arg);
 466         else
 467                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 468                     arg);
 469 }
 470 
 471 static void
 472 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 473     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 474 {
 475         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 476 
 477         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 478                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 479 
 480         sqp->sq_func = func;
 481         sqp->sq_arg = arg;
 482         sqp->sq_state = SVP_QUERY_INIT;
 483         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 484         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 485         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 486         if (sqp->sq_header.svp_id == (id_t)-1)
 487                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 488                     errno);
 489         sqp->sq_header.svp_crc32 = 0;
 490         sqp->sq_rdata = vl3r;
 491         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 492         sqp->sq_wdata = NULL;
 493         sqp->sq_wsize = 0;
 494 
 495         if (addr->sa_family == AF_INET6) {
 496                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 497                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 498                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 499                     sizeof (struct in6_addr));
 500         } else {
 501                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 502                 struct in6_addr v6;
 503 
 504                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 505                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 506                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 507         }
 508         vl3r->sl3r_vnetid = htonl(vid);
 509 
 510         mutex_enter(&srp->sr_lock);
 511         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 512                 sqp->sq_status = SVP_S_FATAL;
 513                 sqp->sq_func(sqp, arg);
 514         }
 515         mutex_exit(&srp->sr_lock);
 516 }
 517 
 518 /*
 519  * This is a request to do a VL3 look-up that originated internally as opposed
 520  * to coming from varpd. As such we need a slightly different query callback
 521  * function upon completion and don't go through the normal path with the svp_t.
 522  */
 523 void
 524 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
 525     const struct sockaddr *addr, svp_query_f func, void *arg)
 526 {
 527         svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
 528 }
 529 
 530 void
 531 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 532     const struct sockaddr *addr, void *arg)
 533 {
 534         svp_remote_t *srp = svp->svp_remote;
 535 
 536         sqp->sq_svp = svp;
 537         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 538             arg, svp->svp_vid);
 539 }
 540 
 541 static void
 542 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 543 {
 544         svp_remote_t *srp = sqp->sq_arg;
 545         uint16_t version;
 546 
 547         /*
 548          * Version in request is set in this sqp's read-data/sq_header by
 549          * now.
 550          */
 551         assert(sqp->sq_header.svp_op == htons(SVP_R_LOG_REQ));
 552         assert(sqp->sq_header.svp_ver != 0);
 553         version = htons(sqp->sq_header.svp_ver);
 554 
 555         assert(sqp->sq_wdata != NULL);
 556         if (sqp->sq_status == SVP_S_OK)
 557                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 558                     sqp->sq_size, version);
 559         else
 560                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0, 0);
 561 }
 562 
 563 void
 564 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 565     size_t buflen)
 566 {
 567         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 568         boolean_t queued;
 569 
 570         sqp->sq_func = svp_remote_log_request_cb;
 571         sqp->sq_state = SVP_QUERY_INIT;
 572         sqp->sq_arg = srp;
 573         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 574         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 575         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 576         if (sqp->sq_header.svp_id == (id_t)-1)
 577                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 578                     errno);
 579         sqp->sq_header.svp_crc32 = 0;
 580         sqp->sq_rdata = logr;
 581         sqp->sq_rsize = sizeof (svp_log_req_t);
 582         sqp->sq_wdata = buf;
 583         sqp->sq_wsize = buflen;
 584 
 585         logr->svlr_count = htonl(buflen);
 586         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 587 
 588         /*
 589          * If this fails, there isn't much that we can't do. Give the callback
 590          * with a fatal status.
 591          */
 592         mutex_enter(&srp->sr_lock);
 593         queued = svp_remote_conn_queue(srp, sqp);
 594         mutex_exit(&srp->sr_lock);
 595 
 596         if (queued == B_FALSE)
 597                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0, 0);
 598 }
 599 
 600 static void
 601 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 602 {
 603         svp_remote_t *srp = arg;
 604 
 605         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 606 }
 607 
 608 void
 609 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 610     size_t buflen)
 611 {
 612         boolean_t queued;
 613         svp_lrm_req_t *svrr = buf;
 614 
 615         sqp->sq_func = svp_remote_lrm_request_cb;
 616         sqp->sq_state = SVP_QUERY_INIT;
 617         sqp->sq_arg = srp;
 618         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 619         sqp->sq_header.svp_size = htonl(buflen);
 620         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 621         if (sqp->sq_header.svp_id == (id_t)-1)
 622                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 623                     errno);
 624         sqp->sq_header.svp_crc32 = 0;
 625         sqp->sq_rdata = buf;
 626         sqp->sq_rsize = buflen;
 627         sqp->sq_wdata = NULL;
 628         sqp->sq_wsize = 0;
 629 
 630         /*
 631          * We need to fix up the count to be in proper network order.
 632          */
 633         svrr->svrr_count = htonl(svrr->svrr_count);
 634 
 635         /*
 636          * If this fails, there isn't much that we can't do. Give the callback
 637          * with a fatal status.
 638          */
 639         mutex_enter(&srp->sr_lock);
 640         queued = svp_remote_conn_queue(srp, sqp);
 641         mutex_exit(&srp->sr_lock);
 642 
 643         if (queued == B_FALSE)
 644                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0, 0);
 645 }
 646 
 647 /* ARGSUSED */
 648 void
 649 svp_remote_dns_timer(void *unused)
 650 {
 651         svp_remote_t *s;
 652         mutex_enter(&svp_remote_lock);
 653         for (s = avl_first(&svp_remote_tree); s != NULL;
 654             s = AVL_NEXT(&svp_remote_tree, s)) {
 655                 svp_host_queue(s);
 656         }
 657         mutex_exit(&svp_remote_lock);
 658 }
 659 
 660 void
 661 svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
 662 {
 663         struct addrinfo *a;
 664         svp_conn_t *scp;
 665         int ngen;
 666 
 667         mutex_enter(&srp->sr_lock);
 668         srp->sr_gen++;
 669         ngen = srp->sr_gen;
 670         mutex_exit(&srp->sr_lock);
 671 
 672         for (a = newaddrs; a != NULL; a = a->ai_next) {
 673                 struct in6_addr in6;
 674                 struct in6_addr *addrp;
 675 
 676                 if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
 677                         continue;
 678 
 679                 if (a->ai_family == AF_INET) {
 680                         struct sockaddr_in *v4;
 681                         v4 = (struct sockaddr_in *)a->ai_addr;
 682                         addrp = &in6;
 683                         IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
 684                 } else {
 685                         struct sockaddr_in6 *v6;
 686                         v6 = (struct sockaddr_in6 *)a->ai_addr;
 687                         addrp = &v6->sin6_addr;
 688                 }
 689 
 690                 mutex_enter(&srp->sr_lock);
 691                 for (scp = list_head(&srp->sr_conns); scp != NULL;
 692                     scp = list_next(&srp->sr_conns, scp)) {
 693                         mutex_enter(&scp->sc_lock);
 694                         if (bcmp(addrp, &scp->sc_addr,
 695                             sizeof (struct in6_addr)) == 0) {
 696                                 scp->sc_gen = ngen;
 697                                 mutex_exit(&scp->sc_lock);
 698                                 break;
 699                         }
 700                         mutex_exit(&scp->sc_lock);
 701                 }
 702 
 703                 /*
 704                  * We need to be careful in the assumptions that we make here,
 705                  * as there's a good chance that svp_conn_create will
 706                  * drop the svp_remote_t`sr_lock to kick off its effective event
 707                  * loop.
 708                  */
 709                 if (scp == NULL)
 710                         (void) svp_conn_create(srp, addrp);
 711                 mutex_exit(&srp->sr_lock);
 712         }
 713 
 714         /*
 715          * Now it's time to clean things up. We do not actively clean up the
 716          * current connections that we have, instead allowing them to stay
 717          * around assuming that they're still useful. Instead, we go through and
 718          * purge the degraded list for anything that's from an older generation.
 719          */
 720         mutex_enter(&srp->sr_lock);
 721         for (scp = list_head(&srp->sr_conns); scp != NULL;
 722             scp = list_next(&srp->sr_conns, scp)) {
 723                 boolean_t fall = B_FALSE;
 724                 mutex_enter(&scp->sc_lock);
 725                 if (scp->sc_gen < srp->sr_gen)
 726                         fall = B_TRUE;
 727                 mutex_exit(&scp->sc_lock);
 728                 if (fall == B_TRUE)
 729                         svp_conn_fallout(scp);
 730         }
 731         mutex_exit(&srp->sr_lock);
 732 }
 733 
 734 /*
 735  * This connection is in the process of being reset, we need to reassign all of
 736  * its queries to other places or mark them as fatal. Note that the first
 737  * connection was the one in flight when this failed. We always mark it as
 738  * failed to avoid trying to reset its state.
 739  */
 740 void
 741 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 742 {
 743         boolean_t first = B_TRUE;
 744         assert(MUTEX_HELD(&srp->sr_lock));
 745         assert(MUTEX_HELD(&srp->sr_lock));
 746         svp_query_t *sqp;
 747 
 748         /*
 749          * As we try to reassigning all of its queries, remove it from the list.
 750          */
 751         list_remove(&srp->sr_conns, scp);
 752 
 753         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 754 
 755                 if (first == B_TRUE) {
 756                         sqp->sq_status = SVP_S_FATAL;
 757                         sqp->sq_func(sqp, sqp->sq_arg);
 758                         continue;
 759                 }
 760 
 761                 sqp->sq_acttime = -1;
 762 
 763                 /*
 764                  * We may want to maintain a queue of these for some time rather
 765                  * than just failing them all.
 766                  */
 767                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 768                         sqp->sq_status = SVP_S_FATAL;
 769                         sqp->sq_func(sqp, sqp->sq_arg);
 770                 }
 771         }
 772 
 773         /*
 774          * Now that we're done, go ahead and re-insert.
 775          */
 776         list_insert_tail(&srp->sr_conns, scp);
 777 }
 778 
 779 void
 780 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 781 {
 782         int sf, nf;
 783         char buf[256];
 784 
 785         assert(MUTEX_HELD(&srp->sr_lock));
 786 
 787         if (flag == SVP_RD_ALL || flag == 0)
 788                 libvarpd_panic("invalid flag passed to degrade");
 789 
 790         if ((flag & srp->sr_degrade) != 0) {
 791                 return;
 792         }
 793 
 794         sf = ffs(srp->sr_degrade);
 795         nf = ffs(flag);
 796         srp->sr_degrade |= flag;
 797         if (sf == 0 || sf > nf) {
 798                 svp_t *svp;
 799                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 800 
 801                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 802                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 803                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 804                 }
 805         }
 806 }
 807 
 808 void
 809 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 810 {
 811         int sf, nf;
 812 
 813         assert(MUTEX_HELD(&srp->sr_lock));
 814         sf = ffs(srp->sr_degrade);
 815         if ((srp->sr_degrade & flag) != flag)
 816                 return;
 817         srp->sr_degrade &= ~flag;
 818         nf = ffs(srp->sr_degrade);
 819 
 820         /*
 821          * If we're now empty, restore the device. If we still are degraded, but
 822          * we now have a higher base than we used to, change the message.
 823          */
 824         if (srp->sr_degrade == 0) {
 825                 svp_t *svp;
 826                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 827                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 828                         libvarpd_fma_restore(svp->svp_hdl);
 829                 }
 830         } else if (nf != sf) {
 831                 svp_t *svp;
 832                 char buf[256];
 833 
 834                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 835                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 836                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 837                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 838                 }
 839         }
 840 }
 841 
 842 void
 843 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 844 {
 845         svp_shoot_vl3_t *squery = arg;
 846         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 847         svp_sdlog_t *sdl = squery->ssv_log;
 848 
 849         if (sqp->sq_status == SVP_S_OK) {
 850                 svp_t *svp, lookup;
 851 
 852                 svp_remote_t *srp = sdl->sdl_remote;
 853                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 854 
 855                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 856                 mutex_enter(&srp->sr_lock);
 857                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 858                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 859                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 860                             NULL);
 861                 }
 862                 mutex_exit(&srp->sr_lock);
 863 
 864         }
 865 
 866         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 867 
 868         umem_free(squery, sizeof (svp_shoot_vl3_t));
 869 }
 870 
 871 void
 872 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 873     svp_sdlog_t *sdl)
 874 {
 875         svp_shoot_vl3_t *squery;
 876 
 877         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 878         if (squery == NULL) {
 879                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 880                 return;
 881         }
 882 
 883         squery->ssv_vl3 = svl3;
 884         squery->ssv_log = sdl;
 885         squery->ssv_sock.sin6_family = AF_INET6;
 886         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 887             sizeof (svl3->svl3_ip));
 888         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 889             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 890             squery);
 891 }
 892 
 893 void
 894 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 895 {
 896         svp_t *svp, lookup;
 897 
 898         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 899         mutex_enter(&srp->sr_lock);
 900         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 901                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 902         }
 903         mutex_exit(&srp->sr_lock);
 904 }
 905 
 906 void
 907 svp_remote_shootdown_route(svp_remote_t *srp, svp_log_route_t *svlr)
 908 {
 909         svp_t *svp, lookup;
 910 
 911         lookup.svp_vid = ntohl(svlr->svlr_src_vnetid);
 912         mutex_enter(&srp->sr_lock);
 913         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 914                 svp->svp_cb.scb_route_shootdown(svp, svlr->svlr_srcip,
 915                     svlr->svlr_dstip, svlr->svlr_src_prefixlen,
 916                     svlr->svlr_dst_prefixlen, htons(svlr->svlr_src_vlan));
 917         }
 918         mutex_exit(&srp->sr_lock);
 919 }
 920 
 921 int
 922 svp_remote_init(void)
 923 {
 924         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 925         if (svp_idspace == NULL)
 926                 return (errno);
 927         avl_create(&svp_remote_tree, svp_remote_comparator,
 928             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 929         svp_dns_timer.st_func = svp_remote_dns_timer;
 930         svp_dns_timer.st_arg = NULL;
 931         svp_dns_timer.st_oneshot = B_FALSE;
 932         svp_dns_timer.st_value = svp_dns_timer_rate;
 933         svp_timer_add(&svp_dns_timer);
 934         return (0);
 935 }
 936 
 937 void
 938 svp_remote_fini(void)
 939 {
 940         svp_timer_remove(&svp_dns_timer);
 941         avl_destroy(&svp_remote_tree);
 942         if (svp_idspace == NULL)
 943                 id_space_destroy(svp_idspace);
 944 }