1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
/*
 * Per-request state for a VL3 lookup started by svp_remote_shootdown_vl3().
 * One of these is allocated when the lookup is issued and it is freed by
 * svp_remote_shootdown_vl3_cb() once the lookup has completed.
 */
typedef struct svp_shoot_vl3 {
        svp_query_t             ssv_query;      /* query given to the remote */
        struct sockaddr_in6     ssv_sock;       /* address being looked up */
        svp_log_vl3_t           *ssv_vl3;       /* log entry being handled */
        svp_sdlog_t             *ssv_log;       /* log passed to completion */
} svp_shoot_vl3_t;
  44 
/*
 * File-wide state.
 *
 * svp_remote_lock is held while svp_remote_tree is searched, modified or
 * walked by the functions below.  svp_remote_tree holds every svp_remote_t,
 * ordered by svp_remote_comparator().  svp_idspace is the id space
 * ("svp_req_ids") that query ids are allocated from.  svp_dns_timer is the
 * recurring timer that runs svp_remote_dns_timer(); svp_dns_timer_rate is the
 * value it is armed with.
 */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
static avl_tree_t svp_remote_tree;
static svp_timer_t svp_dns_timer;
static id_space_t *svp_idspace;
static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 id_t
  52 svp_id_alloc(void)
  53 {
  54         return (id_alloc(svp_idspace));
  55 }
  56 
  57 static void
  58 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  59     size_t buflen)
  60 {
  61         switch (state) {
  62         case SVP_RD_DNS_FAIL:
  63                 (void) snprintf(buf, buflen, "failed to resolve or find "
  64                     "entries for hostname %s", srp->sr_hostname);
  65                 break;
  66         case SVP_RD_REMOTE_FAIL:
  67                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  68                 break;
  69         default:
  70                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  71         }
  72 }
  73 
  74 static int
  75 svp_remote_comparator(const void *l, const void *r)
  76 {
  77         int ret;
  78         const svp_remote_t *lr = l, *rr = r;
  79 
  80         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  81         if (ret > 0)
  82                 return (1);
  83         else if (ret < 0)
  84                 return (-1);
  85 
  86         if (lr->sr_rport > rr->sr_rport)
  87                 return (1);
  88         else if (lr->sr_rport < rr->sr_rport)
  89                 return (-1);
  90 
  91         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  92 }
  93 
  94 void
  95 svp_query_release(svp_query_t *sqp)
  96 {
  97         id_free(svp_idspace, sqp->sq_header.svp_id);
  98 }
  99 
 100 static void
 101 svp_remote_destroy(svp_remote_t *srp)
 102 {
 103         size_t len;
 104 
 105         /*
 106          * Clean up any unrelated DNS information. At this point we know that
 107          * we're not in the remote tree. That means, that svp_remote_dns_timer
 108          * cannot queue us. However, if any of our DNS related state flags are
 109          * set, we have to hang out.
 110          */
 111         mutex_enter(&srp->sr_lock);
 112         while (srp->sr_state &
 113             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 114                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 115         }
 116         mutex_exit(&srp->sr_lock);
 117         svp_shootdown_fini(srp);
 118 
 119         if (cond_destroy(&srp->sr_cond) != 0)
 120                 libvarpd_panic("failed to destroy cond sr_cond");
 121 
 122         if (mutex_destroy(&srp->sr_lock) != 0)
 123                 libvarpd_panic("failed to destroy mutex sr_lock");
 124 
 125         if (srp->sr_addrinfo != NULL)
 126                 freeaddrinfo(srp->sr_addrinfo);
 127         len = strlen(srp->sr_hostname) + 1;
 128         umem_free(srp->sr_hostname, len);
 129         umem_free(srp, sizeof (svp_remote_t));
 130 }
 131 
 132 static int
 133 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 134     svp_remote_t **outp)
 135 {
 136         size_t hlen;
 137         svp_remote_t *remote;
 138 
 139         assert(MUTEX_HELD(&svp_remote_lock));
 140 
 141         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 142         if (remote == NULL) {
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         if (svp_shootdown_init(remote) != 0) {
 148                 umem_free(remote, sizeof (svp_remote_t));
 149                 mutex_exit(&svp_remote_lock);
 150                 return (ENOMEM);
 151         }
 152 
 153         hlen = strlen(host) + 1;
 154         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 155         if (remote->sr_hostname == NULL) {
 156                 svp_shootdown_fini(remote);
 157                 umem_free(remote, sizeof (svp_remote_t));
 158                 mutex_exit(&svp_remote_lock);
 159                 return (ENOMEM);
 160         }
 161         remote->sr_rport = port;
 162         if (mutex_init(&remote->sr_lock,
 163             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 164                 libvarpd_panic("failed to create mutex sr_lock");
 165         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 166                 libvarpd_panic("failed to create cond sr_cond");
 167         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 168             offsetof(svp_conn_t, sc_rlist));
 169         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 170             offsetof(svp_t, svp_rlink));
 171         (void) strlcpy(remote->sr_hostname, host, hlen);
 172         remote->sr_count = 1;
 173         remote->sr_uip = *uip;
 174 
 175         svp_shootdown_start(remote);
 176 
 177         *outp = remote;
 178         return (0);
 179 }
 180 
 181 int
 182 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 183     svp_remote_t **outp)
 184 {
 185         int ret;
 186         svp_remote_t lookup, *remote;
 187 
 188         lookup.sr_hostname = host;
 189         lookup.sr_rport = port;
 190         lookup.sr_uip = *uip;
 191         mutex_enter(&svp_remote_lock);
 192         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 193         if (remote != NULL) {
 194                 assert(remote->sr_count > 0);
 195                 remote->sr_count++;
 196                 *outp = remote;
 197                 mutex_exit(&svp_remote_lock);
 198                 return (0);
 199         }
 200 
 201         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 202                 mutex_exit(&svp_remote_lock);
 203                 return (ret);
 204         }
 205 
 206         avl_add(&svp_remote_tree, *outp);
 207         mutex_exit(&svp_remote_lock);
 208 
 209         /* Make sure DNS is up to date */
 210         svp_host_queue(*outp);
 211 
 212         return (0);
 213 }
 214 
 215 void
 216 svp_remote_release(svp_remote_t *srp)
 217 {
 218         mutex_enter(&svp_remote_lock);
 219         mutex_enter(&srp->sr_lock);
 220         srp->sr_count--;
 221         if (srp->sr_count != 0) {
 222                 mutex_exit(&srp->sr_lock);
 223                 mutex_exit(&svp_remote_lock);
 224                 return;
 225         }
 226         mutex_exit(&srp->sr_lock);
 227 
 228         avl_remove(&svp_remote_tree, srp);
 229         mutex_exit(&svp_remote_lock);
 230         svp_remote_destroy(srp);
 231 }
 232 
 233 int
 234 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 235 {
 236         svp_t check;
 237         avl_index_t where;
 238 
 239         mutex_enter(&srp->sr_lock);
 240         if (svp->svp_remote != NULL)
 241                 libvarpd_panic("failed to create mutex sr_lock");
 242 
 243         /*
 244          * We require everything except shootdowns
 245          */
 246         if (svp->svp_cb.scb_vl2_lookup == NULL)
 247                 libvarpd_panic("missing callback scb_vl2_lookup");
 248         if (svp->svp_cb.scb_vl3_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_vl3_lookup");
 250         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 251                 libvarpd_panic("missing callback scb_vl2_invalidate");
 252         if (svp->svp_cb.scb_vl3_inject == NULL)
 253                 libvarpd_panic("missing callback scb_vl3_inject");
 254         if (svp->svp_cb.scb_route_lookup == NULL)
 255                 libvarpd_panic("missing callback scb_route_lookup");
 256 
 257         check.svp_vid = svp->svp_vid;
 258         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 259                 libvarpd_panic("found duplicate entry with vid %ld",
 260                     svp->svp_vid);
 261         avl_insert(&srp->sr_tree, svp, where);
 262         svp->svp_remote = srp;
 263         mutex_exit(&srp->sr_lock);
 264 
 265         return (0);
 266 }
 267 
 268 void
 269 svp_remote_detach(svp_t *svp)
 270 {
 271         svp_t *lookup;
 272         svp_remote_t *srp = svp->svp_remote;
 273 
 274         if (srp == NULL)
 275                 libvarpd_panic("trying to detach remote when none exists");
 276 
 277         mutex_enter(&srp->sr_lock);
 278         lookup = avl_find(&srp->sr_tree, svp, NULL);
 279         if (lookup == NULL || lookup != svp)
 280                 libvarpd_panic("inconsitent remote avl tree...");
 281         avl_remove(&srp->sr_tree, svp);
 282         svp->svp_remote = NULL;
 283         mutex_exit(&srp->sr_lock);
 284         svp_remote_release(srp);
 285 }
 286 
 287 /*
 288  * See if the request can be sent over the connection's supported version.
 289  * Scribble the version in the request itself.  NOTE that we do not check the
 290  * version that already exists in sqp->sq_header.svp_ver, as we may be called
 291  * from svp_remote_reassign() (and change versions when arriving at a new
 292  * connection).
 293  */
 294 static boolean_t
 295 svp_outbound_version_check(int version, svp_query_t *sqp)
 296 {
 297         uint16_t op = htons(sqp->sq_header.svp_op);
 298 
 299         /*
 300          * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
 301          * as v2-only.  Reflect that here.
 302          *
 303          * NOTE that if any message semantics change between future versions,
 304          * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
 305          * need to more-deeply inspect the query.  It's possible that the
 306          * svp_op space is big enough to just continue op-only inspections.
 307          */
 308 
 309         assert(version > 0 && version <= SVP_CURRENT_VERSION);
 310 
 311         if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
 312                 sqp->sq_header.svp_ver = htons(version);
 313                 return (B_TRUE);
 314         }
 315         return (B_FALSE);
 316 }
 317 
 318 /*
 319  * Walk the list of connections and find the first one that's available AND
 320  * version-appropriate for the message, then move the matched connection to
 321  * the back of the list so it's less likely to be used again.
 322  */
 323 static boolean_t
 324 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 325 {
 326         svp_conn_t *scp;
 327 
 328         assert(MUTEX_HELD(&srp->sr_lock));
 329         for (scp = list_head(&srp->sr_conns); scp != NULL;
 330             scp = list_next(&srp->sr_conns, scp)) {
 331                 mutex_enter(&scp->sc_lock);
 332                 if (scp->sc_cstate != SVP_CS_ACTIVE ||
 333                     !svp_outbound_version_check(scp->sc_version, sqp)) {
 334                         mutex_exit(&scp->sc_lock);
 335                         continue;
 336                 }
 337                 svp_conn_queue(scp, sqp);
 338                 mutex_exit(&scp->sc_lock);
 339                 list_remove(&srp->sr_conns, scp);
 340                 list_insert_tail(&srp->sr_conns, scp);
 341                 return (B_TRUE);
 342         }
 343 
 344         return (B_FALSE);
 345 }
 346 
 347 static void
 348 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 349 {
 350         svp_t *svp = sqp->sq_svp;
 351         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 352 
 353         if (sqp->sq_status == SVP_S_OK)
 354                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 355                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 356                     arg);
 357         else
 358                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 359 }
 360 
 361 void
 362 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 363     void *arg)
 364 {
 365         svp_remote_t *srp;
 366         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 367 
 368         srp = svp->svp_remote;
 369         sqp->sq_func = svp_remote_vl2_lookup_cb;
 370         sqp->sq_arg = arg;
 371         sqp->sq_svp = svp;
 372         sqp->sq_state = SVP_QUERY_INIT;
 373         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 374         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 375         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 376         if (sqp->sq_header.svp_id == (id_t)-1)
 377                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 378                     errno);
 379         sqp->sq_header.svp_crc32 = 0;
 380         sqp->sq_rdata = vl2r;
 381         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 382         sqp->sq_wdata = NULL;
 383         sqp->sq_wsize = 0;
 384 
 385         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 386         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 387 
 388         mutex_enter(&srp->sr_lock);
 389         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 390                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 391         mutex_exit(&srp->sr_lock);
 392 }
 393 
 394 static void
 395 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
 396 {
 397         svp_t *svp = sqp->sq_svp;
 398         svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
 399 
 400         /*
 401          * Do the ntoh*()-ing here.
 402          */
 403         if (sqp->sq_status == SVP_S_OK) {
 404                 svp->svp_cb.scb_route_lookup(svp, ntohl(sqp->sq_status),
 405                     ntohl(sra->sra_dcid), ntohl(sra->sra_vnetid),
 406                     ntohs(sra->sra_vlan), sra->sra_srcmac, sra->sra_dstmac,
 407                     ntohs(sra->sra_port), sra->sra_ip, sra->sra_src_pfx,
 408                     sra->sra_dst_pfx, arg);
 409         } else {
 410                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 411                     0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
 412         }
 413 }
 414 
 415 void
 416 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
 417     const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
 418     uint16_t vlan, void *arg)
 419 {
 420         svp_remote_t *srp;
 421         svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
 422 
 423         srp = svp->svp_remote;
 424         sqp->sq_func = svp_remote_route_lookup_cb;
 425         sqp->sq_arg = arg;
 426         sqp->sq_svp = svp;
 427         sqp->sq_state = SVP_QUERY_INIT;
 428         sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
 429         sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
 430         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 431         if (sqp->sq_header.svp_id == (id_t)-1)
 432                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 433                     errno);
 434         sqp->sq_header.svp_crc32 = 0;
 435         sqp->sq_rdata = srr;
 436         sqp->sq_rsize = sizeof (svp_route_req_t);
 437         sqp->sq_wdata = NULL;
 438         sqp->sq_wsize = 0;
 439 
 440         bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
 441         bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
 442         /* Caller should've checked both are the same type... */
 443         srr->srr_vnetid = htonl(vnetid);
 444         srr->srr_vlan = htons(vlan);
 445         srr->srr_pad = 0;
 446 
 447         mutex_enter(&srp->sr_lock);
 448         if (!svp_remote_conn_queue(srp, sqp)) {
 449                 sqp->sq_status = SVP_S_FATAL;
 450                 sqp->sq_func(sqp, arg);
 451         }
 452         mutex_exit(&srp->sr_lock);
 453 }
 454 
 455 static void
 456 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 457 {
 458         svp_t *svp = sqp->sq_svp;
 459         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 460 
 461         if (sqp->sq_status == SVP_S_OK)
 462                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 463                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 464                     arg);
 465         else
 466                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 467                     arg);
 468 }
 469 
 470 static void
 471 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 472     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 473 {
 474         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 475 
 476         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 477                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 478 
 479         sqp->sq_func = func;
 480         sqp->sq_arg = arg;
 481         sqp->sq_state = SVP_QUERY_INIT;
 482         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 483         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 484         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 485         if (sqp->sq_header.svp_id == (id_t)-1)
 486                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 487                     errno);
 488         sqp->sq_header.svp_crc32 = 0;
 489         sqp->sq_rdata = vl3r;
 490         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 491         sqp->sq_wdata = NULL;
 492         sqp->sq_wsize = 0;
 493 
 494         if (addr->sa_family == AF_INET6) {
 495                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 496                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 497                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 498                     sizeof (struct in6_addr));
 499         } else {
 500                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 501                 struct in6_addr v6;
 502 
 503                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 504                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 505                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 506         }
 507         vl3r->sl3r_vnetid = htonl(vid);
 508 
 509         mutex_enter(&srp->sr_lock);
 510         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 511                 sqp->sq_status = SVP_S_FATAL;
 512                 sqp->sq_func(sqp, arg);
 513         }
 514         mutex_exit(&srp->sr_lock);
 515 }
 516 
/*
 * This is a request to do a VL3 look-up that originated internally as opposed
 * to coming from varpd. As such we need a slightly different query callback
 * function upon completion and don't go through the normal path with the svp_t.
 *
 * The caller supplies the remote, the virtual network id and the completion
 * callback (func/arg) directly; everything is forwarded unchanged to
 * svp_remote_vl3_common(), which builds and queues the request.
 */
void
svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
    const struct sockaddr *addr, svp_query_f func, void *arg)
{
        svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
}
 528 
 529 void
 530 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 531     const struct sockaddr *addr, void *arg)
 532 {
 533         svp_remote_t *srp = svp->svp_remote;
 534 
 535         sqp->sq_svp = svp;
 536         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 537             arg, svp->svp_vid);
 538 }
 539 
 540 static void
 541 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 542 {
 543         svp_remote_t *srp = sqp->sq_arg;
 544 
 545         assert(sqp->sq_wdata != NULL);
 546         if (sqp->sq_status == SVP_S_OK)
 547                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 548                     sqp->sq_size);
 549         else
 550                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
 551 }
 552 
 553 void
 554 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 555     size_t buflen)
 556 {
 557         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 558         boolean_t queued;
 559 
 560         sqp->sq_func = svp_remote_log_request_cb;
 561         sqp->sq_state = SVP_QUERY_INIT;
 562         sqp->sq_arg = srp;
 563         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 564         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 565         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 566         if (sqp->sq_header.svp_id == (id_t)-1)
 567                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 568                     errno);
 569         sqp->sq_header.svp_crc32 = 0;
 570         sqp->sq_rdata = logr;
 571         sqp->sq_rsize = sizeof (svp_log_req_t);
 572         sqp->sq_wdata = buf;
 573         sqp->sq_wsize = buflen;
 574 
 575         logr->svlr_count = htonl(buflen);
 576         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 577 
 578         /*
 579          * If this fails, there isn't much that we can't do. Give the callback
 580          * with a fatal status.
 581          */
 582         mutex_enter(&srp->sr_lock);
 583         queued = svp_remote_conn_queue(srp, sqp);
 584         mutex_exit(&srp->sr_lock);
 585 
 586         if (queued == B_FALSE)
 587                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 588 }
 589 
 590 static void
 591 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 592 {
 593         svp_remote_t *srp = arg;
 594 
 595         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 596 }
 597 
 598 void
 599 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 600     size_t buflen)
 601 {
 602         boolean_t queued;
 603         svp_lrm_req_t *svrr = buf;
 604 
 605         sqp->sq_func = svp_remote_lrm_request_cb;
 606         sqp->sq_state = SVP_QUERY_INIT;
 607         sqp->sq_arg = srp;
 608         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 609         sqp->sq_header.svp_size = htonl(buflen);
 610         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 611         if (sqp->sq_header.svp_id == (id_t)-1)
 612                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 613                     errno);
 614         sqp->sq_header.svp_crc32 = 0;
 615         sqp->sq_rdata = buf;
 616         sqp->sq_rsize = buflen;
 617         sqp->sq_wdata = NULL;
 618         sqp->sq_wsize = 0;
 619 
 620         /*
 621          * We need to fix up the count to be in proper network order.
 622          */
 623         svrr->svrr_count = htonl(svrr->svrr_count);
 624 
 625         /*
 626          * If this fails, there isn't much that we can't do. Give the callback
 627          * with a fatal status.
 628          */
 629         mutex_enter(&srp->sr_lock);
 630         queued = svp_remote_conn_queue(srp, sqp);
 631         mutex_exit(&srp->sr_lock);
 632 
 633         if (queued == B_FALSE)
 634                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 635 }
 636 
 637 /* ARGSUSED */
 638 void
 639 svp_remote_dns_timer(void *unused)
 640 {
 641         svp_remote_t *s;
 642         mutex_enter(&svp_remote_lock);
 643         for (s = avl_first(&svp_remote_tree); s != NULL;
 644             s = AVL_NEXT(&svp_remote_tree, s)) {
 645                 svp_host_queue(s);
 646         }
 647         mutex_exit(&svp_remote_lock);
 648 }
 649 
 650 void
 651 svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
 652 {
 653         struct addrinfo *a;
 654         svp_conn_t *scp;
 655         int ngen;
 656 
 657         mutex_enter(&srp->sr_lock);
 658         srp->sr_gen++;
 659         ngen = srp->sr_gen;
 660         mutex_exit(&srp->sr_lock);
 661 
 662         for (a = newaddrs; a != NULL; a = a->ai_next) {
 663                 struct in6_addr in6;
 664                 struct in6_addr *addrp;
 665 
 666                 if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
 667                         continue;
 668 
 669                 if (a->ai_family == AF_INET) {
 670                         struct sockaddr_in *v4;
 671                         v4 = (struct sockaddr_in *)a->ai_addr;
 672                         addrp = &in6;
 673                         IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
 674                 } else {
 675                         struct sockaddr_in6 *v6;
 676                         v6 = (struct sockaddr_in6 *)a->ai_addr;
 677                         addrp = &v6->sin6_addr;
 678                 }
 679 
 680                 mutex_enter(&srp->sr_lock);
 681                 for (scp = list_head(&srp->sr_conns); scp != NULL;
 682                     scp = list_next(&srp->sr_conns, scp)) {
 683                         mutex_enter(&scp->sc_lock);
 684                         if (bcmp(addrp, &scp->sc_addr,
 685                             sizeof (struct in6_addr)) == 0) {
 686                                 scp->sc_gen = ngen;
 687                                 mutex_exit(&scp->sc_lock);
 688                                 break;
 689                         }
 690                         mutex_exit(&scp->sc_lock);
 691                 }
 692 
 693                 /*
 694                  * We need to be careful in the assumptions that we make here,
 695                  * as there's a good chance that svp_conn_create will
 696                  * drop the svp_remote_t`sr_lock to kick off its effective event
 697                  * loop.
 698                  */
 699                 if (scp == NULL)
 700                         (void) svp_conn_create(srp, addrp);
 701                 mutex_exit(&srp->sr_lock);
 702         }
 703 
 704         /*
 705          * Now it's time to clean things up. We do not actively clean up the
 706          * current connections that we have, instead allowing them to stay
 707          * around assuming that they're still useful. Instead, we go through and
 708          * purge the degraded list for anything that's from an older generation.
 709          */
 710         mutex_enter(&srp->sr_lock);
 711         for (scp = list_head(&srp->sr_conns); scp != NULL;
 712             scp = list_next(&srp->sr_conns, scp)) {
 713                 boolean_t fall = B_FALSE;
 714                 mutex_enter(&scp->sc_lock);
 715                 if (scp->sc_gen < srp->sr_gen)
 716                         fall = B_TRUE;
 717                 mutex_exit(&scp->sc_lock);
 718                 if (fall == B_TRUE)
 719                         svp_conn_fallout(scp);
 720         }
 721         mutex_exit(&srp->sr_lock);
 722 }
 723 
 724 /*
 725  * This connection is in the process of being reset, we need to reassign all of
 726  * its queries to other places or mark them as fatal. Note that the first
 727  * connection was the one in flight when this failed. We always mark it as
 728  * failed to avoid trying to reset its state.
 729  */
 730 void
 731 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 732 {
 733         boolean_t first = B_TRUE;
 734         assert(MUTEX_HELD(&srp->sr_lock));
 735         assert(MUTEX_HELD(&srp->sr_lock));
 736         svp_query_t *sqp;
 737 
 738         /*
 739          * As we try to reassigning all of its queries, remove it from the list.
 740          */
 741         list_remove(&srp->sr_conns, scp);
 742 
 743         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 744 
 745                 if (first == B_TRUE) {
 746                         sqp->sq_status = SVP_S_FATAL;
 747                         sqp->sq_func(sqp, sqp->sq_arg);
 748                         continue;
 749                 }
 750 
 751                 sqp->sq_acttime = -1;
 752 
 753                 /*
 754                  * We may want to maintain a queue of these for some time rather
 755                  * than just failing them all.
 756                  */
 757                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 758                         sqp->sq_status = SVP_S_FATAL;
 759                         sqp->sq_func(sqp, sqp->sq_arg);
 760                 }
 761         }
 762 
 763         /*
 764          * Now that we're done, go ahead and re-insert.
 765          */
 766         list_insert_tail(&srp->sr_conns, scp);
 767 }
 768 
 769 void
 770 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 771 {
 772         int sf, nf;
 773         char buf[256];
 774 
 775         assert(MUTEX_HELD(&srp->sr_lock));
 776 
 777         if (flag == SVP_RD_ALL || flag == 0)
 778                 libvarpd_panic("invalid flag passed to degrade");
 779 
 780         if ((flag & srp->sr_degrade) != 0) {
 781                 return;
 782         }
 783 
 784         sf = ffs(srp->sr_degrade);
 785         nf = ffs(flag);
 786         srp->sr_degrade |= flag;
 787         if (sf == 0 || sf > nf) {
 788                 svp_t *svp;
 789                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 790 
 791                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 792                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 793                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 794                 }
 795         }
 796 }
 797 
 798 void
 799 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 800 {
 801         int sf, nf;
 802 
 803         assert(MUTEX_HELD(&srp->sr_lock));
 804         sf = ffs(srp->sr_degrade);
 805         if ((srp->sr_degrade & flag) != flag)
 806                 return;
 807         srp->sr_degrade &= ~flag;
 808         nf = ffs(srp->sr_degrade);
 809 
 810         /*
 811          * If we're now empty, restore the device. If we still are degraded, but
 812          * we now have a higher base than we used to, change the message.
 813          */
 814         if (srp->sr_degrade == 0) {
 815                 svp_t *svp;
 816                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 817                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 818                         libvarpd_fma_restore(svp->svp_hdl);
 819                 }
 820         } else if (nf != sf) {
 821                 svp_t *svp;
 822                 char buf[256];
 823 
 824                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 825                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 826                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 827                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 828                 }
 829         }
 830 }
 831 
 832 void
 833 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 834 {
 835         svp_shoot_vl3_t *squery = arg;
 836         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 837         svp_sdlog_t *sdl = squery->ssv_log;
 838 
 839         if (sqp->sq_status == SVP_S_OK) {
 840                 svp_t *svp, lookup;
 841 
 842                 svp_remote_t *srp = sdl->sdl_remote;
 843                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 844 
 845                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 846                 mutex_enter(&srp->sr_lock);
 847                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 848                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 849                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 850                             NULL);
 851                 }
 852                 mutex_exit(&srp->sr_lock);
 853 
 854         }
 855 
 856         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 857 
 858         umem_free(squery, sizeof (svp_shoot_vl3_t));
 859 }
 860 
 861 void
 862 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 863     svp_sdlog_t *sdl)
 864 {
 865         svp_shoot_vl3_t *squery;
 866 
 867         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 868         if (squery == NULL) {
 869                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 870                 return;
 871         }
 872 
 873         squery->ssv_vl3 = svl3;
 874         squery->ssv_log = sdl;
 875         squery->ssv_sock.sin6_family = AF_INET6;
 876         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 877             sizeof (svl3->svl3_ip));
 878         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 879             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 880             squery);
 881 }
 882 
 883 void
 884 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 885 {
 886         svp_t *svp, lookup;
 887 
 888         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 889         mutex_enter(&srp->sr_lock);
 890         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 891                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 892         }
 893         mutex_exit(&srp->sr_lock);
 894 }
 895 
 896 int
 897 svp_remote_init(void)
 898 {
 899         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 900         if (svp_idspace == NULL)
 901                 return (errno);
 902         avl_create(&svp_remote_tree, svp_remote_comparator,
 903             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 904         svp_dns_timer.st_func = svp_remote_dns_timer;
 905         svp_dns_timer.st_arg = NULL;
 906         svp_dns_timer.st_oneshot = B_FALSE;
 907         svp_dns_timer.st_value = svp_dns_timer_rate;
 908         svp_timer_add(&svp_dns_timer);
 909         return (0);
 910 }
 911 
 912 void
 913 svp_remote_fini(void)
 914 {
 915         svp_timer_remove(&svp_dns_timer);
 916         avl_destroy(&svp_remote_tree);
 917         if (svp_idspace == NULL)
 918                 id_space_destroy(svp_idspace);
 919 }