1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
/*
 * State carried across an asynchronous VL3 lookup that was started by
 * svp_remote_shootdown_vl3(). It is allocated there and released by
 * svp_remote_shootdown_vl3_cb() once the lookup has completed.
 */
typedef struct svp_shoot_vl3 {
        /* Query structure handed to svp_remote_vl3_logreq(). */
        svp_query_t             ssv_query;
        /* IPv6 socket address built from the log entry's svl3_ip. */
        struct sockaddr_in6     ssv_sock;
        /* Log entry being processed; passed back to svp_shootdown_vl3_cb(). */
        svp_log_vl3_t           *ssv_vl3;
        /* Shootdown log state; also used to find the owning remote. */
        svp_sdlog_t             *ssv_log;
} svp_shoot_vl3_t;
  44 
/*
 * svp_remote_lock is held around every access to svp_remote_tree in this file
 * (lookup, insertion, removal and the DNS timer walk).
 */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
/* All known remotes, ordered by svp_remote_comparator(). */
static avl_tree_t svp_remote_tree;
/* Recurring timer that runs svp_remote_dns_timer(); armed in svp_remote_init(). */
static svp_timer_t svp_dns_timer;
/* Source of request IDs; created in svp_remote_init(). */
static id_space_t *svp_idspace;
/* Copied into svp_dns_timer.st_value by svp_remote_init(). */
static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 id_t
  52 svp_id_alloc(void)
  53 {
  54         return (id_alloc(svp_idspace));
  55 }
  56 
  57 static void
  58 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  59     size_t buflen)
  60 {
  61         switch (state) {
  62         case SVP_RD_DNS_FAIL:
  63                 (void) snprintf(buf, buflen, "failed to resolve or find "
  64                     "entries for hostname %s", srp->sr_hostname);
  65                 break;
  66         case SVP_RD_REMOTE_FAIL:
  67                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  68                 break;
  69         default:
  70                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  71         }
  72 }
  73 
  74 static int
  75 svp_remote_comparator(const void *l, const void *r)
  76 {
  77         int ret;
  78         const svp_remote_t *lr = l, *rr = r;
  79 
  80         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  81         if (ret > 0)
  82                 return (1);
  83         else if (ret < 0)
  84                 return (-1);
  85 
  86         if (lr->sr_rport > rr->sr_rport)
  87                 return (1);
  88         else if (lr->sr_rport < rr->sr_rport)
  89                 return (-1);
  90 
  91         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  92 }
  93 
  94 void
  95 svp_query_release(svp_query_t *sqp)
  96 {
  97         id_free(svp_idspace, sqp->sq_header.svp_id);
  98 }
  99 
 100 static void
 101 svp_remote_destroy(svp_remote_t *srp)
 102 {
 103         size_t len;
 104 
 105         /*
 106          * Clean up any unrelated DNS information. At this point we know that
 107          * we're not in the remote tree. That means, that svp_remote_dns_timer
 108          * cannot queue us. However, if any of our DNS related state flags are
 109          * set, we have to hang out.
 110          */
 111         mutex_enter(&srp->sr_lock);
 112         while (srp->sr_state &
 113             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 114                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 115         }
 116         mutex_exit(&srp->sr_lock);
 117         svp_shootdown_fini(srp);
 118 
 119         if (cond_destroy(&srp->sr_cond) != 0)
 120                 libvarpd_panic("failed to destroy cond sr_cond");
 121 
 122         if (mutex_destroy(&srp->sr_lock) != 0)
 123                 libvarpd_panic("failed to destroy mutex sr_lock");
 124 
 125         if (srp->sr_addrinfo != NULL)
 126                 freeaddrinfo(srp->sr_addrinfo);
 127         len = strlen(srp->sr_hostname) + 1;
 128         umem_free(srp->sr_hostname, len);
 129         umem_free(srp, sizeof (svp_remote_t));
 130 }
 131 
 132 static int
 133 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 134     svp_remote_t **outp)
 135 {
 136         size_t hlen;
 137         svp_remote_t *remote;
 138 
 139         assert(MUTEX_HELD(&svp_remote_lock));
 140 
 141         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 142         if (remote == NULL) {
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         if (svp_shootdown_init(remote) != 0) {
 148                 umem_free(remote, sizeof (svp_remote_t));
 149                 mutex_exit(&svp_remote_lock);
 150                 return (ENOMEM);
 151         }
 152 
 153         hlen = strlen(host) + 1;
 154         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 155         if (remote->sr_hostname == NULL) {
 156                 svp_shootdown_fini(remote);
 157                 umem_free(remote, sizeof (svp_remote_t));
 158                 mutex_exit(&svp_remote_lock);
 159                 return (ENOMEM);
 160         }
 161         remote->sr_rport = port;
 162         if (mutex_init(&remote->sr_lock,
 163             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 164                 libvarpd_panic("failed to create mutex sr_lock");
 165         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 166                 libvarpd_panic("failed to create cond sr_cond");
 167         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 168             offsetof(svp_conn_t, sc_rlist));
 169         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 170             offsetof(svp_t, svp_rlink));
 171         (void) strlcpy(remote->sr_hostname, host, hlen);
 172         remote->sr_count = 1;
 173         remote->sr_uip = *uip;
 174 
 175         svp_shootdown_start(remote);
 176 
 177         *outp = remote;
 178         return (0);
 179 }
 180 
 181 int
 182 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 183     svp_remote_t **outp)
 184 {
 185         int ret;
 186         svp_remote_t lookup, *remote;
 187 
 188         lookup.sr_hostname = host;
 189         lookup.sr_rport = port;
 190         lookup.sr_uip = *uip;
 191         mutex_enter(&svp_remote_lock);
 192         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 193         if (remote != NULL) {
 194                 assert(remote->sr_count > 0);
 195                 remote->sr_count++;
 196                 *outp = remote;
 197                 mutex_exit(&svp_remote_lock);
 198                 return (0);
 199         }
 200 
 201         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 202                 mutex_exit(&svp_remote_lock);
 203                 return (ret);
 204         }
 205 
 206         avl_add(&svp_remote_tree, *outp);
 207         mutex_exit(&svp_remote_lock);
 208 
 209         /* Make sure DNS is up to date */
 210         svp_host_queue(*outp);
 211 
 212         return (0);
 213 }
 214 
 215 void
 216 svp_remote_release(svp_remote_t *srp)
 217 {
 218         mutex_enter(&svp_remote_lock);
 219         mutex_enter(&srp->sr_lock);
 220         srp->sr_count--;
 221         if (srp->sr_count != 0) {
 222                 mutex_exit(&srp->sr_lock);
 223                 mutex_exit(&svp_remote_lock);
 224                 return;
 225         }
 226         mutex_exit(&srp->sr_lock);
 227 
 228         avl_remove(&svp_remote_tree, srp);
 229         mutex_exit(&svp_remote_lock);
 230         svp_remote_destroy(srp);
 231 }
 232 
 233 int
 234 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 235 {
 236         svp_t check;
 237         avl_index_t where;
 238 
 239         mutex_enter(&srp->sr_lock);
 240         if (svp->svp_remote != NULL)
 241                 libvarpd_panic("failed to create mutex sr_lock");
 242 
 243         /*
 244          * We require everything except shootdowns
 245          */
 246         if (svp->svp_cb.scb_vl2_lookup == NULL)
 247                 libvarpd_panic("missing callback scb_vl2_lookup");
 248         if (svp->svp_cb.scb_vl3_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_vl3_lookup");
 250         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 251                 libvarpd_panic("missing callback scb_vl2_invalidate");
 252         if (svp->svp_cb.scb_vl3_inject == NULL)
 253                 libvarpd_panic("missing callback scb_vl3_inject");
 254         if (svp->svp_cb.scb_route_lookup == NULL)
 255                 libvarpd_panic("missing callback scb_route_lookup");
 256 
 257         check.svp_vid = svp->svp_vid;
 258         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 259                 libvarpd_panic("found duplicate entry with vid %ld",
 260                     svp->svp_vid);
 261         avl_insert(&srp->sr_tree, svp, where);
 262         svp->svp_remote = srp;
 263         mutex_exit(&srp->sr_lock);
 264 
 265         return (0);
 266 }
 267 
 268 void
 269 svp_remote_detach(svp_t *svp)
 270 {
 271         svp_t *lookup;
 272         svp_remote_t *srp = svp->svp_remote;
 273 
 274         if (srp == NULL)
 275                 libvarpd_panic("trying to detach remote when none exists");
 276 
 277         mutex_enter(&srp->sr_lock);
 278         lookup = avl_find(&srp->sr_tree, svp, NULL);
 279         if (lookup == NULL || lookup != svp)
 280                 libvarpd_panic("inconsitent remote avl tree...");
 281         avl_remove(&srp->sr_tree, svp);
 282         svp->svp_remote = NULL;
 283         mutex_exit(&srp->sr_lock);
 284         svp_remote_release(srp);
 285 }
 286 
 287 /*
 288  * See if the request can be sent over the connection's supported version.
 289  * Scribble the version in the request itself.  NOTE that we do not check the
 290  * version that already exists in sqp->sq_header.svp_ver, as we may be called
 291  * from svp_remote_reassign() (and change versions when arriving at a new
 292  * connection).
 293  */
 294 static boolean_t
 295 svp_outbound_version_check(int version, svp_query_t *sqp)
 296 {
 297         uint16_t op = htons(sqp->sq_header.svp_op);
 298 
 299         /*
 300          * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
 301          * as v2-only.  Reflect that here.
 302          *
 303          * NOTE that if any message semantics change between future versions,
 304          * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
 305          * need to more-deeply inspect the query.  It's possible that the
 306          * svp_op space is big enough to just continue op-only inspections.
 307          */
 308 
 309         assert(version > 0 && version <= SVP_CURRENT_VERSION);
 310 
 311         if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
 312                 sqp->sq_header.svp_ver = htons(version);
 313                 return (B_TRUE);
 314         }
 315         return (B_FALSE);
 316 }
 317 
 318 /*
 319  * Walk the list of connections and find the first one that's available AND
 320  * version-appropriate for the message, then move the matched connection to
 321  * the back of the list so it's less likely to be used again.
 322  */
 323 static boolean_t
 324 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 325 {
 326         svp_conn_t *scp;
 327 
 328         assert(MUTEX_HELD(&srp->sr_lock));
 329         for (scp = list_head(&srp->sr_conns); scp != NULL;
 330             scp = list_next(&srp->sr_conns, scp)) {
 331                 mutex_enter(&scp->sc_lock);
 332                 if (scp->sc_cstate != SVP_CS_ACTIVE ||
 333                     !svp_outbound_version_check(scp->sc_version, sqp)) {
 334                         mutex_exit(&scp->sc_lock);
 335                         continue;
 336                 }
 337                 svp_conn_queue(scp, sqp);
 338                 mutex_exit(&scp->sc_lock);
 339                 list_remove(&srp->sr_conns, scp);
 340                 list_insert_tail(&srp->sr_conns, scp);
 341                 return (B_TRUE);
 342         }
 343 
 344         return (B_FALSE);
 345 }
 346 
 347 static void
 348 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 349 {
 350         svp_t *svp = sqp->sq_svp;
 351         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 352 
 353         if (sqp->sq_status == SVP_S_OK)
 354                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 355                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 356                     arg);
 357         else
 358                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 359 }
 360 
 361 void
 362 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 363     void *arg)
 364 {
 365         svp_remote_t *srp;
 366         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 367 
 368         srp = svp->svp_remote;
 369         sqp->sq_func = svp_remote_vl2_lookup_cb;
 370         sqp->sq_arg = arg;
 371         sqp->sq_svp = svp;
 372         sqp->sq_state = SVP_QUERY_INIT;
 373         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 374         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 375         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 376         if (sqp->sq_header.svp_id == (id_t)-1)
 377                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 378                     errno);
 379         sqp->sq_header.svp_crc32 = htonl(0);
 380         sqp->sq_rdata = vl2r;
 381         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 382         sqp->sq_wdata = NULL;
 383         sqp->sq_wsize = 0;
 384 
 385         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 386         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 387 
 388         mutex_enter(&srp->sr_lock);
 389         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 390                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 391         mutex_exit(&srp->sr_lock);
 392 }
 393 
 394 static void
 395 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
 396 {
 397         svp_t *svp = sqp->sq_svp;
 398         svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
 399 
 400         if (sqp->sq_status == SVP_S_OK) {
 401                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 402                     sra->sra_dcid, sra->sra_vnetid, sra->sra_vlan,
 403                     sra->sra_srcmac, sra->sra_dstmac, sra->sra_port,
 404                     sra->sra_ip, sra->sra_src_pfx, sra->sra_dst_pfx, arg);
 405         } else {
 406                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 407                     0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
 408         }
 409 }
 410 
 411 void
 412 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
 413     const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
 414     uint16_t vlan, void *arg)
 415 {
 416         svp_remote_t *srp;
 417         svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
 418 
 419         srp = svp->svp_remote;
 420         sqp->sq_func = svp_remote_route_lookup_cb;
 421         sqp->sq_arg = arg;
 422         sqp->sq_svp = svp;
 423         sqp->sq_state = SVP_QUERY_INIT;
 424         sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
 425         sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
 426         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 427         if (sqp->sq_header.svp_id == (id_t)-1)
 428                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 429                     errno);
 430         sqp->sq_header.svp_crc32 = htonl(0);
 431         sqp->sq_rdata = srr;
 432 
 433         bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
 434         bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
 435         /* Caller should've checked both are the same type... */
 436         srr->srr_vnetid = vnetid;
 437         srr->srr_vlan = vlan;
 438         srr->srr_pad = 0;
 439 
 440         mutex_enter(&srp->sr_lock);
 441         if (!svp_remote_conn_queue(srp, sqp)) {
 442                 sqp->sq_status = SVP_S_FATAL;
 443                 sqp->sq_func(sqp, arg);
 444         }
 445         mutex_exit(&srp->sr_lock);
 446 }
 447 
 448 static void
 449 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 450 {
 451         svp_t *svp = sqp->sq_svp;
 452         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 453 
 454         if (sqp->sq_status == SVP_S_OK)
 455                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 456                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 457                     arg);
 458         else
 459                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 460                     arg);
 461 }
 462 
 463 static void
 464 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 465     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 466 {
 467         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 468 
 469         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 470                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 471 
 472         sqp->sq_func = func;
 473         sqp->sq_arg = arg;
 474         sqp->sq_state = SVP_QUERY_INIT;
 475         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 476         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 477         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 478         if (sqp->sq_header.svp_id == (id_t)-1)
 479                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 480                     errno);
 481         sqp->sq_header.svp_crc32 = htonl(0);
 482         sqp->sq_rdata = vl3r;
 483         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 484         sqp->sq_wdata = NULL;
 485         sqp->sq_wsize = 0;
 486 
 487         if (addr->sa_family == AF_INET6) {
 488                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 489                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 490                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 491                     sizeof (struct in6_addr));
 492         } else {
 493                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 494                 struct in6_addr v6;
 495 
 496                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 497                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 498                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 499         }
 500         vl3r->sl3r_vnetid = htonl(vid);
 501 
 502         mutex_enter(&srp->sr_lock);
 503         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 504                 sqp->sq_status = SVP_S_FATAL;
 505                 sqp->sq_func(sqp, arg);
 506         }
 507         mutex_exit(&srp->sr_lock);
 508 }
 509 
/*
 * This is a request to do a VL3 look-up that originated internally as opposed
 * to coming from varpd. As such, the caller supplies its own completion
 * callback (func) and the remote and vnet ID (vid) directly rather than
 * through an svp_t. Everything else is handled by svp_remote_vl3_common().
 */
void
svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
    const struct sockaddr *addr, svp_query_f func, void *arg)
{
        svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
}
 521 
 522 void
 523 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 524     const struct sockaddr *addr, void *arg)
 525 {
 526         svp_remote_t *srp = svp->svp_remote;
 527 
 528         sqp->sq_svp = svp;
 529         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 530             arg, svp->svp_vid);
 531 }
 532 
 533 static void
 534 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 535 {
 536         svp_remote_t *srp = sqp->sq_arg;
 537 
 538         assert(sqp->sq_wdata != NULL);
 539         if (sqp->sq_status == SVP_S_OK)
 540                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 541                     sqp->sq_size);
 542         else
 543                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
 544 }
 545 
 546 void
 547 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 548     size_t buflen)
 549 {
 550         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 551         boolean_t queued;
 552 
 553         sqp->sq_func = svp_remote_log_request_cb;
 554         sqp->sq_state = SVP_QUERY_INIT;
 555         sqp->sq_arg = srp;
 556         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 557         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 558         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 559         if (sqp->sq_header.svp_id == (id_t)-1)
 560                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 561                     errno);
 562         sqp->sq_header.svp_crc32 = htonl(0);
 563         sqp->sq_rdata = logr;
 564         sqp->sq_rsize = sizeof (svp_log_req_t);
 565         sqp->sq_wdata = buf;
 566         sqp->sq_wsize = buflen;
 567 
 568         logr->svlr_count = htonl(buflen);
 569         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 570 
 571         /*
 572          * If this fails, there isn't much that we can't do. Give the callback
 573          * with a fatal status.
 574          */
 575         mutex_enter(&srp->sr_lock);
 576         queued = svp_remote_conn_queue(srp, sqp);
 577         mutex_exit(&srp->sr_lock);
 578 
 579         if (queued == B_FALSE)
 580                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 581 }
 582 
 583 static void
 584 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 585 {
 586         svp_remote_t *srp = arg;
 587 
 588         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 589 }
 590 
 591 void
 592 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 593     size_t buflen)
 594 {
 595         boolean_t queued;
 596         svp_lrm_req_t *svrr = buf;
 597 
 598         sqp->sq_func = svp_remote_lrm_request_cb;
 599         sqp->sq_state = SVP_QUERY_INIT;
 600         sqp->sq_arg = srp;
 601         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 602         sqp->sq_header.svp_size = htonl(buflen);
 603         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 604         if (sqp->sq_header.svp_id == (id_t)-1)
 605                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 606                     errno);
 607         sqp->sq_header.svp_crc32 = htonl(0);
 608         sqp->sq_rdata = buf;
 609         sqp->sq_rsize = buflen;
 610         sqp->sq_wdata = NULL;
 611         sqp->sq_wsize = 0;
 612 
 613         /*
 614          * We need to fix up the count to be in proper network order.
 615          */
 616         svrr->svrr_count = htonl(svrr->svrr_count);
 617 
 618         /*
 619          * If this fails, there isn't much that we can't do. Give the callback
 620          * with a fatal status.
 621          */
 622         mutex_enter(&srp->sr_lock);
 623         queued = svp_remote_conn_queue(srp, sqp);
 624         mutex_exit(&srp->sr_lock);
 625 
 626         if (queued == B_FALSE)
 627                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 628 }
 629 
 630 /* ARGSUSED */
 631 void
 632 svp_remote_dns_timer(void *unused)
 633 {
 634         svp_remote_t *s;
 635         mutex_enter(&svp_remote_lock);
 636         for (s = avl_first(&svp_remote_tree); s != NULL;
 637             s = AVL_NEXT(&svp_remote_tree, s)) {
 638                 svp_host_queue(s);
 639         }
 640         mutex_exit(&svp_remote_lock);
 641 }
 642 
/*
 * Process a fresh set of resolved addresses for a remote.
 *
 * The remote's generation number is bumped first. Every IPv4/IPv6 address in
 * newaddrs is then matched against the existing connections: a connection
 * with the same address is stamped with the new generation, and an address
 * with no connection gets one created via svp_conn_create(). Finally, every
 * connection still carrying an older generation is passed to
 * svp_conn_fallout().
 */
void
svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
{
        struct addrinfo *a;
        svp_conn_t *scp;
        int ngen;

        /* Start a new generation and remember it for the matching below. */
        mutex_enter(&srp->sr_lock);
        srp->sr_gen++;
        ngen = srp->sr_gen;
        mutex_exit(&srp->sr_lock);

        for (a = newaddrs; a != NULL; a = a->ai_next) {
                struct in6_addr in6;
                struct in6_addr *addrp;

                /* Entries of any other address family are ignored. */
                if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
                        continue;

                /*
                 * Connections are compared as IPv6 addresses, so an IPv4
                 * result is converted to its v4-mapped form first.
                 */
                if (a->ai_family == AF_INET) {
                        struct sockaddr_in *v4;
                        v4 = (struct sockaddr_in *)a->ai_addr;
                        addrp = &in6;
                        IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
                } else {
                        struct sockaddr_in6 *v6;
                        v6 = (struct sockaddr_in6 *)a->ai_addr;
                        addrp = &v6->sin6_addr;
                }

                /*
                 * Look for an existing connection to this address. When one
                 * is found the loop is left early, so scp is non-NULL below.
                 */
                mutex_enter(&srp->sr_lock);
                for (scp = list_head(&srp->sr_conns); scp != NULL;
                    scp = list_next(&srp->sr_conns, scp)) {
                        mutex_enter(&scp->sc_lock);
                        if (bcmp(addrp, &scp->sc_addr,
                            sizeof (struct in6_addr)) == 0) {
                                scp->sc_gen = ngen;
                                mutex_exit(&scp->sc_lock);
                                break;
                        }
                        mutex_exit(&scp->sc_lock);
                }

                /*
                 * We need to be careful in the assumptions that we make here,
                 * as there's a good chance that svp_conn_create will
                 * drop the svp_remote_t`sr_lock to kick off its effective event
                 * loop.
                 */
                if (scp == NULL)
                        (void) svp_conn_create(srp, addrp);
                mutex_exit(&srp->sr_lock);
        }

        /*
         * Now it's time to clean things up. Connections that were matched or
         * created above carry the current generation and are left alone.
         * Every connection whose generation is older than the remote's is
         * handed to svp_conn_fallout().
         *
         * NOTE(review): the walk continues with list_next() after calling
         * svp_conn_fallout(scp) -- confirm that fallout does not remove scp
         * from sr_conns.
         */
        mutex_enter(&srp->sr_lock);
        for (scp = list_head(&srp->sr_conns); scp != NULL;
            scp = list_next(&srp->sr_conns, scp)) {
                boolean_t fall = B_FALSE;
                mutex_enter(&scp->sc_lock);
                if (scp->sc_gen < srp->sr_gen)
                        fall = B_TRUE;
                mutex_exit(&scp->sc_lock);
                if (fall == B_TRUE)
                        svp_conn_fallout(scp);
        }
        mutex_exit(&srp->sr_lock);
}
 716 
 717 /*
 718  * This connection is in the process of being reset, we need to reassign all of
 719  * its queries to other places or mark them as fatal. Note that the first
 720  * connection was the one in flight when this failed. We always mark it as
 721  * failed to avoid trying to reset its state.
 722  */
 723 void
 724 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 725 {
 726         boolean_t first = B_TRUE;
 727         assert(MUTEX_HELD(&srp->sr_lock));
 728         assert(MUTEX_HELD(&srp->sr_lock));
 729         svp_query_t *sqp;
 730 
 731         /*
 732          * As we try to reassigning all of its queries, remove it from the list.
 733          */
 734         list_remove(&srp->sr_conns, scp);
 735 
 736         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 737 
 738                 if (first == B_TRUE) {
 739                         sqp->sq_status = SVP_S_FATAL;
 740                         sqp->sq_func(sqp, sqp->sq_arg);
 741                         continue;
 742                 }
 743 
 744                 sqp->sq_acttime = -1;
 745 
 746                 /*
 747                  * We may want to maintain a queue of these for some time rather
 748                  * than just failing them all.
 749                  */
 750                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 751                         sqp->sq_status = SVP_S_FATAL;
 752                         sqp->sq_func(sqp, sqp->sq_arg);
 753                 }
 754         }
 755 
 756         /*
 757          * Now that we're done, go ahead and re-insert.
 758          */
 759         list_insert_tail(&srp->sr_conns, scp);
 760 }
 761 
 762 void
 763 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 764 {
 765         int sf, nf;
 766         char buf[256];
 767 
 768         assert(MUTEX_HELD(&srp->sr_lock));
 769 
 770         if (flag == SVP_RD_ALL || flag == 0)
 771                 libvarpd_panic("invalid flag passed to degrade");
 772 
 773         if ((flag & srp->sr_degrade) != 0) {
 774                 return;
 775         }
 776 
 777         sf = ffs(srp->sr_degrade);
 778         nf = ffs(flag);
 779         srp->sr_degrade |= flag;
 780         if (sf == 0 || sf > nf) {
 781                 svp_t *svp;
 782                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 783 
 784                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 785                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 786                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 787                 }
 788         }
 789 }
 790 
 791 void
 792 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 793 {
 794         int sf, nf;
 795 
 796         assert(MUTEX_HELD(&srp->sr_lock));
 797         sf = ffs(srp->sr_degrade);
 798         if ((srp->sr_degrade & flag) != flag)
 799                 return;
 800         srp->sr_degrade &= ~flag;
 801         nf = ffs(srp->sr_degrade);
 802 
 803         /*
 804          * If we're now empty, restore the device. If we still are degraded, but
 805          * we now have a higher base than we used to, change the message.
 806          */
 807         if (srp->sr_degrade == 0) {
 808                 svp_t *svp;
 809                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 810                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 811                         libvarpd_fma_restore(svp->svp_hdl);
 812                 }
 813         } else if (nf != sf) {
 814                 svp_t *svp;
 815                 char buf[256];
 816 
 817                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 818                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 819                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 820                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 821                 }
 822         }
 823 }
 824 
 825 void
 826 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 827 {
 828         svp_shoot_vl3_t *squery = arg;
 829         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 830         svp_sdlog_t *sdl = squery->ssv_log;
 831 
 832         if (sqp->sq_status == SVP_S_OK) {
 833                 svp_t *svp, lookup;
 834 
 835                 svp_remote_t *srp = sdl->sdl_remote;
 836                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 837 
 838                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 839                 mutex_enter(&srp->sr_lock);
 840                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 841                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 842                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 843                             NULL);
 844                 }
 845                 mutex_exit(&srp->sr_lock);
 846 
 847         }
 848 
 849         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 850 
 851         umem_free(squery, sizeof (svp_shoot_vl3_t));
 852 }
 853 
 854 void
 855 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 856     svp_sdlog_t *sdl)
 857 {
 858         svp_shoot_vl3_t *squery;
 859 
 860         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 861         if (squery == NULL) {
 862                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 863                 return;
 864         }
 865 
 866         squery->ssv_vl3 = svl3;
 867         squery->ssv_log = sdl;
 868         squery->ssv_sock.sin6_family = AF_INET6;
 869         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 870             sizeof (svl3->svl3_ip));
 871         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 872             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 873             squery);
 874 }
 875 
 876 void
 877 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 878 {
 879         svp_t *svp, lookup;
 880 
 881         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 882         mutex_enter(&srp->sr_lock);
 883         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 884                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 885         }
 886         mutex_exit(&srp->sr_lock);
 887 }
 888 
 889 int
 890 svp_remote_init(void)
 891 {
 892         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 893         if (svp_idspace == NULL)
 894                 return (errno);
 895         avl_create(&svp_remote_tree, svp_remote_comparator,
 896             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 897         svp_dns_timer.st_func = svp_remote_dns_timer;
 898         svp_dns_timer.st_arg = NULL;
 899         svp_dns_timer.st_oneshot = B_FALSE;
 900         svp_dns_timer.st_value = svp_dns_timer_rate;
 901         svp_timer_add(&svp_dns_timer);
 902         return (0);
 903 }
 904 
 905 void
 906 svp_remote_fini(void)
 907 {
 908         svp_timer_remove(&svp_dns_timer);
 909         avl_destroy(&svp_remote_tree);
 910         if (svp_idspace == NULL)
 911                 id_space_destroy(svp_idspace);
 912 }