1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
  38 typedef struct svp_shoot_vl3 {
  39         svp_query_t             ssv_query;
  40         struct sockaddr_in6     ssv_sock;
  41         svp_log_vl3_t           *ssv_vl3;
  42         svp_sdlog_t             *ssv_log;
  43 } svp_shoot_vl3_t;
  44 
  45 static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
  46 static avl_tree_t svp_remote_tree;
  47 static svp_timer_t svp_dns_timer;
  48 static id_space_t *svp_idspace;
  49 static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 static void
  52 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  53     size_t buflen)
  54 {
  55         switch (state) {
  56         case SVP_RD_DNS_FAIL:
  57                 (void) snprintf(buf, buflen, "failed to resolve or find "
  58                     "entries for hostname %s", srp->sr_hostname);
  59                 break;
  60         case SVP_RD_REMOTE_FAIL:
  61                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  62                 break;
  63         default:
  64                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  65         }
  66 }
  67 
  68 static int
  69 svp_remote_comparator(const void *l, const void *r)
  70 {
  71         int ret;
  72         const svp_remote_t *lr = l, *rr = r;
  73 
  74         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  75         if (ret > 0)
  76                 return (1);
  77         else if (ret < 0)
  78                 return (-1);
  79 
  80         if (lr->sr_rport > rr->sr_rport)
  81                 return (1);
  82         else if (lr->sr_rport < rr->sr_rport)
  83                 return (-1);
  84 
  85         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  86 }
  87 
  88 void
  89 svp_query_release(svp_query_t *sqp)
  90 {
  91         id_free(svp_idspace, sqp->sq_header.svp_id);
  92 }
  93 
  94 static void
  95 svp_remote_destroy(svp_remote_t *srp)
  96 {
  97         size_t len;
  98 
  99         /*
 100          * Clean up any unrelated DNS information. At this point we know that
 101          * we're not in the remote tree. That means, that svp_remote_dns_timer
 102          * cannot queue us. However, if any of our DNS related state flags are
 103          * set, we have to hang out.
 104          */
 105         mutex_enter(&srp->sr_lock);
 106         while (srp->sr_state &
 107             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 108                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 109         }
 110         mutex_exit(&srp->sr_lock);
 111         svp_shootdown_fini(srp);
 112 
 113         if (cond_destroy(&srp->sr_cond) != 0)
 114                 libvarpd_panic("failed to destroy cond sr_cond");
 115 
 116         if (mutex_destroy(&srp->sr_lock) != 0)
 117                 libvarpd_panic("failed to destroy mutex sr_lock");
 118 
 119         if (srp->sr_addrinfo != NULL)
 120                 freeaddrinfo(srp->sr_addrinfo);
 121         len = strlen(srp->sr_hostname) + 1;
 122         umem_free(srp->sr_hostname, len);
 123         umem_free(srp, sizeof (svp_remote_t));
 124 }
 125 
 126 static int
 127 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 128     svp_remote_t **outp)
 129 {
 130         size_t hlen;
 131         svp_remote_t *remote;
 132 
 133         assert(MUTEX_HELD(&svp_remote_lock));
 134 
 135         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 136         if (remote == NULL) {
 137                 mutex_exit(&svp_remote_lock);
 138                 return (ENOMEM);
 139         }
 140 
 141         if (svp_shootdown_init(remote) != 0) {
 142                 umem_free(remote, sizeof (svp_remote_t));
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         hlen = strlen(host) + 1;
 148         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 149         if (remote->sr_hostname == NULL) {
 150                 svp_shootdown_fini(remote);
 151                 umem_free(remote, sizeof (svp_remote_t));
 152                 mutex_exit(&svp_remote_lock);
 153                 return (ENOMEM);
 154         }
 155         remote->sr_rport = port;
 156         if (mutex_init(&remote->sr_lock,
 157             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 158                 libvarpd_panic("failed to create mutex sr_lock");
 159         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 160                 libvarpd_panic("failed to create cond sr_cond");
 161         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 162             offsetof(svp_conn_t, sc_rlist));
 163         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 164             offsetof(svp_t, svp_rlink));
 165         (void) strlcpy(remote->sr_hostname, host, hlen);
 166         remote->sr_count = 1;
 167         remote->sr_uip = *uip;
 168 
 169         svp_shootdown_start(remote);
 170 
 171         *outp = remote;
 172         return (0);
 173 }
 174 
 175 int
 176 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 177     svp_remote_t **outp)
 178 {
 179         int ret;
 180         svp_remote_t lookup, *remote;
 181 
 182         lookup.sr_hostname = host;
 183         lookup.sr_rport = port;
 184         lookup.sr_uip = *uip;
 185         mutex_enter(&svp_remote_lock);
 186         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 187         if (remote != NULL) {
 188                 assert(remote->sr_count > 0);
 189                 remote->sr_count++;
 190                 *outp = remote;
 191                 mutex_exit(&svp_remote_lock);
 192                 return (0);
 193         }
 194 
 195         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 196                 mutex_exit(&svp_remote_lock);
 197                 return (ret);
 198         }
 199 
 200         avl_add(&svp_remote_tree, *outp);
 201         mutex_exit(&svp_remote_lock);
 202 
 203         /* Make sure DNS is up to date */
 204         svp_host_queue(*outp);
 205 
 206         return (0);
 207 }
 208 
 209 void
 210 svp_remote_release(svp_remote_t *srp)
 211 {
 212         mutex_enter(&svp_remote_lock);
 213         mutex_enter(&srp->sr_lock);
 214         srp->sr_count--;
 215         if (srp->sr_count != 0) {
 216                 mutex_exit(&srp->sr_lock);
 217                 mutex_exit(&svp_remote_lock);
 218                 return;
 219         }
 220         mutex_exit(&srp->sr_lock);
 221 
 222         avl_remove(&svp_remote_tree, srp);
 223         mutex_exit(&svp_remote_lock);
 224         svp_remote_destroy(srp);
 225 }
 226 
 227 int
 228 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 229 {
 230         svp_t check;
 231         avl_index_t where;
 232 
 233         mutex_enter(&srp->sr_lock);
 234         if (svp->svp_remote != NULL)
 235                 libvarpd_panic("failed to create mutex sr_lock");
 236 
 237         /*
 238          * We require everything except shootdowns
 239          */
 240         if (svp->svp_cb.scb_vl2_lookup == NULL)
 241                 libvarpd_panic("missing callback scb_vl2_lookup");
 242         if (svp->svp_cb.scb_vl3_lookup == NULL)
 243                 libvarpd_panic("missing callback scb_vl3_lookup");
 244         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 245                 libvarpd_panic("missing callback scb_vl2_invalidate");
 246         if (svp->svp_cb.scb_vl3_inject == NULL)
 247                 libvarpd_panic("missing callback scb_vl3_inject");
 248         if (svp->svp_cb.scb_route_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_route_lookup");
 250 
 251         check.svp_vid = svp->svp_vid;
 252         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 253                 libvarpd_panic("found duplicate entry with vid %ld",
 254                     svp->svp_vid);
 255         avl_insert(&srp->sr_tree, svp, where);
 256         svp->svp_remote = srp;
 257         mutex_exit(&srp->sr_lock);
 258 
 259         return (0);
 260 }
 261 
 262 void
 263 svp_remote_detach(svp_t *svp)
 264 {
 265         svp_t *lookup;
 266         svp_remote_t *srp = svp->svp_remote;
 267 
 268         if (srp == NULL)
 269                 libvarpd_panic("trying to detach remote when none exists");
 270 
 271         mutex_enter(&srp->sr_lock);
 272         lookup = avl_find(&srp->sr_tree, svp, NULL);
 273         if (lookup == NULL || lookup != svp)
 274                 libvarpd_panic("inconsitent remote avl tree...");
 275         avl_remove(&srp->sr_tree, svp);
 276         svp->svp_remote = NULL;
 277         mutex_exit(&srp->sr_lock);
 278         svp_remote_release(srp);
 279 }
 280 
 281 /*
 282  * See if the request can be sent over the connection's supported version.
 283  * Scribble the version in the request itself.  NOTE that we do not check the
 284  * version that already exists in sqp->sq_header.svp_ver, as we may be called
 285  * from svp_remote_reassign() (and change versions when arriving at a new
 286  * connection).
 287  */
 288 static boolean_t
 289 svp_outbound_version_check(int version, svp_query_t *sqp)
 290 {
 291         uint16_t op = htons(sqp->sq_header.svp_op);
 292 
 293         /*
 294          * As of v1 -> v2, we really only need to restrict SVP_R_ROUTE_REQ
 295          * as v2-only.  Reflect that here.
 296          *
 297          * NOTE that if any message semantics change between future versions,
 298          * (e.g. "in v3 SVP_R_VL2_REQ takes on additional work"), we'll
 299          * need to more-deeply inspect the query.  It's possible that the
 300          * svp_op space is big enough to just continue op-only inspections.
 301          */
 302 
 303         assert(version > 0 && version <= SVP_CURRENT_VERSION);
 304 
 305         if (op != SVP_R_ROUTE_REQ || version >= SVP_VERSION_TWO) {
 306                 sqp->sq_header.svp_ver = htons(version);
 307                 return (B_TRUE);
 308         }
 309         return (B_FALSE);
 310 }
 311 
 312 /*
 313  * Walk the list of connections and find the first one that's available AND
 314  * version-appropriate for the message, then move the matched connection to
 315  * the back of the list so it's less likely to be used again.
 316  */
 317 static boolean_t
 318 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 319 {
 320         svp_conn_t *scp;
 321 
 322         assert(MUTEX_HELD(&srp->sr_lock));
 323         for (scp = list_head(&srp->sr_conns); scp != NULL;
 324             scp = list_next(&srp->sr_conns, scp)) {
 325                 mutex_enter(&scp->sc_lock);
 326                 if (scp->sc_cstate != SVP_CS_ACTIVE ||
 327                     !svp_outbound_version_check(scp->sc_version, sqp)) {
 328                         mutex_exit(&scp->sc_lock);
 329                         continue;
 330                 }
 331                 svp_conn_queue(scp, sqp);
 332                 mutex_exit(&scp->sc_lock);
 333                 list_remove(&srp->sr_conns, scp);
 334                 list_insert_tail(&srp->sr_conns, scp);
 335                 return (B_TRUE);
 336         }
 337 
 338         return (B_FALSE);
 339 }
 340 
 341 static void
 342 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 343 {
 344         svp_t *svp = sqp->sq_svp;
 345         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 346 
 347         if (sqp->sq_status == SVP_S_OK)
 348                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 349                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 350                     arg);
 351         else
 352                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 353 }
 354 
 355 void
 356 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 357     void *arg)
 358 {
 359         svp_remote_t *srp;
 360         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 361 
 362         srp = svp->svp_remote;
 363         sqp->sq_func = svp_remote_vl2_lookup_cb;
 364         sqp->sq_arg = arg;
 365         sqp->sq_svp = svp;
 366         sqp->sq_state = SVP_QUERY_INIT;
 367         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 368         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 369         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 370         if (sqp->sq_header.svp_id == (id_t)-1)
 371                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 372                     errno);
 373         sqp->sq_header.svp_crc32 = htonl(0);
 374         sqp->sq_rdata = vl2r;
 375         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 376         sqp->sq_wdata = NULL;
 377         sqp->sq_wsize = 0;
 378 
 379         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 380         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 381 
 382         mutex_enter(&srp->sr_lock);
 383         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 384                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 385         mutex_exit(&srp->sr_lock);
 386 }
 387 
 388 static void
 389 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
 390 {
 391         svp_t *svp = sqp->sq_svp;
 392         svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
 393 
 394         if (sqp->sq_status == SVP_S_OK) {
 395                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 396                     sra->sra_dcid, sra->sra_vnetid, sra->sra_vlan,
 397                     sra->sra_srcmac, sra->sra_dstmac, sra->sra_port,
 398                     sra->sra_ip, sra->sra_src_pfx, sra->sra_dst_pfx, arg);
 399         } else {
 400                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 401                     0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
 402         }
 403 }
 404 
 405 void
 406 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
 407     const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
 408     uint16_t vlan, void *arg)
 409 {
 410         svp_remote_t *srp;
 411         svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
 412 
 413         srp = svp->svp_remote;
 414         sqp->sq_func = svp_remote_route_lookup_cb;
 415         sqp->sq_arg = arg;
 416         sqp->sq_svp = svp;
 417         sqp->sq_state = SVP_QUERY_INIT;
 418         sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
 419         sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
 420         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 421         if (sqp->sq_header.svp_id == (id_t)-1)
 422                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 423                     errno);
 424         sqp->sq_header.svp_crc32 = htonl(0);
 425         sqp->sq_rdata = srr;
 426 
 427         bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
 428         bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
 429         /* Caller should've checked both are the same type... */
 430         srr->srr_vnetid = vnetid;
 431         srr->srr_vlan = vlan;
 432         srr->srr_pad = 0;
 433 
 434         mutex_enter(&srp->sr_lock);
 435         if (!svp_remote_conn_queue(srp, sqp)) {
 436                 sqp->sq_status = SVP_S_FATAL;
 437                 sqp->sq_func(sqp, arg);
 438         }
 439         mutex_exit(&srp->sr_lock);
 440 }
 441 
 442 static void
 443 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 444 {
 445         svp_t *svp = sqp->sq_svp;
 446         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 447 
 448         if (sqp->sq_status == SVP_S_OK)
 449                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 450                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 451                     arg);
 452         else
 453                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 454                     arg);
 455 }
 456 
 457 static void
 458 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 459     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 460 {
 461         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 462 
 463         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 464                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 465 
 466         sqp->sq_func = func;
 467         sqp->sq_arg = arg;
 468         sqp->sq_state = SVP_QUERY_INIT;
 469         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 470         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 471         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 472         if (sqp->sq_header.svp_id == (id_t)-1)
 473                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 474                     errno);
 475         sqp->sq_header.svp_crc32 = htonl(0);
 476         sqp->sq_rdata = vl3r;
 477         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 478         sqp->sq_wdata = NULL;
 479         sqp->sq_wsize = 0;
 480 
 481         if (addr->sa_family == AF_INET6) {
 482                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 483                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 484                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 485                     sizeof (struct in6_addr));
 486         } else {
 487                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 488                 struct in6_addr v6;
 489 
 490                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 491                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 492                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 493         }
 494         vl3r->sl3r_vnetid = htonl(vid);
 495 
 496         mutex_enter(&srp->sr_lock);
 497         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 498                 sqp->sq_status = SVP_S_FATAL;
 499                 sqp->sq_func(sqp, arg);
 500         }
 501         mutex_exit(&srp->sr_lock);
 502 }
 503 
 504 /*
 505  * This is a request to do a VL3 look-up that originated internally as opposed
 506  * to coming from varpd. As such we need a slightly different query callback
 507  * function upon completion and don't go through the normal path with the svp_t.
 508  */
 509 void
 510 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
 511     const struct sockaddr *addr, svp_query_f func, void *arg)
 512 {
 513         svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
 514 }
 515 
 516 void
 517 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 518     const struct sockaddr *addr, void *arg)
 519 {
 520         svp_remote_t *srp = svp->svp_remote;
 521 
 522         sqp->sq_svp = svp;
 523         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 524             arg, svp->svp_vid);
 525 }
 526 
 527 static void
 528 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 529 {
 530         svp_remote_t *srp = sqp->sq_arg;
 531 
 532         assert(sqp->sq_wdata != NULL);
 533         if (sqp->sq_status == SVP_S_OK)
 534                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 535                     sqp->sq_size);
 536         else
 537                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
 538 }
 539 
 540 void
 541 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 542     size_t buflen)
 543 {
 544         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 545         boolean_t queued;
 546 
 547         sqp->sq_func = svp_remote_log_request_cb;
 548         sqp->sq_state = SVP_QUERY_INIT;
 549         sqp->sq_arg = srp;
 550         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 551         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 552         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 553         if (sqp->sq_header.svp_id == (id_t)-1)
 554                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 555                     errno);
 556         sqp->sq_header.svp_crc32 = htonl(0);
 557         sqp->sq_rdata = logr;
 558         sqp->sq_rsize = sizeof (svp_log_req_t);
 559         sqp->sq_wdata = buf;
 560         sqp->sq_wsize = buflen;
 561 
 562         logr->svlr_count = htonl(buflen);
 563         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 564 
 565         /*
 566          * If this fails, there isn't much that we can't do. Give the callback
 567          * with a fatal status.
 568          */
 569         mutex_enter(&srp->sr_lock);
 570         queued = svp_remote_conn_queue(srp, sqp);
 571         mutex_exit(&srp->sr_lock);
 572 
 573         if (queued == B_FALSE)
 574                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 575 }
 576 
 577 static void
 578 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 579 {
 580         svp_remote_t *srp = arg;
 581 
 582         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 583 }
 584 
 585 void
 586 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 587     size_t buflen)
 588 {
 589         boolean_t queued;
 590         svp_lrm_req_t *svrr = buf;
 591 
 592         sqp->sq_func = svp_remote_lrm_request_cb;
 593         sqp->sq_state = SVP_QUERY_INIT;
 594         sqp->sq_arg = srp;
 595         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 596         sqp->sq_header.svp_size = htonl(buflen);
 597         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 598         if (sqp->sq_header.svp_id == (id_t)-1)
 599                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 600                     errno);
 601         sqp->sq_header.svp_crc32 = htonl(0);
 602         sqp->sq_rdata = buf;
 603         sqp->sq_rsize = buflen;
 604         sqp->sq_wdata = NULL;
 605         sqp->sq_wsize = 0;
 606 
 607         /*
 608          * We need to fix up the count to be in proper network order.
 609          */
 610         svrr->svrr_count = htonl(svrr->svrr_count);
 611 
 612         /*
 613          * If this fails, there isn't much that we can't do. Give the callback
 614          * with a fatal status.
 615          */
 616         mutex_enter(&srp->sr_lock);
 617         queued = svp_remote_conn_queue(srp, sqp);
 618         mutex_exit(&srp->sr_lock);
 619 
 620         if (queued == B_FALSE)
 621                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 622 }
 623 
 624 /* ARGSUSED */
 625 void
 626 svp_remote_dns_timer(void *unused)
 627 {
 628         svp_remote_t *s;
 629         mutex_enter(&svp_remote_lock);
 630         for (s = avl_first(&svp_remote_tree); s != NULL;
 631             s = AVL_NEXT(&svp_remote_tree, s)) {
 632                 svp_host_queue(s);
 633         }
 634         mutex_exit(&svp_remote_lock);
 635 }
 636 
 637 void
 638 svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
 639 {
 640         struct addrinfo *a;
 641         svp_conn_t *scp;
 642         int ngen;
 643 
 644         mutex_enter(&srp->sr_lock);
 645         srp->sr_gen++;
 646         ngen = srp->sr_gen;
 647         mutex_exit(&srp->sr_lock);
 648 
 649         for (a = newaddrs; a != NULL; a = a->ai_next) {
 650                 struct in6_addr in6;
 651                 struct in6_addr *addrp;
 652 
 653                 if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
 654                         continue;
 655 
 656                 if (a->ai_family == AF_INET) {
 657                         struct sockaddr_in *v4;
 658                         v4 = (struct sockaddr_in *)a->ai_addr;
 659                         addrp = &in6;
 660                         IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
 661                 } else {
 662                         struct sockaddr_in6 *v6;
 663                         v6 = (struct sockaddr_in6 *)a->ai_addr;
 664                         addrp = &v6->sin6_addr;
 665                 }
 666 
 667                 mutex_enter(&srp->sr_lock);
 668                 for (scp = list_head(&srp->sr_conns); scp != NULL;
 669                     scp = list_next(&srp->sr_conns, scp)) {
 670                         mutex_enter(&scp->sc_lock);
 671                         if (bcmp(addrp, &scp->sc_addr,
 672                             sizeof (struct in6_addr)) == 0) {
 673                                 scp->sc_gen = ngen;
 674                                 mutex_exit(&scp->sc_lock);
 675                                 break;
 676                         }
 677                         mutex_exit(&scp->sc_lock);
 678                 }
 679 
 680                 /*
 681                  * We need to be careful in the assumptions that we make here,
 682                  * as there's a good chance that svp_conn_create will
 683                  * drop the svp_remote_t`sr_lock to kick off its effective event
 684                  * loop.
 685                  */
 686                 if (scp == NULL)
 687                         (void) svp_conn_create(srp, addrp);
 688                 mutex_exit(&srp->sr_lock);
 689         }
 690 
 691         /*
 692          * Now it's time to clean things up. We do not actively clean up the
 693          * current connections that we have, instead allowing them to stay
 694          * around assuming that they're still useful. Instead, we go through and
 695          * purge the degraded list for anything that's from an older generation.
 696          */
 697         mutex_enter(&srp->sr_lock);
 698         for (scp = list_head(&srp->sr_conns); scp != NULL;
 699             scp = list_next(&srp->sr_conns, scp)) {
 700                 boolean_t fall = B_FALSE;
 701                 mutex_enter(&scp->sc_lock);
 702                 if (scp->sc_gen < srp->sr_gen)
 703                         fall = B_TRUE;
 704                 mutex_exit(&scp->sc_lock);
 705                 if (fall == B_TRUE)
 706                         svp_conn_fallout(scp);
 707         }
 708         mutex_exit(&srp->sr_lock);
 709 }
 710 
 711 /*
 712  * This connection is in the process of being reset, we need to reassign all of
 713  * its queries to other places or mark them as fatal. Note that the first
 714  * connection was the one in flight when this failed. We always mark it as
 715  * failed to avoid trying to reset its state.
 716  */
 717 void
 718 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 719 {
 720         boolean_t first = B_TRUE;
 721         assert(MUTEX_HELD(&srp->sr_lock));
 722         assert(MUTEX_HELD(&srp->sr_lock));
 723         svp_query_t *sqp;
 724 
 725         /*
 726          * As we try to reassigning all of its queries, remove it from the list.
 727          */
 728         list_remove(&srp->sr_conns, scp);
 729 
 730         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 731 
 732                 if (first == B_TRUE) {
 733                         sqp->sq_status = SVP_S_FATAL;
 734                         sqp->sq_func(sqp, sqp->sq_arg);
 735                         continue;
 736                 }
 737 
 738                 sqp->sq_acttime = -1;
 739 
 740                 /*
 741                  * We may want to maintain a queue of these for some time rather
 742                  * than just failing them all.
 743                  */
 744                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 745                         sqp->sq_status = SVP_S_FATAL;
 746                         sqp->sq_func(sqp, sqp->sq_arg);
 747                 }
 748         }
 749 
 750         /*
 751          * Now that we're done, go ahead and re-insert.
 752          */
 753         list_insert_tail(&srp->sr_conns, scp);
 754 }
 755 
 756 void
 757 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 758 {
 759         int sf, nf;
 760         char buf[256];
 761 
 762         assert(MUTEX_HELD(&srp->sr_lock));
 763 
 764         if (flag == SVP_RD_ALL || flag == 0)
 765                 libvarpd_panic("invalid flag passed to degrade");
 766 
 767         if ((flag & srp->sr_degrade) != 0) {
 768                 return;
 769         }
 770 
 771         sf = ffs(srp->sr_degrade);
 772         nf = ffs(flag);
 773         srp->sr_degrade |= flag;
 774         if (sf == 0 || sf > nf) {
 775                 svp_t *svp;
 776                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 777 
 778                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 779                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 780                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 781                 }
 782         }
 783 }
 784 
 785 void
 786 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 787 {
 788         int sf, nf;
 789 
 790         assert(MUTEX_HELD(&srp->sr_lock));
 791         sf = ffs(srp->sr_degrade);
 792         if ((srp->sr_degrade & flag) != flag)
 793                 return;
 794         srp->sr_degrade &= ~flag;
 795         nf = ffs(srp->sr_degrade);
 796 
 797         /*
 798          * If we're now empty, restore the device. If we still are degraded, but
 799          * we now have a higher base than we used to, change the message.
 800          */
 801         if (srp->sr_degrade == 0) {
 802                 svp_t *svp;
 803                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 804                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 805                         libvarpd_fma_restore(svp->svp_hdl);
 806                 }
 807         } else if (nf != sf) {
 808                 svp_t *svp;
 809                 char buf[256];
 810 
 811                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 812                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 813                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 814                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 815                 }
 816         }
 817 }
 818 
 819 void
 820 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 821 {
 822         svp_shoot_vl3_t *squery = arg;
 823         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 824         svp_sdlog_t *sdl = squery->ssv_log;
 825 
 826         if (sqp->sq_status == SVP_S_OK) {
 827                 svp_t *svp, lookup;
 828 
 829                 svp_remote_t *srp = sdl->sdl_remote;
 830                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 831 
 832                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 833                 mutex_enter(&srp->sr_lock);
 834                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 835                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 836                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 837                             NULL);
 838                 }
 839                 mutex_exit(&srp->sr_lock);
 840 
 841         }
 842 
 843         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 844 
 845         umem_free(squery, sizeof (svp_shoot_vl3_t));
 846 }
 847 
 848 void
 849 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 850     svp_sdlog_t *sdl)
 851 {
 852         svp_shoot_vl3_t *squery;
 853 
 854         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 855         if (squery == NULL) {
 856                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 857                 return;
 858         }
 859 
 860         squery->ssv_vl3 = svl3;
 861         squery->ssv_log = sdl;
 862         squery->ssv_sock.sin6_family = AF_INET6;
 863         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 864             sizeof (svl3->svl3_ip));
 865         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 866             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 867             squery);
 868 }
 869 
 870 void
 871 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 872 {
 873         svp_t *svp, lookup;
 874 
 875         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 876         mutex_enter(&srp->sr_lock);
 877         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 878                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 879         }
 880         mutex_exit(&srp->sr_lock);
 881 }
 882 
 883 int
 884 svp_remote_init(void)
 885 {
 886         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 887         if (svp_idspace == NULL)
 888                 return (errno);
 889         avl_create(&svp_remote_tree, svp_remote_comparator,
 890             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 891         svp_dns_timer.st_func = svp_remote_dns_timer;
 892         svp_dns_timer.st_arg = NULL;
 893         svp_dns_timer.st_oneshot = B_FALSE;
 894         svp_dns_timer.st_value = svp_dns_timer_rate;
 895         svp_timer_add(&svp_dns_timer);
 896         return (0);
 897 }
 898 
 899 void
 900 svp_remote_fini(void)
 901 {
 902         svp_timer_remove(&svp_dns_timer);
 903         avl_destroy(&svp_remote_tree);
 904         if (svp_idspace == NULL)
 905                 id_space_destroy(svp_idspace);
 906 }