1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
/*
 * State for a single VL3 shootdown look-up. It is allocated by
 * svp_remote_shootdown_vl3() and passed as the callback argument to
 * svp_remote_shootdown_vl3_cb(), which frees it.
 */
typedef struct svp_shoot_vl3 {
        /* Query submitted through svp_remote_vl3_logreq(). */
        svp_query_t             ssv_query;
        /* AF_INET6 address built from the log entry's svl3_ip. */
        struct sockaddr_in6     ssv_sock;
        /* Log entry that triggered this look-up. */
        svp_log_vl3_t           *ssv_vl3;
        /* Shootdown log handed back to svp_shootdown_vl3_cb(). */
        svp_sdlog_t             *ssv_log;
} svp_shoot_vl3_t;
  44 
/* Held whenever svp_remote_tree is searched, walked or modified. */
static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
/* AVL tree of every svp_remote_t, ordered by svp_remote_comparator(). */
static avl_tree_t svp_remote_tree;
/* Non-oneshot timer that runs svp_remote_dns_timer(); see svp_remote_init(). */
static svp_timer_t svp_dns_timer;
/* Source of the svp_id placed in each outgoing query header. */
static id_space_t *svp_idspace;
static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 static void
  52 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  53     size_t buflen)
  54 {
  55         switch (state) {
  56         case SVP_RD_DNS_FAIL:
  57                 (void) snprintf(buf, buflen, "failed to resolve or find "
  58                     "entries for hostname %s", srp->sr_hostname);
  59                 break;
  60         case SVP_RD_REMOTE_FAIL:
  61                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  62                 break;
  63         default:
  64                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  65         }
  66 }
  67 
  68 static int
  69 svp_remote_comparator(const void *l, const void *r)
  70 {
  71         int ret;
  72         const svp_remote_t *lr = l, *rr = r;
  73 
  74         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  75         if (ret > 0)
  76                 return (1);
  77         else if (ret < 0)
  78                 return (-1);
  79 
  80         if (lr->sr_rport > rr->sr_rport)
  81                 return (1);
  82         else if (lr->sr_rport < rr->sr_rport)
  83                 return (-1);
  84 
  85         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  86 }
  87 
  88 void
  89 svp_query_release(svp_query_t *sqp)
  90 {
  91         id_free(svp_idspace, sqp->sq_header.svp_id);
  92 }
  93 
  94 static void
  95 svp_remote_destroy(svp_remote_t *srp)
  96 {
  97         size_t len;
  98 
  99         /*
 100          * Clean up any unrelated DNS information. At this point we know that
 101          * we're not in the remote tree. That means, that svp_remote_dns_timer
 102          * cannot queue us. However, if any of our DNS related state flags are
 103          * set, we have to hang out.
 104          */
 105         mutex_enter(&srp->sr_lock);
 106         while (srp->sr_state &
 107             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 108                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 109         }
 110         mutex_exit(&srp->sr_lock);
 111         svp_shootdown_fini(srp);
 112 
 113         if (cond_destroy(&srp->sr_cond) != 0)
 114                 libvarpd_panic("failed to destroy cond sr_cond");
 115 
 116         if (mutex_destroy(&srp->sr_lock) != 0)
 117                 libvarpd_panic("failed to destroy mutex sr_lock");
 118 
 119         if (srp->sr_addrinfo != NULL)
 120                 freeaddrinfo(srp->sr_addrinfo);
 121         len = strlen(srp->sr_hostname) + 1;
 122         umem_free(srp->sr_hostname, len);
 123         umem_free(srp, sizeof (svp_remote_t));
 124 }
 125 
 126 static int
 127 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 128     svp_remote_t **outp)
 129 {
 130         size_t hlen;
 131         svp_remote_t *remote;
 132 
 133         assert(MUTEX_HELD(&svp_remote_lock));
 134 
 135         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 136         if (remote == NULL) {
 137                 mutex_exit(&svp_remote_lock);
 138                 return (ENOMEM);
 139         }
 140 
 141         if (svp_shootdown_init(remote) != 0) {
 142                 umem_free(remote, sizeof (svp_remote_t));
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         hlen = strlen(host) + 1;
 148         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 149         if (remote->sr_hostname == NULL) {
 150                 svp_shootdown_fini(remote);
 151                 umem_free(remote, sizeof (svp_remote_t));
 152                 mutex_exit(&svp_remote_lock);
 153                 return (ENOMEM);
 154         }
 155         remote->sr_rport = port;
 156         if (mutex_init(&remote->sr_lock,
 157             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 158                 libvarpd_panic("failed to create mutex sr_lock");
 159         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 160                 libvarpd_panic("failed to create cond sr_cond");
 161         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 162             offsetof(svp_conn_t, sc_rlist));
 163         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 164             offsetof(svp_t, svp_rlink));
 165         (void) strlcpy(remote->sr_hostname, host, hlen);
 166         remote->sr_count = 1;
 167         remote->sr_uip = *uip;
 168 
 169         svp_shootdown_start(remote);
 170 
 171         *outp = remote;
 172         return (0);
 173 }
 174 
 175 int
 176 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 177     svp_remote_t **outp)
 178 {
 179         int ret;
 180         svp_remote_t lookup, *remote;
 181 
 182         lookup.sr_hostname = host;
 183         lookup.sr_rport = port;
 184         lookup.sr_uip = *uip;
 185         mutex_enter(&svp_remote_lock);
 186         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 187         if (remote != NULL) {
 188                 assert(remote->sr_count > 0);
 189                 remote->sr_count++;
 190                 *outp = remote;
 191                 mutex_exit(&svp_remote_lock);
 192                 return (0);
 193         }
 194 
 195         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 196                 mutex_exit(&svp_remote_lock);
 197                 return (ret);
 198         }
 199 
 200         avl_add(&svp_remote_tree, *outp);
 201         mutex_exit(&svp_remote_lock);
 202 
 203         /* Make sure DNS is up to date */
 204         svp_host_queue(*outp);
 205 
 206         return (0);
 207 }
 208 
 209 void
 210 svp_remote_release(svp_remote_t *srp)
 211 {
 212         mutex_enter(&svp_remote_lock);
 213         mutex_enter(&srp->sr_lock);
 214         srp->sr_count--;
 215         if (srp->sr_count != 0) {
 216                 mutex_exit(&srp->sr_lock);
 217                 mutex_exit(&svp_remote_lock);
 218                 return;
 219         }
 220         mutex_exit(&srp->sr_lock);
 221 
 222         avl_remove(&svp_remote_tree, srp);
 223         mutex_exit(&svp_remote_lock);
 224         svp_remote_destroy(srp);
 225 }
 226 
 227 int
 228 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 229 {
 230         svp_t check;
 231         avl_index_t where;
 232 
 233         mutex_enter(&srp->sr_lock);
 234         if (svp->svp_remote != NULL)
 235                 libvarpd_panic("failed to create mutex sr_lock");
 236 
 237         /*
 238          * We require everything except shootdowns
 239          */
 240         if (svp->svp_cb.scb_vl2_lookup == NULL)
 241                 libvarpd_panic("missing callback scb_vl2_lookup");
 242         if (svp->svp_cb.scb_vl3_lookup == NULL)
 243                 libvarpd_panic("missing callback scb_vl3_lookup");
 244         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 245                 libvarpd_panic("missing callback scb_vl2_invalidate");
 246         if (svp->svp_cb.scb_vl3_inject == NULL)
 247                 libvarpd_panic("missing callback scb_vl3_inject");
 248         if (svp->svp_cb.scb_route_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_route_lookup");
 250 
 251         check.svp_vid = svp->svp_vid;
 252         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 253                 libvarpd_panic("found duplicate entry with vid %ld",
 254                     svp->svp_vid);
 255         avl_insert(&srp->sr_tree, svp, where);
 256         svp->svp_remote = srp;
 257         mutex_exit(&srp->sr_lock);
 258 
 259         return (0);
 260 }
 261 
 262 void
 263 svp_remote_detach(svp_t *svp)
 264 {
 265         svp_t *lookup;
 266         svp_remote_t *srp = svp->svp_remote;
 267 
 268         if (srp == NULL)
 269                 libvarpd_panic("trying to detach remote when none exists");
 270 
 271         mutex_enter(&srp->sr_lock);
 272         lookup = avl_find(&srp->sr_tree, svp, NULL);
 273         if (lookup == NULL || lookup != svp)
 274                 libvarpd_panic("inconsitent remote avl tree...");
 275         avl_remove(&srp->sr_tree, svp);
 276         svp->svp_remote = NULL;
 277         mutex_exit(&srp->sr_lock);
 278         svp_remote_release(srp);
 279 }
 280 
 281 /*
 282  * Walk the list of connections and find the first one that's available, the
 283  * move it to the back of the list so it's less likely to be used again.
 284  */
 285 static boolean_t
 286 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 287 {
 288         svp_conn_t *scp;
 289 
 290         assert(MUTEX_HELD(&srp->sr_lock));
 291         for (scp = list_head(&srp->sr_conns); scp != NULL;
 292             scp = list_next(&srp->sr_conns, scp)) {
 293                 mutex_enter(&scp->sc_lock);
 294                 if (scp->sc_cstate != SVP_CS_ACTIVE) {
 295                         mutex_exit(&scp->sc_lock);
 296                         continue;
 297                 }
 298                 svp_conn_queue(scp, sqp);
 299                 mutex_exit(&scp->sc_lock);
 300                 list_remove(&srp->sr_conns, scp);
 301                 list_insert_tail(&srp->sr_conns, scp);
 302                 return (B_TRUE);
 303         }
 304 
 305         return (B_FALSE);
 306 }
 307 
 308 static void
 309 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 310 {
 311         svp_t *svp = sqp->sq_svp;
 312         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 313 
 314         if (sqp->sq_status == SVP_S_OK)
 315                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 316                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 317                     arg);
 318         else
 319                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 320 }
 321 
 322 void
 323 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 324     void *arg)
 325 {
 326         svp_remote_t *srp;
 327         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 328 
 329         srp = svp->svp_remote;
 330         sqp->sq_func = svp_remote_vl2_lookup_cb;
 331         sqp->sq_arg = arg;
 332         sqp->sq_svp = svp;
 333         sqp->sq_state = SVP_QUERY_INIT;
 334         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 335         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 336         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 337         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 338         if (sqp->sq_header.svp_id == (id_t)-1)
 339                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 340                     errno);
 341         sqp->sq_header.svp_crc32 = htonl(0);
 342         sqp->sq_rdata = vl2r;
 343         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 344         sqp->sq_wdata = NULL;
 345         sqp->sq_wsize = 0;
 346 
 347         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 348         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 349 
 350         mutex_enter(&srp->sr_lock);
 351         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 352                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 353         mutex_exit(&srp->sr_lock);
 354 }
 355 
 356 static void
 357 svp_remote_route_lookup_cb(svp_query_t *sqp, void *arg)
 358 {
 359         svp_t *svp = sqp->sq_svp;
 360         svp_route_ack_t *sra = (svp_route_ack_t *)sqp->sq_wdata;
 361 
 362         if (sqp->sq_status == SVP_S_OK) {
 363                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 364                     sra->sra_dcid, sra->sra_vnetid, sra->sra_vlan,
 365                     sra->sra_srcmac, sra->sra_dstmac, sra->sra_port,
 366                     sra->sra_ip, sra->sra_src_pfx, sra->sra_dst_pfx, arg);
 367         } else {
 368                 svp->svp_cb.scb_route_lookup(svp, sqp->sq_status,
 369                     0, 0, 0, NULL, NULL, 0, NULL, 0, 0, arg);
 370         }
 371 }
 372 
 373 void
 374 svp_remote_route_lookup(svp_t *svp, svp_query_t *sqp,
 375     const struct in6_addr *src, const struct in6_addr *dst, uint32_t vnetid,
 376     uint16_t vlan, void *arg)
 377 {
 378         svp_remote_t *srp;
 379         svp_route_req_t *srr = &sqp->sq_rdun.sqd_rr;
 380 
 381         srp = svp->svp_remote;
 382         sqp->sq_func = svp_remote_route_lookup_cb;
 383         sqp->sq_arg = arg;
 384         sqp->sq_svp = svp;
 385         sqp->sq_state = SVP_QUERY_INIT;
 386         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 387         sqp->sq_header.svp_op = htons(SVP_R_ROUTE_REQ);
 388         sqp->sq_header.svp_size = htonl(sizeof (svp_route_req_t));
 389         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 390         if (sqp->sq_header.svp_id == (id_t)-1)
 391                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 392                     errno);
 393         sqp->sq_header.svp_crc32 = htonl(0);
 394         sqp->sq_rdata = srr;
 395 
 396         bcopy(src, srr->srr_srcip, sizeof (struct in6_addr));
 397         bcopy(dst, srr->srr_dstip, sizeof (struct in6_addr));
 398         /* Caller should've checked both are the same type... */
 399         srr->srr_vnetid = vnetid;
 400         srr->srr_vlan = vlan;
 401         srr->srr_pad = 0;
 402 
 403         mutex_enter(&srp->sr_lock);
 404         if (!svp_remote_conn_queue(srp, sqp)) {
 405                 sqp->sq_status = SVP_S_FATAL;
 406                 sqp->sq_func(sqp, arg);
 407         }
 408         mutex_exit(&srp->sr_lock);
 409 }
 410 
 411 static void
 412 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 413 {
 414         svp_t *svp = sqp->sq_svp;
 415         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 416 
 417         if (sqp->sq_status == SVP_S_OK)
 418                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 419                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 420                     arg);
 421         else
 422                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 423                     arg);
 424 }
 425 
 426 static void
 427 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 428     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 429 {
 430         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 431 
 432         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 433                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 434 
 435         sqp->sq_func = func;
 436         sqp->sq_arg = arg;
 437         sqp->sq_state = SVP_QUERY_INIT;
 438         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 439         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 440         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 441         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 442         if (sqp->sq_header.svp_id == (id_t)-1)
 443                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 444                     errno);
 445         sqp->sq_header.svp_crc32 = htonl(0);
 446         sqp->sq_rdata = vl3r;
 447         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 448         sqp->sq_wdata = NULL;
 449         sqp->sq_wsize = 0;
 450 
 451         if (addr->sa_family == AF_INET6) {
 452                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 453                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 454                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 455                     sizeof (struct in6_addr));
 456         } else {
 457                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 458                 struct in6_addr v6;
 459 
 460                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 461                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 462                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 463         }
 464         vl3r->sl3r_vnetid = htonl(vid);
 465 
 466         mutex_enter(&srp->sr_lock);
 467         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 468                 sqp->sq_status = SVP_S_FATAL;
 469                 sqp->sq_func(sqp, arg);
 470         }
 471         mutex_exit(&srp->sr_lock);
 472 }
 473 
 474 /*
 475  * This is a request to do a VL3 look-up that originated internally as opposed
 476  * to coming from varpd. As such we need a slightly different query callback
 477  * function upon completion and don't go through the normal path with the svp_t.
 478  */
 479 void
 480 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
 481     const struct sockaddr *addr, svp_query_f func, void *arg)
 482 {
 483         svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
 484 }
 485 
 486 void
 487 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 488     const struct sockaddr *addr, void *arg)
 489 {
 490         svp_remote_t *srp = svp->svp_remote;
 491 
 492         sqp->sq_svp = svp;
 493         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 494             arg, svp->svp_vid);
 495 }
 496 
 497 static void
 498 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 499 {
 500         svp_remote_t *srp = sqp->sq_arg;
 501 
 502         assert(sqp->sq_wdata != NULL);
 503         if (sqp->sq_status == SVP_S_OK)
 504                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 505                     sqp->sq_size);
 506         else
 507                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
 508 }
 509 
 510 void
 511 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 512     size_t buflen)
 513 {
 514         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 515         boolean_t queued;
 516 
 517         sqp->sq_func = svp_remote_log_request_cb;
 518         sqp->sq_state = SVP_QUERY_INIT;
 519         sqp->sq_arg = srp;
 520         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 521         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 522         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 523         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 524         if (sqp->sq_header.svp_id == (id_t)-1)
 525                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 526                     errno);
 527         sqp->sq_header.svp_crc32 = htonl(0);
 528         sqp->sq_rdata = logr;
 529         sqp->sq_rsize = sizeof (svp_log_req_t);
 530         sqp->sq_wdata = buf;
 531         sqp->sq_wsize = buflen;
 532 
 533         logr->svlr_count = htonl(buflen);
 534         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 535 
 536         /*
 537          * If this fails, there isn't much that we can't do. Give the callback
 538          * with a fatal status.
 539          */
 540         mutex_enter(&srp->sr_lock);
 541         queued = svp_remote_conn_queue(srp, sqp);
 542         mutex_exit(&srp->sr_lock);
 543 
 544         if (queued == B_FALSE)
 545                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 546 }
 547 
 548 static void
 549 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 550 {
 551         svp_remote_t *srp = arg;
 552 
 553         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 554 }
 555 
 556 void
 557 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 558     size_t buflen)
 559 {
 560         boolean_t queued;
 561         svp_lrm_req_t *svrr = buf;
 562 
 563         sqp->sq_func = svp_remote_lrm_request_cb;
 564         sqp->sq_state = SVP_QUERY_INIT;
 565         sqp->sq_arg = srp;
 566         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 567         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 568         sqp->sq_header.svp_size = htonl(buflen);
 569         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 570         if (sqp->sq_header.svp_id == (id_t)-1)
 571                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 572                     errno);
 573         sqp->sq_header.svp_crc32 = htonl(0);
 574         sqp->sq_rdata = buf;
 575         sqp->sq_rsize = buflen;
 576         sqp->sq_wdata = NULL;
 577         sqp->sq_wsize = 0;
 578 
 579         /*
 580          * We need to fix up the count to be in proper network order.
 581          */
 582         svrr->svrr_count = htonl(svrr->svrr_count);
 583 
 584         /*
 585          * If this fails, there isn't much that we can't do. Give the callback
 586          * with a fatal status.
 587          */
 588         mutex_enter(&srp->sr_lock);
 589         queued = svp_remote_conn_queue(srp, sqp);
 590         mutex_exit(&srp->sr_lock);
 591 
 592         if (queued == B_FALSE)
 593                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 594 }
 595 
 596 /* ARGSUSED */
 597 void
 598 svp_remote_dns_timer(void *unused)
 599 {
 600         svp_remote_t *s;
 601         mutex_enter(&svp_remote_lock);
 602         for (s = avl_first(&svp_remote_tree); s != NULL;
 603             s = AVL_NEXT(&svp_remote_tree, s)) {
 604                 svp_host_queue(s);
 605         }
 606         mutex_exit(&svp_remote_lock);
 607 }
 608 
 609 void
 610 svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
 611 {
 612         struct addrinfo *a;
 613         svp_conn_t *scp;
 614         int ngen;
 615 
 616         mutex_enter(&srp->sr_lock);
 617         srp->sr_gen++;
 618         ngen = srp->sr_gen;
 619         mutex_exit(&srp->sr_lock);
 620 
 621         for (a = newaddrs; a != NULL; a = a->ai_next) {
 622                 struct in6_addr in6;
 623                 struct in6_addr *addrp;
 624 
 625                 if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
 626                         continue;
 627 
 628                 if (a->ai_family == AF_INET) {
 629                         struct sockaddr_in *v4;
 630                         v4 = (struct sockaddr_in *)a->ai_addr;
 631                         addrp = &in6;
 632                         IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
 633                 } else {
 634                         struct sockaddr_in6 *v6;
 635                         v6 = (struct sockaddr_in6 *)a->ai_addr;
 636                         addrp = &v6->sin6_addr;
 637                 }
 638 
 639                 mutex_enter(&srp->sr_lock);
 640                 for (scp = list_head(&srp->sr_conns); scp != NULL;
 641                     scp = list_next(&srp->sr_conns, scp)) {
 642                         mutex_enter(&scp->sc_lock);
 643                         if (bcmp(addrp, &scp->sc_addr,
 644                             sizeof (struct in6_addr)) == 0) {
 645                                 scp->sc_gen = ngen;
 646                                 mutex_exit(&scp->sc_lock);
 647                                 break;
 648                         }
 649                         mutex_exit(&scp->sc_lock);
 650                 }
 651 
 652                 /*
 653                  * We need to be careful in the assumptions that we make here,
 654                  * as there's a good chance that svp_conn_create will
 655                  * drop the svp_remote_t`sr_lock to kick off its effective event
 656                  * loop.
 657                  */
 658                 if (scp == NULL)
 659                         (void) svp_conn_create(srp, addrp);
 660                 mutex_exit(&srp->sr_lock);
 661         }
 662 
 663         /*
 664          * Now it's time to clean things up. We do not actively clean up the
 665          * current connections that we have, instead allowing them to stay
 666          * around assuming that they're still useful. Instead, we go through and
 667          * purge the degraded list for anything that's from an older generation.
 668          */
 669         mutex_enter(&srp->sr_lock);
 670         for (scp = list_head(&srp->sr_conns); scp != NULL;
 671             scp = list_next(&srp->sr_conns, scp)) {
 672                 boolean_t fall = B_FALSE;
 673                 mutex_enter(&scp->sc_lock);
 674                 if (scp->sc_gen < srp->sr_gen)
 675                         fall = B_TRUE;
 676                 mutex_exit(&scp->sc_lock);
 677                 if (fall == B_TRUE)
 678                         svp_conn_fallout(scp);
 679         }
 680         mutex_exit(&srp->sr_lock);
 681 }
 682 
 683 /*
 684  * This connection is in the process of being reset, we need to reassign all of
 685  * its queries to other places or mark them as fatal. Note that the first
 686  * connection was the one in flight when this failed. We always mark it as
 687  * failed to avoid trying to reset its state.
 688  */
 689 void
 690 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 691 {
 692         boolean_t first = B_TRUE;
 693         assert(MUTEX_HELD(&srp->sr_lock));
 694         assert(MUTEX_HELD(&srp->sr_lock));
 695         svp_query_t *sqp;
 696 
 697         /*
 698          * As we try to reassigning all of its queries, remove it from the list.
 699          */
 700         list_remove(&srp->sr_conns, scp);
 701 
 702         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 703 
 704                 if (first == B_TRUE) {
 705                         sqp->sq_status = SVP_S_FATAL;
 706                         sqp->sq_func(sqp, sqp->sq_arg);
 707                         continue;
 708                 }
 709 
 710                 sqp->sq_acttime = -1;
 711 
 712                 /*
 713                  * We may want to maintain a queue of these for some time rather
 714                  * than just failing them all.
 715                  */
 716                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 717                         sqp->sq_status = SVP_S_FATAL;
 718                         sqp->sq_func(sqp, sqp->sq_arg);
 719                 }
 720         }
 721 
 722         /*
 723          * Now that we're done, go ahead and re-insert.
 724          */
 725         list_insert_tail(&srp->sr_conns, scp);
 726 }
 727 
 728 void
 729 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 730 {
 731         int sf, nf;
 732         char buf[256];
 733 
 734         assert(MUTEX_HELD(&srp->sr_lock));
 735 
 736         if (flag == SVP_RD_ALL || flag == 0)
 737                 libvarpd_panic("invalid flag passed to degrade");
 738 
 739         if ((flag & srp->sr_degrade) != 0) {
 740                 return;
 741         }
 742 
 743         sf = ffs(srp->sr_degrade);
 744         nf = ffs(flag);
 745         srp->sr_degrade |= flag;
 746         if (sf == 0 || sf > nf) {
 747                 svp_t *svp;
 748                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 749 
 750                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 751                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 752                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 753                 }
 754         }
 755 }
 756 
 757 void
 758 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 759 {
 760         int sf, nf;
 761 
 762         assert(MUTEX_HELD(&srp->sr_lock));
 763         sf = ffs(srp->sr_degrade);
 764         if ((srp->sr_degrade & flag) != flag)
 765                 return;
 766         srp->sr_degrade &= ~flag;
 767         nf = ffs(srp->sr_degrade);
 768 
 769         /*
 770          * If we're now empty, restore the device. If we still are degraded, but
 771          * we now have a higher base than we used to, change the message.
 772          */
 773         if (srp->sr_degrade == 0) {
 774                 svp_t *svp;
 775                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 776                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 777                         libvarpd_fma_restore(svp->svp_hdl);
 778                 }
 779         } else if (nf != sf) {
 780                 svp_t *svp;
 781                 char buf[256];
 782 
 783                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 784                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 785                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 786                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 787                 }
 788         }
 789 }
 790 
 791 void
 792 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 793 {
 794         svp_shoot_vl3_t *squery = arg;
 795         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 796         svp_sdlog_t *sdl = squery->ssv_log;
 797 
 798         if (sqp->sq_status == SVP_S_OK) {
 799                 svp_t *svp, lookup;
 800 
 801                 svp_remote_t *srp = sdl->sdl_remote;
 802                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 803 
 804                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 805                 mutex_enter(&srp->sr_lock);
 806                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 807                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 808                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 809                             NULL);
 810                 }
 811                 mutex_exit(&srp->sr_lock);
 812 
 813         }
 814 
 815         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 816 
 817         umem_free(squery, sizeof (svp_shoot_vl3_t));
 818 }
 819 
 820 void
 821 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 822     svp_sdlog_t *sdl)
 823 {
 824         svp_shoot_vl3_t *squery;
 825 
 826         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 827         if (squery == NULL) {
 828                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 829                 return;
 830         }
 831 
 832         squery->ssv_vl3 = svl3;
 833         squery->ssv_log = sdl;
 834         squery->ssv_sock.sin6_family = AF_INET6;
 835         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 836             sizeof (svl3->svl3_ip));
 837         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 838             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 839             squery);
 840 }
 841 
 842 void
 843 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 844 {
 845         svp_t *svp, lookup;
 846 
 847         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 848         mutex_enter(&srp->sr_lock);
 849         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 850                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 851         }
 852         mutex_exit(&srp->sr_lock);
 853 }
 854 
 855 int
 856 svp_remote_init(void)
 857 {
 858         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 859         if (svp_idspace == NULL)
 860                 return (errno);
 861         avl_create(&svp_remote_tree, svp_remote_comparator,
 862             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 863         svp_dns_timer.st_func = svp_remote_dns_timer;
 864         svp_dns_timer.st_arg = NULL;
 865         svp_dns_timer.st_oneshot = B_FALSE;
 866         svp_dns_timer.st_value = svp_dns_timer_rate;
 867         svp_timer_add(&svp_dns_timer);
 868         return (0);
 869 }
 870 
 871 void
 872 svp_remote_fini(void)
 873 {
 874         svp_timer_remove(&svp_dns_timer);
 875         avl_destroy(&svp_remote_tree);
 876         if (svp_idspace == NULL)
 877                 id_space_destroy(svp_idspace);
 878 }