1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 /*
  17  * Remote backend management
  18  *
  19  * For more information, see the big theory statement in
  20  * lib/varpd/svp/common/libvarpd_svp.c.
  21  */
  22 
  23 #include <umem.h>
  24 #include <strings.h>
  25 #include <string.h>
  26 #include <stddef.h>
  27 #include <thread.h>
  28 #include <synch.h>
  29 #include <assert.h>
  30 #include <sys/socket.h>
  31 #include <netdb.h>
  32 #include <errno.h>
  33 #include <libidspace.h>
  34 
  35 #include <libvarpd_provider.h>
  36 #include <libvarpd_svp.h>
  37 
  38 typedef struct svp_shoot_vl3 {
  39         svp_query_t             ssv_query;
  40         struct sockaddr_in6     ssv_sock;
  41         svp_log_vl3_t           *ssv_vl3;
  42         svp_sdlog_t             *ssv_log;
  43 } svp_shoot_vl3_t;
  44 
  45 static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
  46 static avl_tree_t svp_remote_tree;
  47 static svp_timer_t svp_dns_timer;
  48 static id_space_t *svp_idspace;
  49 static int svp_dns_timer_rate = 30;     /* seconds */
  50 
  51 static void
  52 svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
  53     size_t buflen)
  54 {
  55         switch (state) {
  56         case SVP_RD_DNS_FAIL:
  57                 (void) snprintf(buf, buflen, "failed to resolve or find "
  58                     "entries for hostname %s", srp->sr_hostname);
  59                 break;
  60         case SVP_RD_REMOTE_FAIL:
  61                 (void) snprintf(buf, buflen, "cannot reach any remote peers");
  62                 break;
  63         default:
  64                 (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
  65         }
  66 }
  67 
  68 static int
  69 svp_remote_comparator(const void *l, const void *r)
  70 {
  71         int ret;
  72         const svp_remote_t *lr = l, *rr = r;
  73 
  74         ret = strcmp(lr->sr_hostname, rr->sr_hostname);
  75         if (ret > 0)
  76                 return (1);
  77         else if (ret < 0)
  78                 return (-1);
  79 
  80         if (lr->sr_rport > rr->sr_rport)
  81                 return (1);
  82         else if (lr->sr_rport < rr->sr_rport)
  83                 return (-1);
  84 
  85         return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
  86 }
  87 
  88 void
  89 svp_query_release(svp_query_t *sqp)
  90 {
  91         id_free(svp_idspace, sqp->sq_header.svp_id);
  92 }
  93 
  94 static void
  95 svp_remote_destroy(svp_remote_t *srp)
  96 {
  97         size_t len;
  98 
  99         /*
 100          * Clean up any unrelated DNS information. At this point we know that
 101          * we're not in the remote tree. That means, that svp_remote_dns_timer
 102          * cannot queue us. However, if any of our DNS related state flags are
 103          * set, we have to hang out.
 104          */
 105         mutex_enter(&srp->sr_lock);
 106         while (srp->sr_state &
 107             (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
 108                 (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
 109         }
 110         mutex_exit(&srp->sr_lock);
 111         svp_shootdown_fini(srp);
 112 
 113         if (cond_destroy(&srp->sr_cond) != 0)
 114                 libvarpd_panic("failed to destroy cond sr_cond");
 115 
 116         if (mutex_destroy(&srp->sr_lock) != 0)
 117                 libvarpd_panic("failed to destroy mutex sr_lock");
 118 
 119         if (srp->sr_addrinfo != NULL)
 120                 freeaddrinfo(srp->sr_addrinfo);
 121         len = strlen(srp->sr_hostname) + 1;
 122         umem_free(srp->sr_hostname, len);
 123         umem_free(srp, sizeof (svp_remote_t));
 124 }
 125 
 126 static int
 127 svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
 128     svp_remote_t **outp)
 129 {
 130         size_t hlen;
 131         svp_remote_t *remote;
 132 
 133         assert(MUTEX_HELD(&svp_remote_lock));
 134 
 135         remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
 136         if (remote == NULL) {
 137                 mutex_exit(&svp_remote_lock);
 138                 return (ENOMEM);
 139         }
 140 
 141         if (svp_shootdown_init(remote) != 0) {
 142                 umem_free(remote, sizeof (svp_remote_t));
 143                 mutex_exit(&svp_remote_lock);
 144                 return (ENOMEM);
 145         }
 146 
 147         hlen = strlen(host) + 1;
 148         remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
 149         if (remote->sr_hostname == NULL) {
 150                 svp_shootdown_fini(remote);
 151                 umem_free(remote, sizeof (svp_remote_t));
 152                 mutex_exit(&svp_remote_lock);
 153                 return (ENOMEM);
 154         }
 155         remote->sr_rport = port;
 156         if (mutex_init(&remote->sr_lock,
 157             USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
 158                 libvarpd_panic("failed to create mutex sr_lock");
 159         if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
 160                 libvarpd_panic("failed to create cond sr_cond");
 161         list_create(&remote->sr_conns, sizeof (svp_conn_t),
 162             offsetof(svp_conn_t, sc_rlist));
 163         avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
 164             offsetof(svp_t, svp_rlink));
 165         (void) strlcpy(remote->sr_hostname, host, hlen);
 166         remote->sr_count = 1;
 167         remote->sr_uip = *uip;
 168 
 169         svp_shootdown_start(remote);
 170 
 171         *outp = remote;
 172         return (0);
 173 }
 174 
 175 int
 176 svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
 177     svp_remote_t **outp)
 178 {
 179         int ret;
 180         svp_remote_t lookup, *remote;
 181 
 182         lookup.sr_hostname = host;
 183         lookup.sr_rport = port;
 184         lookup.sr_uip = *uip;
 185         mutex_enter(&svp_remote_lock);
 186         remote = avl_find(&svp_remote_tree, &lookup, NULL);
 187         if (remote != NULL) {
 188                 assert(remote->sr_count > 0);
 189                 remote->sr_count++;
 190                 *outp = remote;
 191                 mutex_exit(&svp_remote_lock);
 192                 return (0);
 193         }
 194 
 195         if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
 196                 mutex_exit(&svp_remote_lock);
 197                 return (ret);
 198         }
 199 
 200         avl_add(&svp_remote_tree, *outp);
 201         mutex_exit(&svp_remote_lock);
 202 
 203         /* Make sure DNS is up to date */
 204         svp_host_queue(*outp);
 205 
 206         return (0);
 207 }
 208 
 209 void
 210 svp_remote_release(svp_remote_t *srp)
 211 {
 212         mutex_enter(&svp_remote_lock);
 213         mutex_enter(&srp->sr_lock);
 214         srp->sr_count--;
 215         if (srp->sr_count != 0) {
 216                 mutex_exit(&srp->sr_lock);
 217                 mutex_exit(&svp_remote_lock);
 218                 return;
 219         }
 220         mutex_exit(&srp->sr_lock);
 221 
 222         avl_remove(&svp_remote_tree, srp);
 223         mutex_exit(&svp_remote_lock);
 224         svp_remote_destroy(srp);
 225 }
 226 
 227 int
 228 svp_remote_attach(svp_remote_t *srp, svp_t *svp)
 229 {
 230         svp_t check;
 231         avl_index_t where;
 232 
 233         mutex_enter(&srp->sr_lock);
 234         if (svp->svp_remote != NULL)
 235                 libvarpd_panic("failed to create mutex sr_lock");
 236 
 237         /*
 238          * We require everything except shootdowns
 239          */
 240         if (svp->svp_cb.scb_vl2_lookup == NULL)
 241                 libvarpd_panic("missing callback scb_vl2_lookup");
 242         if (svp->svp_cb.scb_vl3_lookup == NULL)
 243                 libvarpd_panic("missing callback scb_vl3_lookup");
 244         if (svp->svp_cb.scb_vl2_invalidate == NULL)
 245                 libvarpd_panic("missing callback scb_vl2_invalidate");
 246         if (svp->svp_cb.scb_vl3_inject == NULL)
 247                 libvarpd_panic("missing callback scb_vl3_inject");
 248         if (svp->svp_cb.scb_rvl3_lookup == NULL)
 249                 libvarpd_panic("missing callback scb_rvl3_lookup");
 250 
 251         check.svp_vid = svp->svp_vid;
 252         if (avl_find(&srp->sr_tree, &check, &where) != NULL)
 253                 libvarpd_panic("found duplicate entry with vid %ld",
 254                     svp->svp_vid);
 255         avl_insert(&srp->sr_tree, svp, where);
 256         svp->svp_remote = srp;
 257         mutex_exit(&srp->sr_lock);
 258 
 259         return (0);
 260 }
 261 
 262 void
 263 svp_remote_detach(svp_t *svp)
 264 {
 265         svp_t *lookup;
 266         svp_remote_t *srp = svp->svp_remote;
 267 
 268         if (srp == NULL)
 269                 libvarpd_panic("trying to detach remote when none exists");
 270 
 271         mutex_enter(&srp->sr_lock);
 272         lookup = avl_find(&srp->sr_tree, svp, NULL);
 273         if (lookup == NULL || lookup != svp)
 274                 libvarpd_panic("inconsitent remote avl tree...");
 275         avl_remove(&srp->sr_tree, svp);
 276         svp->svp_remote = NULL;
 277         mutex_exit(&srp->sr_lock);
 278         svp_remote_release(srp);
 279 }
 280 
 281 /*
 282  * Walk the list of connections and find the first one that's available, the
 283  * move it to the back of the list so it's less likely to be used again.
 284  */
 285 static boolean_t
 286 svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
 287 {
 288         svp_conn_t *scp;
 289 
 290         assert(MUTEX_HELD(&srp->sr_lock));
 291         for (scp = list_head(&srp->sr_conns); scp != NULL;
 292             scp = list_next(&srp->sr_conns, scp)) {
 293                 mutex_enter(&scp->sc_lock);
 294                 if (scp->sc_cstate != SVP_CS_ACTIVE) {
 295                         mutex_exit(&scp->sc_lock);
 296                         continue;
 297                 }
 298                 svp_conn_queue(scp, sqp);
 299                 mutex_exit(&scp->sc_lock);
 300                 list_remove(&srp->sr_conns, scp);
 301                 list_insert_tail(&srp->sr_conns, scp);
 302                 return (B_TRUE);
 303         }
 304 
 305         return (B_FALSE);
 306 }
 307 
 308 static void
 309 svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
 310 {
 311         svp_t *svp = sqp->sq_svp;
 312         svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
 313 
 314         if (sqp->sq_status == SVP_S_OK)
 315                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
 316                     (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
 317                     arg);
 318         else
 319                 svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
 320 }
 321 
 322 void
 323 svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
 324     void *arg)
 325 {
 326         svp_remote_t *srp;
 327         svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
 328 
 329         srp = svp->svp_remote;
 330         sqp->sq_func = svp_remote_vl2_lookup_cb;
 331         sqp->sq_arg = arg;
 332         sqp->sq_svp = svp;
 333         sqp->sq_state = SVP_QUERY_INIT;
 334         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 335         sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
 336         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 337         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 338         if (sqp->sq_header.svp_id == (id_t)-1)
 339                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 340                     errno);
 341         sqp->sq_header.svp_crc32 = htonl(0);
 342         sqp->sq_rdata = vl2r;
 343         sqp->sq_rsize = sizeof (svp_vl2_req_t);
 344         sqp->sq_wdata = NULL;
 345         sqp->sq_wsize = 0;
 346 
 347         bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
 348         vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
 349 
 350         mutex_enter(&srp->sr_lock);
 351         if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
 352                 svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
 353         mutex_exit(&srp->sr_lock);
 354 }
 355 
 356 static void
 357 svp_remote_rvl3_lookup_cb(svp_query_t *sqp, void *arg)
 358 {
 359         svp_t *svp = sqp->sq_svp;
 360         svp_rvl3_ack_t *rvl3a = (svp_rvl3_ack_t *)sqp->sq_wdata;
 361 
 362         if (sqp->sq_status == SVP_S_OK) {
 363                 svp->svp_cb.scb_rvl3_lookup(svp, sqp->sq_status,
 364                     /* XXX KEBE SAYS MORE HERE */ arg);
 365         } else {
 366         }
 367 }
 368 
 369 void
 370 svp_remote_rvl3_lookup(svp_t *svp, svp_query_t *sqp, const struct in6_addr *src,
 371     const struct in6_addr *dst, uint32_t type, uint32_t vnetid, uint16_t vlan,
 372     void *arg)
 373 {
 374         svp_remote_t *srp;
 375         svp_rvl3_req_t *rvl3r = &sqp->sq_rdun.sqd_rvl3r;
 376 
 377         srp = svp->svp_remote;
 378         sqp->sq_func = svp_remote_rvl3_lookup_cb;
 379         sqp->sq_arg = arg;
 380         sqp->sq_svp = svp;
 381         sqp->sq_state = SVP_QUERY_INIT;
 382         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 383         sqp->sq_header.svp_op = htons(SVP_R_REMOTE_VL3_REQ);
 384         sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
 385         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 386         if (sqp->sq_header.svp_id == (id_t)-1)
 387                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 388                     errno);
 389         sqp->sq_header.svp_crc32 = htonl(0);
 390         sqp->sq_rdata = rvl3r;
 391 
 392         bcopy(src, rvl3r->srl3r_srcip, sizeof (struct in6_addr));
 393         bcopy(dst, rvl3r->srl3r_dstip, sizeof (struct in6_addr));
 394         /* Caller should've checked both are the same type... */
 395         rvl3r->srl3r_type = type;
 396         rvl3r->srl3r_vnetid = vnetid;
 397         rvl3r->srl3r_vlan = vlan;
 398         rvl3r->srl3r_pad = 0;
 399 
 400         mutex_enter(&srp->sr_lock);
 401         if (!svp_remote_conn_queue(srp, sqp)) {
 402                 sqp->sq_status = SVP_S_FATAL;
 403                 sqp->sq_func(sqp, arg);
 404         }
 405         mutex_exit(&srp->sr_lock);
 406 }
 407 
 408 static void
 409 svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
 410 {
 411         svp_t *svp = sqp->sq_svp;
 412         svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 413 
 414         if (sqp->sq_status == SVP_S_OK)
 415                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
 416                     (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
 417                     arg);
 418         else
 419                 svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
 420                     arg);
 421 }
 422 
 423 static void
 424 svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
 425     const struct sockaddr *addr,  svp_query_f func, void *arg, uint32_t vid)
 426 {
 427         svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
 428 
 429         if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 430                 libvarpd_panic("unexpected sa_family for the vl3 lookup");
 431 
 432         sqp->sq_func = func;
 433         sqp->sq_arg = arg;
 434         sqp->sq_state = SVP_QUERY_INIT;
 435         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 436         sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
 437         sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
 438         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 439         if (sqp->sq_header.svp_id == (id_t)-1)
 440                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 441                     errno);
 442         sqp->sq_header.svp_crc32 = htonl(0);
 443         sqp->sq_rdata = vl3r;
 444         sqp->sq_rsize = sizeof (svp_vl3_req_t);
 445         sqp->sq_wdata = NULL;
 446         sqp->sq_wsize = 0;
 447 
 448         if (addr->sa_family == AF_INET6) {
 449                 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
 450                 vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
 451                 bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
 452                     sizeof (struct in6_addr));
 453         } else {
 454                 struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
 455                 struct in6_addr v6;
 456 
 457                 vl3r->sl3r_type = htonl(SVP_VL3_IP);
 458                 IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
 459                 bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
 460         }
 461         vl3r->sl3r_vnetid = htonl(vid);
 462 
 463         mutex_enter(&srp->sr_lock);
 464         if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 465                 sqp->sq_status = SVP_S_FATAL;
 466                 sqp->sq_func(sqp, arg);
 467         }
 468         mutex_exit(&srp->sr_lock);
 469 }
 470 
 471 /*
 472  * This is a request to do a VL3 look-up that originated internally as opposed
 473  * to coming from varpd. As such we need a slightly different query callback
 474  * function upon completion and don't go through the normal path with the svp_t.
 475  */
 476 void
 477 svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
 478     const struct sockaddr *addr, svp_query_f func, void *arg)
 479 {
 480         svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
 481 }
 482 
 483 void
 484 svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
 485     const struct sockaddr *addr, void *arg)
 486 {
 487         svp_remote_t *srp = svp->svp_remote;
 488 
 489         sqp->sq_svp = svp;
 490         svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
 491             arg, svp->svp_vid);
 492 }
 493 
 494 static void
 495 svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
 496 {
 497         svp_remote_t *srp = sqp->sq_arg;
 498 
 499         assert(sqp->sq_wdata != NULL);
 500         if (sqp->sq_status == SVP_S_OK)
 501                 svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
 502                     sqp->sq_size);
 503         else
 504                 svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
 505 }
 506 
 507 void
 508 svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 509     size_t buflen)
 510 {
 511         svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
 512         boolean_t queued;
 513 
 514         sqp->sq_func = svp_remote_log_request_cb;
 515         sqp->sq_state = SVP_QUERY_INIT;
 516         sqp->sq_arg = srp;
 517         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 518         sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
 519         sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
 520         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 521         if (sqp->sq_header.svp_id == (id_t)-1)
 522                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 523                     errno);
 524         sqp->sq_header.svp_crc32 = htonl(0);
 525         sqp->sq_rdata = logr;
 526         sqp->sq_rsize = sizeof (svp_log_req_t);
 527         sqp->sq_wdata = buf;
 528         sqp->sq_wsize = buflen;
 529 
 530         logr->svlr_count = htonl(buflen);
 531         bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
 532 
 533         /*
 534          * If this fails, there isn't much that we can't do. Give the callback
 535          * with a fatal status.
 536          */
 537         mutex_enter(&srp->sr_lock);
 538         queued = svp_remote_conn_queue(srp, sqp);
 539         mutex_exit(&srp->sr_lock);
 540 
 541         if (queued == B_FALSE)
 542                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 543 }
 544 
 545 static void
 546 svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
 547 {
 548         svp_remote_t *srp = arg;
 549 
 550         svp_shootdown_lrm_cb(srp, sqp->sq_status);
 551 }
 552 
 553 void
 554 svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
 555     size_t buflen)
 556 {
 557         boolean_t queued;
 558         svp_lrm_req_t *svrr = buf;
 559 
 560         sqp->sq_func = svp_remote_lrm_request_cb;
 561         sqp->sq_state = SVP_QUERY_INIT;
 562         sqp->sq_arg = srp;
 563         sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
 564         sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
 565         sqp->sq_header.svp_size = htonl(buflen);
 566         sqp->sq_header.svp_id = id_alloc(svp_idspace);
 567         if (sqp->sq_header.svp_id == (id_t)-1)
 568                 libvarpd_panic("failed to allcoate from svp_idspace: %d",
 569                     errno);
 570         sqp->sq_header.svp_crc32 = htonl(0);
 571         sqp->sq_rdata = buf;
 572         sqp->sq_rsize = buflen;
 573         sqp->sq_wdata = NULL;
 574         sqp->sq_wsize = 0;
 575 
 576         /*
 577          * We need to fix up the count to be in proper network order.
 578          */
 579         svrr->svrr_count = htonl(svrr->svrr_count);
 580 
 581         /*
 582          * If this fails, there isn't much that we can't do. Give the callback
 583          * with a fatal status.
 584          */
 585         mutex_enter(&srp->sr_lock);
 586         queued = svp_remote_conn_queue(srp, sqp);
 587         mutex_exit(&srp->sr_lock);
 588 
 589         if (queued == B_FALSE)
 590                 svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
 591 }
 592 
 593 /* ARGSUSED */
 594 void
 595 svp_remote_dns_timer(void *unused)
 596 {
 597         svp_remote_t *s;
 598         mutex_enter(&svp_remote_lock);
 599         for (s = avl_first(&svp_remote_tree); s != NULL;
 600             s = AVL_NEXT(&svp_remote_tree, s)) {
 601                 svp_host_queue(s);
 602         }
 603         mutex_exit(&svp_remote_lock);
 604 }
 605 
 606 void
 607 svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
 608 {
 609         struct addrinfo *a;
 610         svp_conn_t *scp;
 611         int ngen;
 612 
 613         mutex_enter(&srp->sr_lock);
 614         srp->sr_gen++;
 615         ngen = srp->sr_gen;
 616         mutex_exit(&srp->sr_lock);
 617 
 618         for (a = newaddrs; a != NULL; a = a->ai_next) {
 619                 struct in6_addr in6;
 620                 struct in6_addr *addrp;
 621 
 622                 if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
 623                         continue;
 624 
 625                 if (a->ai_family == AF_INET) {
 626                         struct sockaddr_in *v4;
 627                         v4 = (struct sockaddr_in *)a->ai_addr;
 628                         addrp = &in6;
 629                         IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
 630                 } else {
 631                         struct sockaddr_in6 *v6;
 632                         v6 = (struct sockaddr_in6 *)a->ai_addr;
 633                         addrp = &v6->sin6_addr;
 634                 }
 635 
 636                 mutex_enter(&srp->sr_lock);
 637                 for (scp = list_head(&srp->sr_conns); scp != NULL;
 638                     scp = list_next(&srp->sr_conns, scp)) {
 639                         mutex_enter(&scp->sc_lock);
 640                         if (bcmp(addrp, &scp->sc_addr,
 641                             sizeof (struct in6_addr)) == 0) {
 642                                 scp->sc_gen = ngen;
 643                                 mutex_exit(&scp->sc_lock);
 644                                 break;
 645                         }
 646                         mutex_exit(&scp->sc_lock);
 647                 }
 648 
 649                 /*
 650                  * We need to be careful in the assumptions that we make here,
 651                  * as there's a good chance that svp_conn_create will
 652                  * drop the svp_remote_t`sr_lock to kick off its effective event
 653                  * loop.
 654                  */
 655                 if (scp == NULL)
 656                         (void) svp_conn_create(srp, addrp);
 657                 mutex_exit(&srp->sr_lock);
 658         }
 659 
 660         /*
 661          * Now it's time to clean things up. We do not actively clean up the
 662          * current connections that we have, instead allowing them to stay
 663          * around assuming that they're still useful. Instead, we go through and
 664          * purge the degraded list for anything that's from an older generation.
 665          */
 666         mutex_enter(&srp->sr_lock);
 667         for (scp = list_head(&srp->sr_conns); scp != NULL;
 668             scp = list_next(&srp->sr_conns, scp)) {
 669                 boolean_t fall = B_FALSE;
 670                 mutex_enter(&scp->sc_lock);
 671                 if (scp->sc_gen < srp->sr_gen)
 672                         fall = B_TRUE;
 673                 mutex_exit(&scp->sc_lock);
 674                 if (fall == B_TRUE)
 675                         svp_conn_fallout(scp);
 676         }
 677         mutex_exit(&srp->sr_lock);
 678 }
 679 
 680 /*
 681  * This connection is in the process of being reset, we need to reassign all of
 682  * its queries to other places or mark them as fatal. Note that the first
 683  * connection was the one in flight when this failed. We always mark it as
 684  * failed to avoid trying to reset its state.
 685  */
 686 void
 687 svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
 688 {
 689         boolean_t first = B_TRUE;
 690         assert(MUTEX_HELD(&srp->sr_lock));
 691         assert(MUTEX_HELD(&srp->sr_lock));
 692         svp_query_t *sqp;
 693 
 694         /*
 695          * As we try to reassigning all of its queries, remove it from the list.
 696          */
 697         list_remove(&srp->sr_conns, scp);
 698 
 699         while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
 700 
 701                 if (first == B_TRUE) {
 702                         sqp->sq_status = SVP_S_FATAL;
 703                         sqp->sq_func(sqp, sqp->sq_arg);
 704                         continue;
 705                 }
 706 
 707                 sqp->sq_acttime = -1;
 708 
 709                 /*
 710                  * We may want to maintain a queue of these for some time rather
 711                  * than just failing them all.
 712                  */
 713                 if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
 714                         sqp->sq_status = SVP_S_FATAL;
 715                         sqp->sq_func(sqp, sqp->sq_arg);
 716                 }
 717         }
 718 
 719         /*
 720          * Now that we're done, go ahead and re-insert.
 721          */
 722         list_insert_tail(&srp->sr_conns, scp);
 723 }
 724 
 725 void
 726 svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
 727 {
 728         int sf, nf;
 729         char buf[256];
 730 
 731         assert(MUTEX_HELD(&srp->sr_lock));
 732 
 733         if (flag == SVP_RD_ALL || flag == 0)
 734                 libvarpd_panic("invalid flag passed to degrade");
 735 
 736         if ((flag & srp->sr_degrade) != 0) {
 737                 return;
 738         }
 739 
 740         sf = ffs(srp->sr_degrade);
 741         nf = ffs(flag);
 742         srp->sr_degrade |= flag;
 743         if (sf == 0 || sf > nf) {
 744                 svp_t *svp;
 745                 svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
 746 
 747                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 748                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 749                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 750                 }
 751         }
 752 }
 753 
 754 void
 755 svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
 756 {
 757         int sf, nf;
 758 
 759         assert(MUTEX_HELD(&srp->sr_lock));
 760         sf = ffs(srp->sr_degrade);
 761         if ((srp->sr_degrade & flag) != flag)
 762                 return;
 763         srp->sr_degrade &= ~flag;
 764         nf = ffs(srp->sr_degrade);
 765 
 766         /*
 767          * If we're now empty, restore the device. If we still are degraded, but
 768          * we now have a higher base than we used to, change the message.
 769          */
 770         if (srp->sr_degrade == 0) {
 771                 svp_t *svp;
 772                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 773                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 774                         libvarpd_fma_restore(svp->svp_hdl);
 775                 }
 776         } else if (nf != sf) {
 777                 svp_t *svp;
 778                 char buf[256];
 779 
 780                 svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
 781                 for (svp = avl_first(&srp->sr_tree); svp != NULL;
 782                     svp = AVL_NEXT(&srp->sr_tree, svp)) {
 783                         libvarpd_fma_degrade(svp->svp_hdl, buf);
 784                 }
 785         }
 786 }
 787 
 788 void
 789 svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
 790 {
 791         svp_shoot_vl3_t *squery = arg;
 792         svp_log_vl3_t *svl3 = squery->ssv_vl3;
 793         svp_sdlog_t *sdl = squery->ssv_log;
 794 
 795         if (sqp->sq_status == SVP_S_OK) {
 796                 svp_t *svp, lookup;
 797 
 798                 svp_remote_t *srp = sdl->sdl_remote;
 799                 svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
 800 
 801                 lookup.svp_vid = ntohl(svl3->svl3_vnetid);
 802                 mutex_enter(&srp->sr_lock);
 803                 if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 804                         svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
 805                             (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
 806                             NULL);
 807                 }
 808                 mutex_exit(&srp->sr_lock);
 809 
 810         }
 811 
 812         svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
 813 
 814         umem_free(squery, sizeof (svp_shoot_vl3_t));
 815 }
 816 
 817 void
 818 svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
 819     svp_sdlog_t *sdl)
 820 {
 821         svp_shoot_vl3_t *squery;
 822 
 823         squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
 824         if (squery == NULL) {
 825                 svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
 826                 return;
 827         }
 828 
 829         squery->ssv_vl3 = svl3;
 830         squery->ssv_log = sdl;
 831         squery->ssv_sock.sin6_family = AF_INET6;
 832         bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
 833             sizeof (svl3->svl3_ip));
 834         svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
 835             (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
 836             squery);
 837 }
 838 
 839 void
 840 svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
 841 {
 842         svp_t *svp, lookup;
 843 
 844         lookup.svp_vid = ntohl(svl2->svl2_vnetid);
 845         mutex_enter(&srp->sr_lock);
 846         if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
 847                 svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
 848         }
 849         mutex_exit(&srp->sr_lock);
 850 }
 851 
 852 int
 853 svp_remote_init(void)
 854 {
 855         svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
 856         if (svp_idspace == NULL)
 857                 return (errno);
 858         avl_create(&svp_remote_tree, svp_remote_comparator,
 859             sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
 860         svp_dns_timer.st_func = svp_remote_dns_timer;
 861         svp_dns_timer.st_arg = NULL;
 862         svp_dns_timer.st_oneshot = B_FALSE;
 863         svp_dns_timer.st_value = svp_dns_timer_rate;
 864         svp_timer_add(&svp_dns_timer);
 865         return (0);
 866 }
 867 
 868 void
 869 svp_remote_fini(void)
 870 {
 871         svp_timer_remove(&svp_dns_timer);
 872         avl_destroy(&svp_remote_tree);
 873         if (svp_idspace == NULL)
 874                 id_space_destroy(svp_idspace);
 875 }