1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018, Joyent, Inc.
  14  */
  15 
  16 /*
  17  * This plugin implements the SDC VXLAN Protocol (SVP).
  18  *
  19  * This plugin is designed to work with a broader distributed system that
 * maintains a database of mappings, provides a means of looking up data, and
 * provides a stream of updates. While it is named after VXLAN, there isn't
  22  * anything specific to VXLAN baked into the protocol at this time, other than
  23  * that it requires both an IP address and a port; however, if there's a good
  24  * reason to support others here, we can modify that.
  25  *
  26  * -----------
  27  * Terminology
  28  * -----------
  29  *
  30  * Throughout this module we refer to a few different kinds of addresses:
  31  *
  32  *    VL3
  33  *
 *      A VL3 address, or virtual layer 3, refers to the layer three addresses
  35  *      that are used by entities on an overlay network. As far as we're
  36  *      concerned that means that this is the IP address of an interface on an
  37  *      overlay network.
  38  *
  39  *    VL2
  40  *
 *      A VL2 address, or virtual layer 2, refers to the link-layer addresses
 *      that are used by entities on an overlay network. As far as we're
 *      concerned that means that this is the MAC address of an interface on
  44  *      an overlay network.
  45  *
  46  *    UL3
  47  *
  48  *      A UL3, or underlay layer 3, refers to the layer three (IP) address on
  49  *      the underlay network.
  50  *
 * The svp plugin provides lookups from VL3->VL2, e.g. the equivalent of an ARP
  52  * or NDP query, and then also provides VL2->UL3 lookups.
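 *
 * As a concrete example, reaching a peer on an overlay network is logically a
 * two-step resolution (a sketch of the flow, not the literal entry points):
 *
 *      VL3 lookup:  overlay IP (VL3)   ->  overlay MAC (VL2)   [ARP/NDP-like]
 *      VL2 lookup:  overlay MAC (VL2)  ->  underlay IP:port (UL3)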
  53  *
  54  * -------------------
  55  * Protocol Operations
  56  * -------------------
  57  *
  58  * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
  59  * defines the basic TCP protocol that we use to communicate to hosts. At this
  60  * time, it is not quite 100% implemented in both this plug-in and our primary
  61  * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
  62  *
 * At this time, we don't quite support everything that we need to, including
 * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN operations.
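 *
 * Every message is preceded by a fixed-size request header. As a rough sketch
 * (the protocol header named above holds the authoritative definition), it
 * looks like:
 *
 *      typedef struct svp_req {
 *              uint16_t        svp_ver;        protocol version
 *              uint16_t        svp_op;         operation, e.g. SVP_R_VL2_REQ
 *              uint32_t        svp_size;       size of the payload
 *              uint32_t        svp_id;         id used to correlate replies
 *              uint32_t        svp_crc32;      crc32 of the payload
 *      } svp_req_t;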
  65  *
  66  * ---------------------------------
  67  * General Design and Considerations
  68  * ---------------------------------
  69  *
  70  * Every instance of the svp plugin requires the hostname and port of a server
 * to contact. We have co-opted port 1296 (the year of the oldest extant
 * portolan) as our default port.
  73  *
 * Each of the different instances of the plugin has a corresponding remote
 * backend. The remote backend represents the tuple [ host, port ].
  76  * Different instances that share the same host and port tuple will use the same
  77  * backend.
  78  *
  79  * The backend is actually in charge of performing lookups, resolving and
  80  * updating the set of remote hosts based on the DNS resolution we've been
  81  * provided, and taking care of things like shootdowns.
  82  *
 * The plugin as a whole maintains an event loop and a number of threads to
 * service that event loop. On top of that event loop, we have a simple timer
  85  * backend that ticks at one second intervals and performs various callbacks,
  86  * such as idle query timers, DNS resolution, connection backoff, etc. Each of
  87  * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
  88  * the connection state, reconnecting, etc.
  89  *
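 * A sketch of what one of those event loop threads conceptually does (the
 * svp_event_t member names here are assumptions; see libvarpd_svp.h for the
 * real shape):
 *
 *      for (;;) {
 *              port_event_t pe;
 *
 *              if (port_get(evport, &pe, NULL) != 0)
 *                      continue;
 *              sep = pe.portev_user;             an svp_event_t, by contract
 *              sep->se_func(&pe, sep->se_arg);   dispatch to the subsystem
 *      }
 *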
 * All in all, the general structure looks like this:
  91  *
  92  *  +----------------------------+
  93  *  | Plugin Instance            |
  94  *  | svp_t                      |
  95  *  |                            |
  96  *  | varpd_provider_handle_t * -+-> varpd handle
  97  *  | uint64_t               ----+-> varpd ID
  98  *  | char *                 ----+-> remote host
  99  *  | uint16_t               ----+-> remote port
 100  *  | svp_remote_t *   ---+------+-> remote backend
 101  *  +---------------------+------+
 102  *                        |
 103  *                        v
 104  *   +----------------------+                   +----------------+
 *   | Remote backend       |------------------>| Remote Backend |---> ...
 106  *   | svp_remote_t         |                   | svp_remote_t   |
 107  *   |                      |                   +----------------+
 108  *   | svp_remote_state_t --+-> state flags
 109  *   | svp_degrade_state_t -+-> degraded reason
 110  *   | struct addrinfo *  --+-> resolved hosts
 111  *   | uint_t            ---+-> active hosts
 112  *   | uint_t            ---+-> DNS generation
 113  *   | uint_t            ---+-> Reference count
 114  *   | uint_t            ---+-> active conns
 115  *   | uint_t            ---+-> degraded conns
 116  *   | list_t        ---+---+-> connection list
 117  *   +------------------+---+
 118  *                      |
 119  *                      +------------------------------+-----------------+
 120  *                      |                              |                 |
 121  *                      v                              v                 v
 *   +-------------------+                       +----------------+
 *   | SVP Connection    |                       | SVP Connection |     ...
 124  *   | svp_conn_t        |                       | svp_conn_t     |
 125  *   |                   |                       +----------------+
 126  *   | svp_event_t   ----+-> event loop handle
 127  *   | svp_timer_t   ----+-> backoff timer
 128  *   | svp_timer_t   ----+-> query timer
 129  *   | int           ----+-> socket fd
 130  *   | uint_t        ----+-> generation
 131  *   | uint_t        ----+-> current backoff
 132  *   | svp_conn_flags_t -+-> connection flags
 133  *   | svp_conn_state_t -+-> connection state
 134  *   | svp_conn_error_t -+-> connection error
 *   | int            ---+-> last errno
 136  *   | hrtime_t       ---+-> activity timestamp
 137  *   | svp_conn_out_t ---+-> outgoing data state
 138  *   | svp_conn_in_t  ---+-> incoming data state
 139  *   | list_t      ---+--+-> active queries
 140  *   +----------------+--+
 141  *                    |
 142  *                    +----------------------------------+-----------------+
 143  *                    |                                  |                 |
 144  *                    v                                  v                 v
 145  *   +--------------------+                       +-------------+
 146  *   | SVP Query          |                       | SVP Query   |         ...
 147  *   | svp_query_t        |                       | svp_query_t |
 148  *   |                    |                       +-------------+
 149  *   | svp_query_f     ---+-> callback function
 150  *   | void *          ---+-> callback arg
 151  *   | svp_query_state_t -+-> state flags
 152  *   | svp_req_t       ---+-> svp prot. header
 153  *   | svp_query_data_t --+-> read data
 154  *   | svp_query_data_t --+-> write data
 155  *   | svp_status_t    ---+-> request status
 156  *   +--------------------+
 157  *
 * The svp_t is the instance that we associate with varpd. The instance itself
 * maintains properties and then, when it's started, associates with an
 * svp_remote_t, which is the remote backend. The remote backend itself
 * maintains the DNS state and spins up and down connections based on the
 * results from DNS. By default, we query DNS every 30 seconds. For more on the
 * connection life cycle, see the next section.
 164  *
 * By default, each connection maintains its own backoff timer and list of
 * queries it's servicing. Only one request is generally outstanding at a time
 * on a given connection, and requests are round-robined across the various
 * connections.
 168  *
 * The query itself represents the svp request that's in flight; it keeps
 * track of its state and is a place for the data that's read and written as
 * part of the request.
 172  *
 173  * Connections maintain a query timer such that if we have not received data on
 174  * a socket for a certain amount of time, we kill that socket and begin a
 175  * reconnection cycle with backoff.
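 *
 * In rough terms, the query timer's check looks like the following (a sketch;
 * the connection member names are assumptions):
 *
 *      if (!list_is_empty(&scp->sc_queries) &&
 *          gethrtime() - scp->sc_lastact > query_timeout)
 *              tear down the socket and restart the connection with backoff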
 176  *
 177  * ------------------------
 178  * Connection State Machine
 179  * ------------------------
 180  *
 181  * We have a connection pool that's built upon DNS records. DNS describes the
 182  * membership of the set of remote peers that make up our pool and we maintain
 183  * one connection to each of them.  In addition, we maintain an exponential
 * backoff for each peer and will attempt to reconnect immediately before
 * backing off. The following are the valid states that a connection can be in:
 186  *
 187  *      SVP_CS_ERROR            An OS error has occurred on this connection,
 188  *                              such as failure to create a socket or associate
 189  *                              the socket with an event port. We also
 190  *                              transition all connections to this state before
 191  *                              we destroy them.
 192  *
 193  *      SVP_CS_INITIAL          This is the initial state of a connection, all
 194  *                              that should exist is an unbound socket.
 195  *
 196  *      SVP_CS_CONNECTING       A call to connect has been made and we are
 197  *                              polling for it to complete.
 198  *
 199  *      SVP_CS_BACKOFF          A connect attempt has failed and we are
 200  *                              currently backing off, waiting to try again.
 201  *
 202  *      SVP_CS_ACTIVE           We have successfully connected to the remote
 203  *                              system.
 204  *
 205  *      SVP_CS_WINDDOWN         This connection is going to valhalla. In other
 206  *                              words, a previously active connection is no
 207  *                              longer valid in DNS, so we should curb our use
 208  *                              of it, and reap it as soon as we have other
 209  *                              active connections.
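 *
 * Expressed as an enumeration (a sketch; the authoritative definition,
 * including the SVP_CS_VERSIONING state that appears in the diagram below,
 * lives in libvarpd_svp.h):
 *
 *      typedef enum svp_conn_state {
 *              SVP_CS_ERROR,
 *              SVP_CS_INITIAL,
 *              SVP_CS_CONNECTING,
 *              SVP_CS_BACKOFF,
 *              SVP_CS_VERSIONING,
 *              SVP_CS_ACTIVE,
 *              SVP_CS_WINDDOWN
 *      } svp_conn_state_t;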
 210  *
 211  * The following diagram attempts to describe our state transition scheme, and
 212  * when we transition from one state to the next.
 213  *
 214  *                               |
 215  *                               * New remote IP from DNS resolution,
 216  *                               | not currently active in the system.
 217  *                               |
 218  *                               v                                Socket Error,
 219  *                       +----------------+                       still in DNS
 220  *  +----------------<---| SVP_CS_INITIAL |<----------------------*--------+
 221  *  |                    +----------------+                                |
 222  *  |                            System  |                                 |
 223  *  | Connection . . . . .       success *     Successful                  |
 224  *  | failed             .               |     connect()                   |
 225  *  |                    .               |        +-------------------+    |
 *  |               +----*---------+     |    +-*>| SVP_CS_VERSIONING |    |
 227  *  |               |              |     |    |   +-------------------+    |
 228  *  |               |              |     |    |          V   V Set version |
 229  *  |               |              |     |    |          |   * based on    |
 230  *  |               |              |     |    |          |   | SVP_R_PONG  |
 231  *  |               V              ^     v    ^          |   V             ^
 232  *  |  +----------------+         +-------------------+  |  +---------------+
 233  *  +<-| SVP_CS_BACKOFF |         | SVP_CS_CONNECTING |  |  | SVP_CS_ACTIVE |
 234  *  |  +----------------+         +-------------------+  |  +---------------+
 235  *  |               V              ^  V                  |    V  V
 236  *  | Backoff wait  *              |  |                  |    |  * Removed
 237  *  v interval      +--------------+  +-----------------<+----+  | from DNS
 238  *  | finished                        |                          |
 239  *  |                                 V                          |
 240  *  |                                 |                          V
 241  *  |                                 |            +-----------------+
 242  *  +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
 243  *                   |                             +-----------------+
 244  *                   * . . .   Fatal system, not
 245  *                   |         socket error or
 246  *                   V         quiesced after
 247  *           +--------------+  removal from DNS
 248  *           | SVP_CS_ERROR |
 249  *           +--------------+
 250  *                   |
 251  *                   * . . . Removed from DNS
 252  *                   v
 253  *            +------------+
 254  *            | Connection |
 255  *            | Destroyed  |
 256  *            +------------+
 257  *
 258  * --------------------------
 259  * Connection Event Injection
 260  * --------------------------
 261  *
 262  * For each connection that exists in the system, we have a timer in place that
 263  * is in charge of performing timeout activity. It fires once every thirty
 264  * seconds or so for a given connection and checks to ensure that we have had
 265  * activity for the most recent query on the connection. If not, it terminates
 * the connection. This is important: if we have sent all our data and are
 * waiting for the remote end to reply, then without enabling something like
 * TCP keep-alive, we will not be notified of anything that has happened to the
 * remote connection, for example a panic. In addition, this also protects
 270  * against a server that is up, but a portolan that is not making forward
 271  * progress.
 272  *
 273  * When a timeout occurs, we first try to disassociate any active events, which
 274  * by definition must exist. Once that's done, we inject a port source user
 275  * event. Now, there is a small gotcha. Let's assume for a moment that we have a
 276  * pathological portolan. That means that it knows to inject activity right at
 * the timeout window. That means that the event may be disassociated before
 278  * we could get to it. If that's the case, we must _not_ inject the user event
 279  * and instead, we'll let the pending event take care of it. We know that the
 * pending event hasn't hit the main part of the loop yet; otherwise, it would
 281  * have released the lock protecting our state and associated the event.
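 *
 * A sketch of that logic, assuming illumos event ports and hypothetical
 * connection member names:
 *
 *      if (port_dissociate(evport, PORT_SOURCE_FD, scp->sc_socket) == 0) {
 *              nothing was pending, so it is safe to inject
 *              (void) port_send(evport, 0, &scp->sc_event);
 *      } else if (errno == ENOENT) {
 *              the event already fired and is pending delivery; it will
 *              observe the timeout itself, so we must not inject
 *      }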
 282  *
 283  * ------------
 284  * Notes on DNS
 285  * ------------
 286  *
 287  * Unfortunately, doing host name resolution in a way that allows us to leverage
 * the system's resolvers and the system's caching, requires us to make blocking
 289  * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
 290  * that will tie up a thread for quite some time. To work around that fact,
 291  * we're going to create a fixed number of threads and we'll use them to service
 292  * our DNS requests. While this isn't ideal, until we have a sane means of
 * integrating DNS resolution into an event loop with, say, portfs, it's not
 294  * going to be a fun day no matter what we do.
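 *
 * A minimal sketch of one such worker thread (svp_dns_dequeue(), sr_hostname,
 * and svp_remote_update_hosts() are hypothetical names for illustration):
 *
 *      for (;;) {
 *              svp_remote_t *srp = svp_dns_dequeue();      blocks for work
 *              struct addrinfo hints, *res;
 *
 *              bzero(&hints, sizeof (hints));
 *              hints.ai_family = AF_UNSPEC;
 *              hints.ai_socktype = SOCK_STREAM;
 *              if (getaddrinfo(srp->sr_hostname, NULL, &hints, &res) == 0)
 *                      svp_remote_update_hosts(srp, res);  new DNS generation
 *      }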
 295  *
 296  * ------
 297  * Timers
 298  * ------
 299  *
 * We maintain a single timer based on CLOCK_REALTIME that is designed to fire
 * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves from
 * timer drift; however, as zones may not actually have CLOCK_HIGHRES access,
 * we don't want to rely on it. The timer itself is just a simple avl tree
 * sorted by expiration time, which is stored as a tick in the future; a tick
 * is just one second.
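 *
 * On each tick, expiry processing is conceptually (a sketch; the st_* member
 * names are assumptions):
 *
 *      svp_tick++;
 *      while ((t = avl_first(&svp_timer_tree)) != NULL &&
 *          t->st_expire <= svp_tick) {
 *              avl_remove(&svp_timer_tree, t);
 *              t->st_func(t->st_arg);
 *              if (t is periodic) {
 *                      t->st_expire = svp_tick + t->st_value;
 *                      avl_add(&svp_timer_tree, t);
 *              }
 *      }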
 306  *
 307  * ----------
 308  * Shootdowns
 309  * ----------
 310  *
 311  * As part of the protocol, we need to be able to handle shootdowns that inform
 * us that some of the information in the system is out of date. This
 * information needs to be processed promptly; however, it is hopefully going
 * to be relatively infrequent compared to the normal flow of information.
 315  *
 * The shootdown information needs to be processed on a per-backend basis. The
 * general design is that we'll have a single query for this which can fire on
 * a 5-10 second period; we randomize the latter part to give us a bit more
 * load spreading. If we complete because there's no work to do, then we wait
 * the normal period. If we complete, but there's still work to do, we'll go
 * again after a second.
 322  *
 323  * A shootdown has a few different parts. We first receive a list of items to
 324  * shootdown. After performing all of those, we need to acknowledge them. When
 325  * that's been done successfully, we can move onto the next part. From a
 * protocol perspective, we make an SVP_R_LOG_REQ, we get a reply, and then
 * after processing them, send an SVP_R_LOG_RM. Only once that's been acked do
 * we continue.
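 *
 * A sketch of the exchange (the ACK message names follow the protocol
 * header's naming convention):
 *
 *      varpd                                        portolan
 *        |----- SVP_R_LOG_REQ ----------------------->|
 *        |<---- SVP_R_LOG_ACK (pending entries) ------|
 *        |      ... apply each invalidation ...       |
 *        |----- SVP_R_LOG_RM (processed entries) ---->|
 *        |<---- SVP_R_LOG_RM_ACK ---------------------|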
 329  *
 * However, one of the challenges that we have is that these invalidations are
 * just that: invalidations. For a virtual layer two request, that's fine,
 332  * because the kernel supports that. However, for virtual layer three
 333  * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
 334  * don't really support a notion of just an invalidation, instead you have to
 335  * inject the new data in a gratuitous fashion.
 336  *
 337  * To that end, what we instead do is when we receive a VL3 invalidation, we
 * turn that into a VL3 request. We hold the general request as outstanding
 339  * until we receive all of the callbacks for the VL3 invalidations, at which
 340  * point we go through and do the log removal request.
 341  */
 342 
 343 #include <umem.h>
 344 #include <errno.h>
 345 #include <stdlib.h>
 346 #include <sys/types.h>
 347 #include <sys/socket.h>
 348 #include <netinet/in.h>
 349 #include <arpa/inet.h>
 350 #include <libnvpair.h>
 351 #include <strings.h>
 352 #include <string.h>
 353 #include <assert.h>
 354 #include <unistd.h>
 355 
 356 #include <libvarpd_provider.h>
 357 #include "libvarpd_svp.h"
 358 
 359 bunyan_logger_t *svp_bunyan;
 360 static int svp_defport = 1296;
 361 static int svp_defuport = 1339;
 362 static umem_cache_t *svp_lookup_cache;
 363 
 364 typedef enum svp_lookup_type {
 365         SVP_L_UNKNOWN   = 0x0,
 366         SVP_L_VL2       = 0x1,
 367         SVP_L_VL3       = 0x2,
 368         SVP_L_ROUTE     = 0x3
 369 } svp_lookup_type_t;
 370 
 371 typedef struct svp_lookup {
 372         int svl_type;
 373         union {
 374                 struct svl_lookup_vl2 {
 375                         varpd_query_handle_t    *svl_handle;
 376                         overlay_target_point_t  *svl_point;
 377                 } svl_vl2;
 378                 struct svl_lookup_vl3 {
 379                         varpd_arp_handle_t      *svl_vah;
 380                         uint8_t                 *svl_out;
 381                 } svl_vl3;
 382                 struct svl_lookup_route {
 383                         varpd_query_handle_t    *svl_handle;
 384                         overlay_target_point_t  *svl_point;
 385                         overlay_target_route_t  *svl_route;
 386                         overlay_target_mac_t    *svl_mac;
 387                 } svl_route;
 388         } svl_u;
 389         svp_query_t                             svl_query;
 390 } svp_lookup_t;
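
/*
 * A lookup is allocated from svp_lookup_cache, tagged with an
 * svp_lookup_type_t, and handed to the remote backend via the embedded
 * svl_query. Only the union member matching svl_type is valid, and each
 * completion callback below frees the lookup back to the cache.
 */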
 391 
 392 static const char *varpd_svp_props[] = {
 393         "svp/host",
 394         "svp/port",
 395         "svp/underlay_ip",
 396         "svp/underlay_port",
 397         "svp/dcid",
 398         "svp/router_oui"
 399 };
 400 
 401 static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 402 
 403 int
 404 svp_comparator(const void *l, const void *r)
 405 {
 406         const svp_t *ls = l;
 407         const svp_t *rs = r;
 408 
 409         if (ls->svp_vid > rs->svp_vid)
 410                 return (1);
 411         if (ls->svp_vid < rs->svp_vid)
 412                 return (-1);
 413         return (0);
 414 }
 415 
 416 static void
 417 svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
 418     const uint16_t uport, void *arg)
 419 {
 420         svp_lookup_t *svl = arg;
 421         overlay_target_point_t *otp;
 422 
 423         assert(svp != NULL);
 424         assert(arg != NULL);
 425 
 426         if (status != SVP_S_OK) {
 427                 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 428                     VARPD_LOOKUP_DROP);
 429                 umem_cache_free(svp_lookup_cache, svl);
 430                 return;
 431         }
 432 
 433         otp = svl->svl_u.svl_vl2.svl_point;
 434         bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
 435         otp->otp_port = uport;
 436         libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 437             VARPD_LOOKUP_OK);
 438         umem_cache_free(svp_lookup_cache, svl);
 439 }
 440 
 441 static void
 442 svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
 443     const struct in6_addr *uip, const uint16_t uport, void *arg)
 444 {
 445         /* Initialize address-holders to 0 for comparisons-to-zeroes later. */
 446         overlay_target_point_t point = { 0 };
 447         svp_lookup_t *svl = arg;
 448         uint8_t nexthop_mac[6] = { 0, 0, 0, 0, 0, 0 };
 449 
 450         assert(svp != NULL);
 451         assert(svl != NULL);
 452 
 453         if (status != SVP_S_OK) {
 454                 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 455                     VARPD_LOOKUP_DROP);
 456                 umem_cache_free(svp_lookup_cache, svl);
 457                 return;
 458         }
 459 
 460         /* Inject the L2 mapping before the L3 */
 461         if (uport != 0 &&
 462             bcmp(uip, &point.otp_ip, sizeof (struct in6_addr)) != 0) {
 463                 /* Normal L3 lookup result... */
 464                 bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
 465                 point.otp_port = uport;
 466                 libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
 467         } else {
 468                 /*
 469                  * Oh my, we have a next-hop router IP.
                 * Set the MAC to the oui+vid concatenated
 471                  * special-router-MAC. Overlay down below will know
 472                  * that uport == 0 means the MAC is a special one.
 473                  */
 474                 if (bcmp(svp->svp_router_oui, nexthop_mac, ETHERADDRL) == 0) {
 475                         /*
 476                          * We don't have a router_oui, so we can't support
 477                          * special-router-MAC.  Drop it.
 478                          */
 479                         libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 480                             VARPD_LOOKUP_DROP);
 481                         umem_cache_free(svp_lookup_cache, svl);
 482                         return;
 483                 }
 484                 bcopy(svp->svp_router_oui, nexthop_mac, 3);
 485                 nexthop_mac[3] = (svp->svp_vid >> 16) & 0xff;
 486                 nexthop_mac[4] = (svp->svp_vid >> 8) & 0xff;
 487                 nexthop_mac[5] = svp->svp_vid & 0xff;
 488                 vl2mac = nexthop_mac;
 489         }
 490 
 491         bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
 492         libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 493             VARPD_LOOKUP_OK);
 494         umem_cache_free(svp_lookup_cache, svl);
 495 }
 496 
 497 static void
 498 svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
 499 {
 500         libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 501 }
 502 
 503 static void
 504 svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
 505     const uint8_t *vl2mac, const uint8_t *targmac)
 506 {
 507         struct in_addr v4;
 508 
 509         /*
         * At the moment we don't support any IPv6 related log entries; this
 511          * will change soon as we develop a bit more of the IPv6 related
 512          * infrastructure so we can properly test the injection.
 513          */
 514         if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
 515                 return;
 516         } else {
 517                 IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
 518                 if (targmac == NULL)
 519                         targmac = svp_bcast;
 520                 libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
 521         }
 522 }
 523 
 524 /* ARGSUSED */
 525 static void
 526 svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
 527     const uint16_t uport)
 528 {
 529         /*
 530          * We should probably do a conditional invalidation here.
 531          */
 532         libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 533 }
 534 
 535 static void
 536 svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
 537     uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
 538     uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
 539     void *arg)
 540 {
 541         svp_lookup_t *svl = arg;
 542         overlay_target_point_t *otp;
 543         overlay_target_route_t *otr;
 544         overlay_target_mac_t *otm;
 545 
 546         if (status != SVP_S_OK) {
 547                 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 548                     VARPD_LOOKUP_DROP);
 549                 umem_cache_free(svp_lookup_cache, svl);
 550                 return;
 551         }
 552 
 553         otp = svl->svl_u.svl_route.svl_point;
 554         bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
 555         otp->otp_port = ul3_port;
 556 
 557         otr = svl->svl_u.svl_route.svl_route;
 558         otr->otr_vnet = vnetid;
 559         otr->otr_vlan = vlan;
 560         bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
 561 
 562         otm = svl->svl_u.svl_route.svl_mac;
 563         otm->otm_dcid = dcid;
 564         bcopy(dstmac, otm->otm_mac, ETHERADDRL);
 565 
 566         libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 567             VARPD_LOOKUP_OK);
 568         umem_cache_free(svp_lookup_cache, svl);
 569 }
 570 
 571 static svp_cb_t svp_defops = {
 572         svp_vl2_lookup_cb,
 573         svp_vl3_lookup_cb,
 574         svp_vl2_invalidate_cb,
 575         svp_vl3_inject_cb,
 576         svp_shootdown_cb,
 577         svp_route_lookup_cb,
 578 };
 579 
 580 static boolean_t
 581 varpd_svp_valid_dest(overlay_plugin_dest_t dest)
 582 {
 583         if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 584                 return (B_FALSE);
 585 
 586         return (B_TRUE);
 587 }
 588 
 589 static int
 590 varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
 591     overlay_plugin_dest_t dest)
 592 {
 593         int ret;
 594         svp_t *svp;
 595 
 596         if (varpd_svp_valid_dest(dest) == B_FALSE)
 597                 return (ENOTSUP);
 598 
 599         svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
 600         if (svp == NULL)
 601                 return (ENOMEM);
 602 
 603         if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 604             NULL)) != 0) {
 605                 umem_free(svp, sizeof (svp_t));
 606                 return (ret);
 607         }
 608 
 609         svp->svp_port = svp_defport;
 610         svp->svp_uport = svp_defuport;
 611         svp->svp_cb = svp_defops;
 612         svp->svp_hdl = hdl;
 613         svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
 614         *outp = svp;
 615         return (0);
 616 }
 617 
 618 static int
 619 varpd_svp_start(void *arg)
 620 {
 621         int ret;
 622         svp_remote_t *srp;
 623         svp_t *svp = arg;
 624 
 625         mutex_enter(&svp->svp_lock);
 626         if (svp->svp_host == NULL || svp->svp_port == 0 ||
 627             svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
 628                 mutex_exit(&svp->svp_lock);
 629                 return (EAGAIN);
 630         }
 631         mutex_exit(&svp->svp_lock);
 632 
 633         if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
 634             &srp)) != 0)
 635                 return (ret);
 636 
 637         if ((ret = svp_remote_attach(srp, svp)) != 0) {
 638                 svp_remote_release(srp);
 639                 return (ret);
 640         }
 641 
 642         return (0);
 643 }
 644 
 645 static void
 646 varpd_svp_stop(void *arg)
 647 {
 648         svp_t *svp = arg;
 649 
 650         svp_remote_detach(svp);
 651 }
 652 
 653 static void
 654 varpd_svp_destroy(void *arg)
 655 {
 656         svp_t *svp = arg;
 657 
 658         if (svp->svp_host != NULL)
 659                 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 660 
 661         if (mutex_destroy(&svp->svp_lock) != 0)
 662                 libvarpd_panic("failed to destroy svp_t`svp_lock");
 663 
 664         umem_free(svp, sizeof (svp_t));
 665 }
 666 
 667 static void
 668 varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
 669     const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 670     overlay_target_route_t *otr, overlay_target_mac_t *otm)
 671 {
 672         svp_lookup_t *slp;
 673         uint32_t type;
 674         const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
 675             *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 676 
 677         /*
 678          * otl is an L3 request, so we have src/dst IPs for the inner packet.
 679          * We also have the vlan.
 680          *
         * Assume the kernel's overlay module is caching well, so we are
         * directly going to query (i.e. no caching up here of actual
         * destinations).
         *
         * We use our existing remote server (svp_remote), but with the new
         * message SVP_R_ROUTE_REQ.
 686          */
 687 
 688         if (IN6_IS_ADDR_V4MAPPED(src)) {
 689                 if (!IN6_IS_ADDR_V4MAPPED(dst)) {
 690                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 691                         return;
 692                 }
 693                 type = SVP_VL3_IP;
 694         } else {
 695                 if (IN6_IS_ADDR_V4MAPPED(dst)) {
 696                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 697                         return;
 698                 }
 699                 type = SVP_VL3_IPV6;
 700         }
 701 
 702         slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 703         if (slp == NULL) {
 704                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 705                 return;
 706         }
 707 
 708         slp->svl_type = SVP_L_ROUTE;
 709         slp->svl_u.svl_route.svl_handle = vqh;
 710         slp->svl_u.svl_route.svl_point = otp;
 711         slp->svl_u.svl_route.svl_route = otr;
 712         slp->svl_u.svl_route.svl_mac = otm;
 713 
 714         svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
 715             otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
 716 }
 717 
 718 static void
 719 varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
 720     const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 721     overlay_target_route_t *otr, overlay_target_mac_t *otm)
 722 {
 723         svp_lookup_t *slp;
 724         svp_t *svp = arg;
 725 
 726         /*
 727          * Shuffle off L3 lookups to their own codepath.
 728          */
 729         if (otl->otl_l3req) {
 730                 varpd_svp_lookup_l3(svp, vqh, otl, otp, otr, otm);
 731                 return;
 732         }
 733         /*
 734          * At this point, the traditional overlay_target_point_t is all that
 735          * needs filling in.  Zero-out the otr for safety.
 736          */
 737         bzero(otr, sizeof (*otr));
 738 
 740         /*
         * Check if this is something that we need to proxy, e.g. ARP or NDP.
 742          */
 743         if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
 744                 libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
 745                 return;
 746         }
 747 
 748         if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
 749             otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
 750                 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
 751                         libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
 752                 } else {
 753                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 754                 }
 755                 return;
 756         }
 757 
 758         /*
         * Watch out for various multicast and broadcast addresses. We've
         * already taken care of the IPv6 range above. Now we just need to
         * handle broadcast and multicast: if the multicast bit is set (the
         * lowest bit of the first octet of the MAC), then we drop it now.
 763          */
 764         if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
 765             ETHERADDRL) == 0 ||
 766             (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
 767                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 768                 return;
 769         }
 770 
 771         /*
 772          * If we have a failure to allocate memory for this, that's not good.
 773          * However, telling the kernel to just drop this packet is much better
 774          * than the alternative at this moment. At least we'll try again and we
 775          * may have something more available to us in a little bit.
 776          */
 777         slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 778         if (slp == NULL) {
 779                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 780                 return;
 781         }
 782 
 783         slp->svl_type = SVP_L_VL2;
 784         slp->svl_u.svl_vl2.svl_handle = vqh;
 785         slp->svl_u.svl_vl2.svl_point = otp;
 786 
 787         svp_remote_vl2_lookup(svp, &slp->svl_query,
 788             otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
 789 }
 790 
 791 /* ARGSUSED */
 792 static int
 793 varpd_svp_nprops(void *arg, uint_t *nprops)
 794 {
 795         *nprops = sizeof (varpd_svp_props) / sizeof (char *);
 796         return (0);
 797 }
 798 
 799 /* ARGSUSED */
 800 static int
 801 varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
 802 {
 803         switch (propid) {
 804         case 0:
 805                 /* svp/host */
 806                 libvarpd_prop_set_name(vph, varpd_svp_props[0]);
 807                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 808                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
 809                 libvarpd_prop_set_nodefault(vph);
 810                 break;
 811         case 1:
 812                 /* svp/port */
 813                 libvarpd_prop_set_name(vph, varpd_svp_props[1]);
 814                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 815                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 816                 (void) libvarpd_prop_set_default(vph, &svp_defport,
 817                     sizeof (svp_defport));
 818                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 819                 break;
 820         case 2:
 821                 /* svp/underlay_ip */
 822                 libvarpd_prop_set_name(vph, varpd_svp_props[2]);
 823                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 824                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
 825                 libvarpd_prop_set_nodefault(vph);
 826                 break;
 827         case 3:
 828                 /* svp/underlay_port */
 829                 libvarpd_prop_set_name(vph, varpd_svp_props[3]);
 830                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 831                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 832                 (void) libvarpd_prop_set_default(vph, &svp_defuport,
 833                     sizeof (svp_defuport));
 834                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 835                 break;
 836         case 4:
 837                 /* svp/dcid */
 838                 libvarpd_prop_set_name(vph, varpd_svp_props[4]);
 839                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 840                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 841                 libvarpd_prop_set_nodefault(vph);
 842                 libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
 843                 break;
 844         case 5:
 845                 /* svp/router_oui */
 846                 libvarpd_prop_set_name(vph, varpd_svp_props[5]);
 847                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 848                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
 849                 libvarpd_prop_set_nodefault(vph);
 850                 break;
 851         default:
 852                 return (EINVAL);
 853         }
 854         return (0);
 855 }
 856 
 857 static int
 858 varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
 859 {
 860         svp_t *svp = arg;
 861 
 862         /* svp/host */
 863         if (strcmp(pname, varpd_svp_props[0]) == 0) {
 864                 size_t len;
 865 
 866                 mutex_enter(&svp->svp_lock);
 867                 if (svp->svp_host == NULL) {
 868                         *sizep = 0;
 869                 } else {
 870                         len = strlen(svp->svp_host) + 1;
 871                         if (*sizep < len) {
 872                                 mutex_exit(&svp->svp_lock);
 873                                 return (EOVERFLOW);
 874                         }
 875                         *sizep = len;
 876                         (void) strlcpy(buf, svp->svp_host, *sizep);
 877                 }
 878                 mutex_exit(&svp->svp_lock);
 879                 return (0);
 880         }
 881 
 882         /* svp/port */
 883         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 884                 uint64_t val;
 885 
 886                 if (*sizep < sizeof (uint64_t))
 887                         return (EOVERFLOW);
 888 
 889                 mutex_enter(&svp->svp_lock);
 890                 if (svp->svp_port == 0) {
 891                         *sizep = 0;
 892                 } else {
 893                         val = svp->svp_port;
 894                         bcopy(&val, buf, sizeof (uint64_t));
 895                         *sizep = sizeof (uint64_t);
 896                 }
 897                 mutex_exit(&svp->svp_lock);
 898                 return (0);
 899         }
 900 
 901         /* svp/underlay_ip */
 902         if (strcmp(pname, varpd_svp_props[2]) == 0) {
 903                 if (*sizep < sizeof (struct in6_addr))
 904                         return (EOVERFLOW);
 905                 mutex_enter(&svp->svp_lock);
 906                 if (svp->svp_huip == B_FALSE) {
 907                         *sizep = 0;
 908                 } else {
 909                         bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
 910                         *sizep = sizeof (struct in6_addr);
 911                 }
 912                 mutex_exit(&svp->svp_lock);
 913                 return (0);
 914         }
 915 
 916         /* svp/underlay_port */
 917         if (strcmp(pname, varpd_svp_props[3]) == 0) {
 918                 uint64_t val;
 919 
 920                 if (*sizep < sizeof (uint64_t))
 921                         return (EOVERFLOW);
 922 
 923                 mutex_enter(&svp->svp_lock);
 924                 if (svp->svp_uport == 0) {
 925                         *sizep = 0;
 926                 } else {
 927                         val = svp->svp_uport;
 928                         bcopy(&val, buf, sizeof (uint64_t));
 929                         *sizep = sizeof (uint64_t);
 930                 }
 931 
 932                 mutex_exit(&svp->svp_lock);
 933                 return (0);
 934         }
 935 
 936         /* svp/dcid */
 937         if (strcmp(pname, varpd_svp_props[4]) == 0) {
 938                 uint64_t val;
 939 
 940                 if (*sizep < sizeof (uint64_t))
 941                         return (EOVERFLOW);
 942 
 943                 mutex_enter(&svp->svp_lock);
                if (svp->svp_dcid == 0) {
 945                         *sizep = 0;
 946                 } else {
 947                         val = svp->svp_dcid;
 948                         bcopy(&val, buf, sizeof (uint64_t));
 949                         *sizep = sizeof (uint64_t);
 950                 }
 951 
 952                 mutex_exit(&svp->svp_lock);
 953                 return (0);
 954         }
 955 
 956         /* svp/router_oui */
 957         if (strcmp(pname, varpd_svp_props[5]) == 0) {
 958                 if (*sizep < ETHERADDRL)
 959                         return (EOVERFLOW);
 960                 mutex_enter(&svp->svp_lock);
 961 
 962                 if (ether_is_zero(&svp->svp_router_oui)) {
 963                         *sizep = 0;
 964                 } else {
 965                         bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
 966                         *sizep = ETHERADDRL;
 967                 }
 968 
 969                 mutex_exit(&svp->svp_lock);
 970                 return (0);
 971         }
 972         return (EINVAL);
 973 }
 974 
 975 static int
 976 varpd_svp_setprop(void *arg, const char *pname, const void *buf,
 977     const uint32_t size)
 978 {
 979         svp_t *svp = arg;
 980 
 981         /* svp/host */
 982         if (strcmp(pname, varpd_svp_props[0]) == 0) {
                char *dup;
                dup = umem_alloc(size, UMEM_DEFAULT);
                if (dup == NULL)
                        return (ENOMEM);
                (void) strlcpy(dup, buf, size);
 988                 mutex_enter(&svp->svp_lock);
 989                 if (svp->svp_host != NULL)
 990                         umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 991                 svp->svp_host = dup;
 992                 mutex_exit(&svp->svp_lock);
 993                 return (0);
 994         }
 995 
 996         /* svp/port */
 997         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 998                 const uint64_t *valp = buf;
 999                 if (size < sizeof (uint64_t))
1000                         return (EOVERFLOW);
1001 
1002                 if (*valp == 0 || *valp > UINT16_MAX)
1003                         return (EINVAL);
1004 
1005                 mutex_enter(&svp->svp_lock);
1006                 svp->svp_port = (uint16_t)*valp;
1007                 mutex_exit(&svp->svp_lock);
1008                 return (0);
1009         }
1010 
1011         /* svp/underlay_ip */
1012         if (strcmp(pname, varpd_svp_props[2]) == 0) {
1013                 const struct in6_addr *ipv6 = buf;
1014 
1015                 if (size < sizeof (struct in6_addr))
1016                         return (EOVERFLOW);
1017 
1018                 if (IN6_IS_ADDR_V4COMPAT(ipv6))
1019                         return (EINVAL);
1020 
1021                 if (IN6_IS_ADDR_MULTICAST(ipv6))
1022                         return (EINVAL);
1023 
1024                 if (IN6_IS_ADDR_6TO4(ipv6))
1025                         return (EINVAL);
1026 
1027                 if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
1028                         ipaddr_t v4;
1029                         IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
1030                         if (IN_MULTICAST(v4))
1031                                 return (EINVAL);
1032                 }
1033 
1034                 mutex_enter(&svp->svp_lock);
1035                 bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1036                 svp->svp_huip = B_TRUE;
1037                 mutex_exit(&svp->svp_lock);
1038                 return (0);
1039         }
1040 
1041         /* svp/underlay_port */
1042         if (strcmp(pname, varpd_svp_props[3]) == 0) {
1043                 const uint64_t *valp = buf;
1044                 if (size < sizeof (uint64_t))
1045                         return (EOVERFLOW);
1046 
1047                 if (*valp == 0 || *valp > UINT16_MAX)
1048                         return (EINVAL);
1049 
1050                 mutex_enter(&svp->svp_lock);
1051                 svp->svp_uport = (uint16_t)*valp;
1052                 mutex_exit(&svp->svp_lock);
1053 
1054                 return (0);
1055         }
1056 
1057         /* svp/dcid */
1058         if (strcmp(pname, varpd_svp_props[4]) == 0) {
1059                 const uint64_t *valp = buf;
1060                 if (size < sizeof (uint64_t))
1061                         return (EOVERFLOW);
1062 
1063                 if (*valp == 0 || *valp > UINT32_MAX - 1)
1064                         return (EINVAL);
1065 
1066                 mutex_enter(&svp->svp_lock);
1067                 svp->svp_dcid = (uint32_t)*valp;
1068                 mutex_exit(&svp->svp_lock);
1069 
1070                 return (0);
1071         }
1072 
1073         /* svp/router_oui */
1074         if (strcmp(pname, varpd_svp_props[5]) == 0) {
1075                 if (size < ETHERADDRL)
1076                         return (EOVERFLOW);
1077                 mutex_enter(&svp->svp_lock);
1078                 bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
1079                 /* Zero-out the low three bytes. */
1080                 svp->svp_router_oui[3] = 0;
1081                 svp->svp_router_oui[4] = 0;
1082                 svp->svp_router_oui[5] = 0;
1083                 mutex_exit(&svp->svp_lock);
1084                 return (0);
1085         }
1086 
1087         return (EINVAL);
1088 }
1089 
1090 static int
1091 varpd_svp_save(void *arg, nvlist_t *nvp)
1092 {
1093         int ret;
1094         svp_t *svp = arg;
1095 
1096         mutex_enter(&svp->svp_lock);
1097         /* svp/host */
1098         if (svp->svp_host != NULL) {
1099                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1100                     svp->svp_host)) != 0) {
1101                         mutex_exit(&svp->svp_lock);
1102                         return (ret);
1103                 }
1104         }
1105 
1106         /* svp/port */
1107         if (svp->svp_port != 0) {
1108                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1109                     svp->svp_port)) != 0) {
1110                         mutex_exit(&svp->svp_lock);
1111                         return (ret);
1112                 }
1113         }
1114 
1115         /* svp/underlay_ip */
1116         if (svp->svp_huip == B_TRUE) {
1117                 char buf[INET6_ADDRSTRLEN];
1118 
1119                 if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1120                     NULL)
1121                         libvarpd_panic("unexpected inet_ntop failure: %d",
1122                             errno);
1123 
1124                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1125                     buf)) != 0) {
1126                         mutex_exit(&svp->svp_lock);
1127                         return (ret);
1128                 }
1129         }
1130 
1131         /* svp/underlay_port */
1132         if (svp->svp_uport != 0) {
1133                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1134                     svp->svp_uport)) != 0) {
1135                         mutex_exit(&svp->svp_lock);
1136                         return (ret);
1137                 }
1138         }
1139 
1140         /* svp/dcid */
1141         if (svp->svp_dcid != 0) {
1142                 if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1143                     svp->svp_dcid)) != 0) {
1144                         mutex_exit(&svp->svp_lock);
1145                         return (ret);
1146                 }
1147         }
1148 
1149         /* svp/router_oui */
1150         if (!ether_is_zero(&svp->svp_router_oui)) {
1151                 char buf[ETHERADDRSTRL];
1152 
1153                 if (ether_ntoa_r((struct ether_addr *)&svp->svp_router_oui,
1154                     buf) == NULL) {
1155                         libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1156                             errno);
1157                 }
1158 
1159                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1160                     buf)) != 0) {
1161                         mutex_exit(&svp->svp_lock);
1162                         return (ret);
1163                 }
1164         }
1165 
1166         mutex_exit(&svp->svp_lock);
1167         return (0);
1168 }
1169 
1170 static int
1171 varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1172     overlay_plugin_dest_t dest, void **outp)
1173 {
1174         int ret;
1175         svp_t *svp;
1176         char *ipstr, *hstr, *etherstr;
1177 
1178         if (varpd_svp_valid_dest(dest) == B_FALSE)
1179                 return (ENOTSUP);
1180 
1181         if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1182                 return (ret);
1183 
1184         /* svp/host */
1185         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1186             &hstr)) != 0) {
1187                 if (ret != ENOENT) {
1188                         varpd_svp_destroy(svp);
1189                         return (ret);
1190                 }
1191                 svp->svp_host = NULL;
1192         } else {
1193                 size_t blen = strlen(hstr) + 1;
                svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
                if (svp->svp_host == NULL) {
                        varpd_svp_destroy(svp);
                        return (ENOMEM);
                }
                (void) strlcpy(svp->svp_host, hstr, blen);
1196         }
1197 
1198         /* svp/port */
1199         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1200             &svp->svp_port)) != 0) {
1201                 if (ret != ENOENT) {
1202                         varpd_svp_destroy(svp);
1203                         return (ret);
1204                 }
1205                 svp->svp_port = 0;
1206         }
1207 
1208         /* svp/underlay_ip */
1209         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1210             &ipstr)) != 0) {
1211                 if (ret != ENOENT) {
1212                         varpd_svp_destroy(svp);
1213                         return (ret);
1214                 }
1215                 svp->svp_huip = B_FALSE;
1216         } else {
1217                 ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1218                 if (ret == -1) {
1219                         assert(errno == EAFNOSUPPORT);
1220                         libvarpd_panic("unexpected inet_pton failure: %d",
1221                             errno);
1222                 }
1223 
1224                 if (ret == 0) {
1225                         varpd_svp_destroy(svp);
1226                         return (EINVAL);
1227                 }
1228                 svp->svp_huip = B_TRUE;
1229         }
1230 
1231         /* svp/underlay_port */
1232         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1233             &svp->svp_uport)) != 0) {
1234                 if (ret != ENOENT) {
1235                         varpd_svp_destroy(svp);
1236                         return (ret);
1237                 }
1238                 svp->svp_uport = 0;
1239         }
1240 
1241         /* svp/dcid */
1242         if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1243             &svp->svp_dcid)) != 0) {
1244                 if (ret != ENOENT) {
1245                         varpd_svp_destroy(svp);
1246                         return (ret);
1247                 }
1248                 svp->svp_dcid = 0;
1249         }
1250 
1251         /* svp/router_oui */
1252         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1253             &etherstr)) != 0) {
1254                 if (ret != ENOENT) {
1255                         varpd_svp_destroy(svp);
1256                         return (ret);
1257                 }
1258                 bzero(&svp->svp_router_oui, ETHERADDRL);
1259         } else if (ether_aton_r(etherstr,
1260             (struct ether_addr *)&svp->svp_router_oui) == NULL) {
1261                 libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1262         }
1263 
1264         svp->svp_hdl = hdl;
1265         *outp = svp;
1266         return (0);
1267 }
1268 
1269 static void
1270 varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1271     const struct sockaddr *sock, uint16_t vlan __unused, uint8_t *out)
1272 {
1273         svp_t *svp = arg;
1274         svp_lookup_t *svl;
1275 
1276         if (type != VARPD_QTYPE_ETHERNET) {
1277                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1278                 return;
1279         }
1280 
1281         svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1282         if (svl == NULL) {
1283                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1284                 return;
1285         }
1286 
1287         svl->svl_type = SVP_L_VL3;
1288         svl->svl_u.svl_vl3.svl_vah = vah;
1289         svl->svl_u.svl_vl3.svl_out = out;
1290         svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1291 }
1292 
1293 static const varpd_plugin_ops_t varpd_svp_ops = {
1294         0,
1295         varpd_svp_create,
1296         varpd_svp_start,
1297         varpd_svp_stop,
1298         varpd_svp_destroy,
1299         NULL,
1300         varpd_svp_lookup,
1301         varpd_svp_nprops,
1302         varpd_svp_propinfo,
1303         varpd_svp_getprop,
1304         varpd_svp_setprop,
1305         varpd_svp_save,
1306         varpd_svp_restore,
1307         varpd_svp_arp,
1308         NULL
1309 };
1310 
1311 static int
1312 svp_bunyan_init(void)
1313 {
1314         int ret;
1315 
1316         if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1317                 return (ret);
1318         ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1319             bunyan_stream_fd, (void *)STDERR_FILENO);
1320         if (ret != 0)
1321                 bunyan_fini(svp_bunyan);
1322         return (ret);
1323 }
1324 
1325 static void
1326 svp_bunyan_fini(void)
1327 {
1328         if (svp_bunyan != NULL)
1329                 bunyan_fini(svp_bunyan);
1330 }
1331 
1332 #pragma init(varpd_svp_init)
1333 static void
1334 varpd_svp_init(void)
1335 {
1336         int err;
1337         varpd_plugin_register_t *vpr;
1338 
1339         if (svp_bunyan_init() != 0)
1340                 return;
1341 
1342         if ((err = svp_host_init()) != 0) {
1343                 (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1344                     BUNYAN_T_INT32, "error", err,
1345                     BUNYAN_T_END);
1346                 svp_bunyan_fini();
1347                 return;
1348         }
1349 
1350         svp_lookup_cache = umem_cache_create("svp_lookup",
1351             sizeof (svp_lookup_t),  0, NULL, NULL, NULL, NULL, NULL, 0);
1352         if (svp_lookup_cache == NULL) {
1353                 (void) bunyan_error(svp_bunyan,
1354                     "failed to create svp_lookup cache",
1355                     BUNYAN_T_INT32, "error", errno,
1356                     BUNYAN_T_END);
1357                 svp_bunyan_fini();
1358                 return;
1359         }
1360 
1361         if ((err = svp_event_init()) != 0) {
1362                 (void) bunyan_error(svp_bunyan,
1363                     "failed to init event subsystem",
1364                     BUNYAN_T_INT32, "error", err,
1365                     BUNYAN_T_END);
1366                 svp_bunyan_fini();
1367                 umem_cache_destroy(svp_lookup_cache);
1368                 return;
1369         }
1370 
1371         if ((err = svp_timer_init()) != 0) {
1372                 (void) bunyan_error(svp_bunyan,
1373                     "failed to init timer subsystem",
1374                     BUNYAN_T_INT32, "error", err,
1375                     BUNYAN_T_END);
1376                 svp_event_fini();
1377                 umem_cache_destroy(svp_lookup_cache);
1378                 svp_bunyan_fini();
1379                 return;
1380         }
1381 
1382         if ((err = svp_remote_init()) != 0) {
1383                 (void) bunyan_error(svp_bunyan,
1384                     "failed to init remote subsystem",
1385                     BUNYAN_T_INT32, "error", err,
1386                     BUNYAN_T_END);
1387                 svp_event_fini();
1388                 umem_cache_destroy(svp_lookup_cache);
1389                 svp_bunyan_fini();
1390                 return;
1391         }
1392 
1393         vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1394         if (vpr == NULL) {
1395                 (void) bunyan_error(svp_bunyan,
1396                     "failed to alloc varpd plugin",
1397                     BUNYAN_T_INT32, "error", err,
1398                     BUNYAN_T_END);
1399                 svp_remote_fini();
1400                 svp_event_fini();
1401                 umem_cache_destroy(svp_lookup_cache);
1402                 svp_bunyan_fini();
1403                 return;
1404         }
1405 
1406         vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1407         vpr->vpr_name = "svp";
1408         vpr->vpr_ops = &varpd_svp_ops;
1409 
1410         if ((err = libvarpd_plugin_register(vpr)) != 0) {
1411                 (void) bunyan_error(svp_bunyan,
1412                     "failed to register varpd plugin",
1413                     BUNYAN_T_INT32, "error", err,
1414                     BUNYAN_T_END);
1415                 svp_remote_fini();
1416                 svp_event_fini();
1417                 umem_cache_destroy(svp_lookup_cache);
1418                 svp_bunyan_fini();
1420         }
1421         libvarpd_plugin_free(vpr);
1422 }