1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018, Joyent, Inc.
  14  */
  15 
  16 /*
  17  * This plugin implements the SDC VXLAN Protocol (SVP).
  18  *
  19  * This plugin is designed to work with a broader distributed system that
   20  * maintains a database of mappings, provides a means of looking up data, and
   21  * provides a stream of updates. While it is named after VXLAN, there isn't
   22  * anything specific to VXLAN baked into the protocol at this time, other than
   23  * that it requires both an IP address and a port; however, if there's a good
   24  * reason to support other encapsulations here, we can modify that.
  25  *
  26  * -----------
  27  * Terminology
  28  * -----------
  29  *
  30  * Throughout this module we refer to a few different kinds of addresses:
  31  *
  32  *    VL3
  33  *
   34  *      A VL3 address, or virtual layer 3, refers to the layer three addresses
  35  *      that are used by entities on an overlay network. As far as we're
  36  *      concerned that means that this is the IP address of an interface on an
  37  *      overlay network.
  38  *
  39  *    VL2
  40  *
   41  *      A VL2 address, or virtual layer 2, refers to the link-layer addresses
   42  *      that are used by entities on an overlay network. As far as we're
   43  *      concerned that means that this is the MAC address of an interface on
  44  *      an overlay network.
  45  *
  46  *    UL3
  47  *
  48  *      A UL3, or underlay layer 3, refers to the layer three (IP) address on
  49  *      the underlay network.
  50  *
   51  * The svp plugin provides lookups from VL3->VL2, e.g. the equivalent of an ARP
  52  * or NDP query, and then also provides VL2->UL3 lookups.
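       *
       * For example (with purely hypothetical values), to reach the overlay
       * address 10.21.0.5, a host first issues a VL3->VL2 lookup to learn that
       * 10.21.0.5 is backed by the MAC address 02:08:20:ab:cd:ef, and then a
       * VL2->UL3 lookup to learn that that MAC address currently lives at the
       * underlay address 192.168.5.4, port 1296.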
  53  *
  54  * -------------------
  55  * Protocol Operations
  56  * -------------------
  57  *
  58  * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
  59  * defines the basic TCP protocol that we use to communicate to hosts. At this
  60  * time, it is not quite 100% implemented in both this plug-in and our primary
  61  * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
  62  *
   63  * At this time, we don't quite support everything that we need to, including
   64  * SVP_R_BULK_REQ and SVP_R_SHOOTDOWN.
  65  *
  66  * ---------------------------------
  67  * General Design and Considerations
  68  * ---------------------------------
  69  *
   70  * Every instance of the svp plugin requires the hostname and port of a server
   71  * to contact. We have co-opted port 1296 (the year of the oldest extant
   72  * portolan) as our default port.
  73  *
   74  * Each of the different instances of the plugin has a corresponding remote
  75  * backend. The remote backend represents the tuple of the [ host, port ].
  76  * Different instances that share the same host and port tuple will use the same
  77  * backend.
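       *
       * For example (hypothetical names), two plugin instances that are both
       * configured with [ portolan.example.com, 1296 ] will share one
       * svp_remote_t and thus one set of connections, while an instance
       * pointed at a different host or port gets its own backend.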
  78  *
  79  * The backend is actually in charge of performing lookups, resolving and
  80  * updating the set of remote hosts based on the DNS resolution we've been
  81  * provided, and taking care of things like shootdowns.
  82  *
  83  * The whole plugin itself maintains an event loop and a number of threads to
  84  * service that event loop. On top of that event loop, we have a simple timer
  85  * backend that ticks at one second intervals and performs various callbacks,
  86  * such as idle query timers, DNS resolution, connection backoff, etc. Each of
  87  * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
  88  * the connection state, reconnecting, etc.
  89  *
   90  * All in all, the general way that this all looks is:
  91  *
  92  *  +----------------------------+
  93  *  | Plugin Instance            |
  94  *  | svp_t                      |
  95  *  |                            |
  96  *  | varpd_provider_handle_t * -+-> varpd handle
  97  *  | uint64_t               ----+-> varpd ID
  98  *  | char *                 ----+-> remote host
  99  *  | uint16_t               ----+-> remote port
 100  *  | svp_remote_t *   ---+------+-> remote backend
 101  *  +---------------------+------+
 102  *                        |
 103  *                        v
 104  *   +----------------------+                   +----------------+
  105  *   | Remote backend       |------------------>| Remote Backend |---> ...
 106  *   | svp_remote_t         |                   | svp_remote_t   |
 107  *   |                      |                   +----------------+
 108  *   | svp_remote_state_t --+-> state flags
 109  *   | svp_degrade_state_t -+-> degraded reason
 110  *   | struct addrinfo *  --+-> resolved hosts
 111  *   | uint_t            ---+-> active hosts
 112  *   | uint_t            ---+-> DNS generation
 113  *   | uint_t            ---+-> Reference count
 114  *   | uint_t            ---+-> active conns
 115  *   | uint_t            ---+-> degraded conns
 116  *   | list_t        ---+---+-> connection list
 117  *   +------------------+---+
 118  *                      |
 119  *                      +------------------------------+-----------------+
 120  *                      |                              |                 |
 121  *                      v                              v                 v
  122  *   +-------------------+                       +----------------+
  123  *   | SVP Connection    |                       | SVP Connection |     ...
 124  *   | svp_conn_t        |                       | svp_conn_t     |
 125  *   |                   |                       +----------------+
 126  *   | svp_event_t   ----+-> event loop handle
 127  *   | svp_timer_t   ----+-> backoff timer
 128  *   | svp_timer_t   ----+-> query timer
 129  *   | int           ----+-> socket fd
 130  *   | uint_t        ----+-> generation
 131  *   | uint_t        ----+-> current backoff
 132  *   | svp_conn_flags_t -+-> connection flags
 133  *   | svp_conn_state_t -+-> connection state
 134  *   | svp_conn_error_t -+-> connection error
  135  *   | int            ---+-> last errno
 136  *   | hrtime_t       ---+-> activity timestamp
 137  *   | svp_conn_out_t ---+-> outgoing data state
 138  *   | svp_conn_in_t  ---+-> incoming data state
 139  *   | list_t      ---+--+-> active queries
 140  *   +----------------+--+
 141  *                    |
 142  *                    +----------------------------------+-----------------+
 143  *                    |                                  |                 |
 144  *                    v                                  v                 v
 145  *   +--------------------+                       +-------------+
 146  *   | SVP Query          |                       | SVP Query   |         ...
 147  *   | svp_query_t        |                       | svp_query_t |
 148  *   |                    |                       +-------------+
 149  *   | svp_query_f     ---+-> callback function
 150  *   | void *          ---+-> callback arg
 151  *   | svp_query_state_t -+-> state flags
 152  *   | svp_req_t       ---+-> svp prot. header
 153  *   | svp_query_data_t --+-> read data
 154  *   | svp_query_data_t --+-> write data
 155  *   | svp_status_t    ---+-> request status
 156  *   +--------------------+
 157  *
  158  * The svp_t is the instance that we associate with varpd. The instance itself
  159  * maintains properties and then when it's started associates with an
  160  * svp_remote_t, which is the remote backend. The remote backend itself
  161  * maintains the DNS state and spins up and down connections based on the
 162  * results from DNS. By default, we query DNS every 30 seconds. For more on the
 163  * connection life cycle, see the next section.
 164  *
  165  * By default, each connection maintains its own backoff timer and list of
  166  * queries it's servicing. Only one request is generally outstanding at a time
  167  * and requests are round-robined across the various connections.
 168  *
  169  * The query itself represents an svp request that's in flight; it keeps track
  170  * of the request's state and is a place for the data that's read and written
  171  * as part of the request.
 172  *
 173  * Connections maintain a query timer such that if we have not received data on
 174  * a socket for a certain amount of time, we kill that socket and begin a
 175  * reconnection cycle with backoff.
 176  *
 177  * ------------------------
 178  * Connection State Machine
 179  * ------------------------
 180  *
 181  * We have a connection pool that's built upon DNS records. DNS describes the
 182  * membership of the set of remote peers that make up our pool and we maintain
 183  * one connection to each of them.  In addition, we maintain an exponential
  184  * backoff for each peer and will attempt to reconnect immediately before backing
 185  * off. The following are the valid states that a connection can be in:
 186  *
 187  *      SVP_CS_ERROR            An OS error has occurred on this connection,
 188  *                              such as failure to create a socket or associate
 189  *                              the socket with an event port. We also
 190  *                              transition all connections to this state before
 191  *                              we destroy them.
 192  *
 193  *      SVP_CS_INITIAL          This is the initial state of a connection, all
 194  *                              that should exist is an unbound socket.
 195  *
 196  *      SVP_CS_CONNECTING       A call to connect has been made and we are
 197  *                              polling for it to complete.
 198  *
 199  *      SVP_CS_BACKOFF          A connect attempt has failed and we are
 200  *                              currently backing off, waiting to try again.
 201  *
 202  *      SVP_CS_ACTIVE           We have successfully connected to the remote
 203  *                              system.
 204  *
 205  *      SVP_CS_WINDDOWN         This connection is going to valhalla. In other
 206  *                              words, a previously active connection is no
 207  *                              longer valid in DNS, so we should curb our use
 208  *                              of it, and reap it as soon as we have other
 209  *                              active connections.
 210  *
 211  * The following diagram attempts to describe our state transition scheme, and
 212  * when we transition from one state to the next.
 213  *
 214  *                               |
 215  *                               * New remote IP from DNS resolution,
 216  *                               | not currently active in the system.
 217  *                               |
 218  *                               v                                Socket Error,
 219  *                       +----------------+                       still in DNS
 220  *  +----------------<---| SVP_CS_INITIAL |<----------------------*-----+
 221  *  |                    +----------------+                             |
 222  *  |                            System  |                              |
 223  *  | Connection . . . . .       success *               Successful     |
 224  *  | failed             .               |               connect()      |
 225  *  |               +----*---------+     |        +-----------*--+      |
 226  *  |               |              |     |        |              |      |
 227  *  |               V              ^     v        ^              V      ^
 228  *  |  +----------------+         +-------------------+     +---------------+
 229  *  +<-| SVP_CS_BACKOFF |         | SVP_CS_CONNECTING |     | SVP_CS_ACTIVE |
 230  *  |  +----------------+         +-------------------+     +---------------+
 231  *  |               V              ^  V                       V  V
 232  *  | Backoff wait  *              |  |                       |  * Removed
 233  *  v interval      +--------------+  +-----------------<-----+  | from DNS
 234  *  | finished                        |                          |
 235  *  |                                 V                          |
 236  *  |                                 |                          V
 237  *  |                                 |            +-----------------+
 238  *  +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
 239  *                   |                             +-----------------+
 240  *                   * . . .   Fatal system, not
 241  *                   |         socket error or
 242  *                   V         quiesced after
 243  *           +--------------+  removal from DNS
 244  *           | SVP_CS_ERROR |
 245  *           +--------------+
 246  *                   |
 247  *                   * . . . Removed from DNS
 248  *                   v
 249  *            +------------+
 250  *            | Connection |
 251  *            | Destroyed  |
 252  *            +------------+
 253  *
 254  * --------------------------
 255  * Connection Event Injection
 256  * --------------------------
 257  *
 258  * For each connection that exists in the system, we have a timer in place that
 259  * is in charge of performing timeout activity. It fires once every thirty
 260  * seconds or so for a given connection and checks to ensure that we have had
 261  * activity for the most recent query on the connection. If not, it terminates
  262  * the connection. This is important as, if we have sent all our data and are
  263  * waiting for the remote end to reply, without enabling something like TCP
  264  * keep-alive, we will not be notified of anything that has happened to the
  265  * remote connection, for example a panic. In addition, this also protects
 266  * against a server that is up, but a portolan that is not making forward
 267  * progress.
 268  *
 269  * When a timeout occurs, we first try to disassociate any active events, which
 270  * by definition must exist. Once that's done, we inject a port source user
 271  * event. Now, there is a small gotcha. Let's assume for a moment that we have a
  272  * pathological portolan. That means that it knows to inject activity right at
  273  * the timeout window, so the event may be disassociated before
  274  * we can get to it. If that's the case, we must _not_ inject the user event
 275  * and instead, we'll let the pending event take care of it. We know that the
 276  * pending event hasn't hit the main part of the loop yet, otherwise, it would
 277  * have released the lock protecting our state and associated the event.
 278  *
 279  * ------------
 280  * Notes on DNS
 281  * ------------
 282  *
  283  * Unfortunately, doing host name resolution in a way that allows us to leverage
  284  * the system's resolvers and the system's caching requires us to make blocking
 285  * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
 286  * that will tie up a thread for quite some time. To work around that fact,
 287  * we're going to create a fixed number of threads and we'll use them to service
 288  * our DNS requests. While this isn't ideal, until we have a sane means of
 289  * integrating a DNS resolution into an event loop with say portfs, it's not
 290  * going to be a fun day no matter what we do.
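       *
       * A minimal sketch of what each worker thread does, assuming a simple
       * queue of pending requests (the names here are illustrative, not the
       * actual symbols in this plugin):
       *
       *      for (;;) {
       *              req = dns_dequeue();            <- blocks for work
       *              err = getaddrinfo(req->host, req->port, &hints, &res);
       *              dns_deliver(req, err, res);     <- back onto event loop
       *      }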
 291  *
 292  * ------
 293  * Timers
 294  * ------
 295  *
  296  * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
  297  * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves
  298  * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
  299  * access, we don't rely on it. The timer itself is just a simple avl tree
  300  * sorted by expiration time, which is stored as a tick in the future; a tick
  301  * is just one second.
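       *
       * As a concrete example, if the current tick is 100 and a caller
       * schedules a timer with a 30 second period, the entry is stored with an
       * expiration of tick 130; each one-second firing then walks the tree
       * from the smallest expiration and runs every entry whose tick is <= the
       * current tick.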
 302  *
 303  * ----------
 304  * Shootdowns
 305  * ----------
 306  *
 307  * As part of the protocol, we need to be able to handle shootdowns that inform
  308  * us that some of the information in the system is out of date. This information
 309  * needs to be processed promptly; however, the information is hopefully going
 310  * to be relatively infrequent relative to the normal flow of information.
 311  *
  312  * The shootdown information needs to be handled on a per-backend basis. The
  313  * general design is that we'll have a single query for this which can fire on a
  314  * 5-10 second period; we randomize the latter part to give us a bit more load
 315  * spreading. If we complete because there's no work to do, then we wait the
 316  * normal period. If we complete, but there's still work to do, we'll go again
 317  * after a second.
 318  *
 319  * A shootdown has a few different parts. We first receive a list of items to
 320  * shootdown. After performing all of those, we need to acknowledge them. When
 321  * that's been done successfully, we can move onto the next part. From a
  322  * protocol perspective, we make an SVP_R_LOG_REQ, we get a reply, and then after
 323  * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
 324  * continue.
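       *
       * Schematically, one round of log processing looks like the following
       * (the ack message names are per libvarpd_svp_prot.h):
       *
       *      varpd                                portolan
       *        |---------- SVP_R_LOG_REQ ----------->|
       *        |<--------- SVP_R_LOG_ACK ------------|  (list of entries)
       *        |     ... process invalidations ...   |
       *        |---------- SVP_R_LOG_RM ------------>|
       *        |<--------- SVP_R_LOG_RM_ACK ---------|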
 325  *
 326  * However, one of the challenges that we have is that these invalidations are
 327  * just that, an invalidation. For a virtual layer two request, that's fine,
 328  * because the kernel supports that. However, for virtual layer three
 329  * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
 330  * don't really support a notion of just an invalidation, instead you have to
 331  * inject the new data in a gratuitous fashion.
 332  *
  333  * To that end, what we instead do is, when we receive a VL3 invalidation, we
  334  * turn that into a VL3 request. We hold the general request as outstanding
 335  * until we receive all of the callbacks for the VL3 invalidations, at which
 336  * point we go through and do the log removal request.
 337  */
 338 
 339 #include <umem.h>
 340 #include <errno.h>
 341 #include <stdlib.h>
 342 #include <sys/types.h>
 343 #include <sys/socket.h>
 344 #include <netinet/in.h>
 345 #include <arpa/inet.h>
 346 #include <libnvpair.h>
 347 #include <strings.h>
 348 #include <string.h>
 349 #include <assert.h>
 350 #include <unistd.h>
 351 
 352 #include <libvarpd_provider.h>
 353 #include "libvarpd_svp.h"
 354 
 355 bunyan_logger_t *svp_bunyan;
 356 static int svp_defport = 1296;
 357 static int svp_defuport = 1339;
 358 static umem_cache_t *svp_lookup_cache;
 359 
 360 typedef enum svp_lookup_type {
 361         SVP_L_UNKNOWN   = 0x0,
 362         SVP_L_VL2       = 0x1,
 363         SVP_L_VL3       = 0x2,
 364         SVP_L_RVL3      = 0x3
 365 } svp_lookup_type_t;
 366 
 367 typedef struct svp_lookup {
 368         int svl_type;
 369         union {
 370                 struct svl_lookup_vl2 {
 371                         varpd_query_handle_t    *svl_handle;
 372                         overlay_target_point_t  *svl_point;
 373                 } svl_vl2;
 374                 struct svl_lookup_vl3 {
 375                         varpd_arp_handle_t      *svl_vah;
 376                         uint8_t                 *svl_out;
 377                 } svl_vl3;
 378                 struct svl_lookup_rvl3 {
 379                         varpd_query_handle_t    *svl_handle;
 380                         overlay_target_point_t  *svl_point;
 381                         overlay_target_route_t  *svl_route;
 382                 } svl_rvl3;
 383         } svl_u;
 384         svp_query_t                             svl_query;
 385 } svp_lookup_t;
 386 
 387 static const char *varpd_svp_props[] = {
 388         "svp/host",
 389         "svp/port",
 390         "svp/underlay_ip",
 391         "svp/underlay_port",
 392         "svp/dcid",
 393         "svp/router_mac"
 394 };
 395 
 396 static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 397 
 398 int
 399 svp_comparator(const void *l, const void *r)
 400 {
 401         const svp_t *ls = l;
 402         const svp_t *rs = r;
 403 
 404         if (ls->svp_vid > rs->svp_vid)
 405                 return (1);
 406         if (ls->svp_vid < rs->svp_vid)
 407                 return (-1);
 408         return (0);
 409 }
 410 
 411 static void
 412 svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
 413     const uint16_t uport, void *arg)
 414 {
 415         svp_lookup_t *svl = arg;
 416         overlay_target_point_t *otp;
 417 
 418         assert(svp != NULL);
 419         assert(arg != NULL);
 420 
 421         if (status != SVP_S_OK) {
 422                 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 423                     VARPD_LOOKUP_DROP);
 424                 umem_cache_free(svp_lookup_cache, svl);
 425                 return;
 426         }
 427 
 428         otp = svl->svl_u.svl_vl2.svl_point;
 429         bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
 430         otp->otp_port = uport;
 431         libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 432             VARPD_LOOKUP_OK);
 433         umem_cache_free(svp_lookup_cache, svl);
 434 }
 435 
 436 static void
 437 svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
 438     const struct in6_addr *uip, const uint16_t uport, void *arg)
 439 {
 440         overlay_target_point_t point;
 441         svp_lookup_t *svl = arg;
 442 
 443         assert(svp != NULL);
 444         assert(svl != NULL);
 445 
 446         if (status != SVP_S_OK) {
 447                 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 448                     VARPD_LOOKUP_DROP);
 449                 umem_cache_free(svp_lookup_cache, svl);
 450                 return;
 451         }
 452 
 453         /* Inject the L2 mapping before the L3 */
 454         bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
 455         point.otp_port = uport;
 456         libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
 457 
 458         bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
 459         libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 460             VARPD_LOOKUP_OK);
 461         umem_cache_free(svp_lookup_cache, svl);
 462 }
 463 
 464 static void
 465 svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
 466 {
 467         libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 468 }
 469 
 470 static void
 471 svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
 472     const uint8_t *vl2mac, const uint8_t *targmac)
 473 {
 474         struct in_addr v4;
 475 
 476         /*
  477          * At the moment we don't support any IPv6 related log entries; this
 478          * will change soon as we develop a bit more of the IPv6 related
 479          * infrastructure so we can properly test the injection.
 480          */
 481         if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
 482                 return;
 483         } else {
 484                 IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
 485                 if (targmac == NULL)
 486                         targmac = svp_bcast;
 487                 libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
 488         }
 489 }
 490 
 491 /* ARGSUSED */
 492 static void
 493 svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
 494     const uint16_t uport)
 495 {
 496         /*
  497          * We should probably do a conditional invalidation here.
 498          */
 499         libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 500 }
 501 
 502 static void
 503 svp_rvl3_lookup_cb(svp_t *svp, svp_status_t status, /* XXX KEBE SAYS MORE */
 504     void *arg)
 505 {
 506         svp_lookup_t *svl = arg;
 507         overlay_target_point_t *otp;
 508         overlay_target_route_t *otr;
 509 
 510         if (status != SVP_S_OK) {
 511                 libvarpd_plugin_query_reply(svl->svl_u.svl_rvl3.svl_handle,
 512                     VARPD_LOOKUP_DROP);
 513                 umem_cache_free(svp_lookup_cache, svl);
 514                 return;
 515         }
 516 
 517         otp = svl->svl_u.svl_rvl3.svl_point;
 518         otr = svl->svl_u.svl_rvl3.svl_route;
 519         /* XXX KEBE SAYS FILL ME IN! */
 520 
 521         libvarpd_plugin_query_reply(svl->svl_u.svl_rvl3.svl_handle,
 522             VARPD_LOOKUP_OK);
 523         umem_cache_free(svp_lookup_cache, svl);
 524 }
 525 
 526 static svp_cb_t svp_defops = {
 527         svp_vl2_lookup_cb,
 528         svp_vl3_lookup_cb,
 529         svp_vl2_invalidate_cb,
 530         svp_vl3_inject_cb,
 531         svp_shootdown_cb,
 532         svp_rvl3_lookup_cb,
 533 };
 534 
 535 static boolean_t
 536 varpd_svp_valid_dest(overlay_plugin_dest_t dest)
 537 {
 538         if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 539                 return (B_FALSE);
 540 
 541         return (B_TRUE);
 542 }
 543 
 544 static int
 545 varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
 546     overlay_plugin_dest_t dest)
 547 {
 548         int ret;
 549         svp_t *svp;
 550 
 551         if (varpd_svp_valid_dest(dest) == B_FALSE)
 552                 return (ENOTSUP);
 553 
 554         svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
 555         if (svp == NULL)
 556                 return (ENOMEM);
 557 
 558         if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 559             NULL)) != 0) {
 560                 umem_free(svp, sizeof (svp_t));
 561                 return (ret);
 562         }
 563 
 564         svp->svp_port = svp_defport;
 565         svp->svp_uport = svp_defuport;
 566         svp->svp_cb = svp_defops;
 567         svp->svp_hdl = hdl;
 568         svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
 569         *outp = svp;
 570         return (0);
 571 }
 572 
 573 static int
 574 varpd_svp_start(void *arg)
 575 {
 576         int ret;
 577         svp_remote_t *srp;
 578         svp_t *svp = arg;
 579 
 580         mutex_enter(&svp->svp_lock);
 581         if (svp->svp_host == NULL || svp->svp_port == 0 ||
 582             svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
 583                 mutex_exit(&svp->svp_lock);
 584                 return (EAGAIN);
 585         }
 586         mutex_exit(&svp->svp_lock);
 587 
 588         if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
 589             &srp)) != 0)
 590                 return (ret);
 591 
 592         if ((ret = svp_remote_attach(srp, svp)) != 0) {
 593                 svp_remote_release(srp);
 594                 return (ret);
 595         }
 596 
 597         return (0);
 598 }
 599 
 600 static void
 601 varpd_svp_stop(void *arg)
 602 {
 603         svp_t *svp = arg;
 604 
 605         svp_remote_detach(svp);
 606 }
 607 
 608 static void
 609 varpd_svp_destroy(void *arg)
 610 {
 611         svp_t *svp = arg;
 612 
 613         if (svp->svp_host != NULL)
 614                 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 615 
 616         if (mutex_destroy(&svp->svp_lock) != 0)
 617                 libvarpd_panic("failed to destroy svp_t`svp_lock");
 618 
 619         umem_free(svp, sizeof (svp_t));
 620 }
 621 
 622 static void
 623 varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
 624     const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 625     overlay_target_route_t *otr)
 626 {
 627         svp_lookup_t *slp;
 628         uint32_t type;
 629         const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
 630             *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 631 
 632         /*
 633          * otl is an L3 request, so we have src/dst IPs for the inner packet.
 634          * We also have the vlan.
 635          *
 636          * Assume kernel's overlay module is caching well, so we are directly
 637          * going to query (i.e. no caching up here of actual destinations).
 638          *
  639          * We use our existing remote server (svp_remote), but with the new
  640          * message SVP_R_REMOTE_VL3_REQ.  Our naming of these functions
  641          * already has "remote" in it, but we'll use "rvl3" instead of "vl3".
 642          */
 643 
 644         /* XXX KEBE SAYS DO SOME otl verification too... */
 645         if (IN6_IS_ADDR_V4MAPPED(src)) {
 646                 if (!IN6_IS_ADDR_V4MAPPED(dst)) {
 647                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 648                         return;
 649                 }
 650                 type = SVP_VL3_IP;
 651         } else {
 652                 if (IN6_IS_ADDR_V4MAPPED(dst)) {
 653                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 654                         return;
 655                 }
 656                 type = SVP_VL3_IPV6;
 657         }
 658 
 659         slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 660         if (slp == NULL) {
 661                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 662                 return;
 663         }
 664 
 665         slp->svl_type = SVP_L_RVL3;
 666         slp->svl_u.svl_rvl3.svl_handle = vqh;
 667         slp->svl_u.svl_rvl3.svl_point = otp;
 668         slp->svl_u.svl_rvl3.svl_route = otr;
 669 
 670         /* XXX KEBE SAYS FILL IN ARGS PROPERLY... */
 671         svp_remote_rvl3_lookup(svp, &slp->svl_query, src, dst, type,
 672             otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
 673 }
 674 
 675 static void
 676 varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
 677     const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 678     overlay_target_route_t *otr)
 679 {
 680         svp_lookup_t *slp;
 681         svp_t *svp = arg;
 682 
 683         /*
 684          * Shuffle off L3 lookups to their own codepath.
 685          */
 686         if (otl->otl_l3req) {
 687                 varpd_svp_lookup_l3(svp, vqh, otl, otp, otr);
 688                 return;
 689         }
 690         /*
 691          * At this point, the traditional overlay_target_point_t is all that
 692          * needs filling in.  Zero-out the otr for safety.
 693          */
 694         bzero(otr, sizeof (*otr));
 695 
 696 
 697         /*
 698          * Check if this is something that we need to proxy, eg. arp or ndp.
 699          */
 700         if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
 701                 libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
 702                 return;
 703         }
 704 
 705         if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
 706             otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
 707                 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
 708                         libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
 709                 } else {
 710                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 711                 }
 712                 return;
 713         }
 714 
 715         /*
 716          * Watch out for various multicast and broadcast addresses. We've
 717          * already taken care of the IPv6 range above. Now we just need to
  718          * handle broadcast and, if the multicast bit (the lowest bit of the
  719          * first octet of the MAC) is set, drop the packet now.
 720          */
 721         if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
 722             ETHERADDRL) == 0 ||
 723             (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
 724                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 725                 return;
 726         }
 727 
 728         /*
 729          * If we have a failure to allocate memory for this, that's not good.
 730          * However, telling the kernel to just drop this packet is much better
 731          * than the alternative at this moment. At least we'll try again and we
 732          * may have something more available to us in a little bit.
 733          */
 734         slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 735         if (slp == NULL) {
 736                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 737                 return;
 738         }
 739 
 740         slp->svl_type = SVP_L_VL2;
 741         slp->svl_u.svl_vl2.svl_handle = vqh;
 742         slp->svl_u.svl_vl2.svl_point = otp;
 743 
 744         svp_remote_vl2_lookup(svp, &slp->svl_query,
 745             otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
 746 }
 747 
 748 /* ARGSUSED */
 749 static int
 750 varpd_svp_nprops(void *arg, uint_t *nprops)
 751 {
 752         *nprops = sizeof (varpd_svp_props) / sizeof (char *);
 753         return (0);
 754 }
 755 
 756 /* ARGSUSED */
 757 static int
 758 varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
 759 {
 760         switch (propid) {
 761         case 0:
 762                 /* svp/host */
 763                 libvarpd_prop_set_name(vph, varpd_svp_props[0]);
 764                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 765                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
 766                 libvarpd_prop_set_nodefault(vph);
 767                 break;
 768         case 1:
 769                 /* svp/port */
 770                 libvarpd_prop_set_name(vph, varpd_svp_props[1]);
 771                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 772                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 773                 (void) libvarpd_prop_set_default(vph, &svp_defport,
 774                     sizeof (svp_defport));
 775                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 776                 break;
 777         case 2:
 778                 /* svp/underlay_ip */
 779                 libvarpd_prop_set_name(vph, varpd_svp_props[2]);
 780                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 781                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
 782                 libvarpd_prop_set_nodefault(vph);
 783                 break;
 784         case 3:
 785                 /* svp/underlay_port */
 786                 libvarpd_prop_set_name(vph, varpd_svp_props[3]);
 787                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 788                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 789                 (void) libvarpd_prop_set_default(vph, &svp_defuport,
 790                     sizeof (svp_defuport));
 791                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 792                 break;
 793         case 4:
 794                 /* svp/dcid */
 795                 libvarpd_prop_set_name(vph, varpd_svp_props[4]);
 796                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 797                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 798                 libvarpd_prop_set_nodefault(vph);
 799                 /* XXX KEBE ASKS should I just set high to UINT32_MAX? */
 800                 libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
 801                 break;
 802         case 5:
 803                 /* svp/router_mac */
 804                 libvarpd_prop_set_name(vph, varpd_svp_props[5]);
 805                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 806                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
 807                 libvarpd_prop_set_nodefault(vph);
 808                 break;
 809         default:
 810                 return (EINVAL);
 811         }
 812         return (0);
 813 }
 814 
 815 static int
 816 varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
 817 {
 818         svp_t *svp = arg;
 819 
 820         /* svp/host */
 821         if (strcmp(pname, varpd_svp_props[0]) == 0) {
 822                 size_t len;
 823 
 824                 mutex_enter(&svp->svp_lock);
 825                 if (svp->svp_host == NULL) {
 826                         *sizep = 0;
 827                 } else {
 828                         len = strlen(svp->svp_host) + 1;
 829                         if (*sizep < len) {
 830                                 mutex_exit(&svp->svp_lock);
 831                                 return (EOVERFLOW);
 832                         }
 833                         *sizep = len;
 834                         (void) strlcpy(buf, svp->svp_host, *sizep);
 835                 }
 836                 mutex_exit(&svp->svp_lock);
 837                 return (0);
 838         }
 839 
 840         /* svp/port */
 841         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 842                 uint64_t val;
 843 
 844                 if (*sizep < sizeof (uint64_t))
 845                         return (EOVERFLOW);
 846 
 847                 mutex_enter(&svp->svp_lock);
 848                 if (svp->svp_port == 0) {
 849                         *sizep = 0;
 850                 } else {
 851                         val = svp->svp_port;
 852                         bcopy(&val, buf, sizeof (uint64_t));
 853                         *sizep = sizeof (uint64_t);
 854                 }
 855                 mutex_exit(&svp->svp_lock);
 856                 return (0);
 857         }
 858 
 859         /* svp/underlay_ip */
 860         if (strcmp(pname, varpd_svp_props[2]) == 0) {
 861                 if (*sizep < sizeof (struct in6_addr))
 862                         return (EOVERFLOW);
 863                 mutex_enter(&svp->svp_lock);
 864                 if (svp->svp_huip == B_FALSE) {
 865                         *sizep = 0;
 866                 } else {
 867                         bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
 868                         *sizep = sizeof (struct in6_addr);
 869                 }
 870                 mutex_exit(&svp->svp_lock);
 871                 return (0);
 872         }
 873 
 874         /* svp/underlay_port */
 875         if (strcmp(pname, varpd_svp_props[3]) == 0) {
 876                 uint64_t val;
 877 
 878                 if (*sizep < sizeof (uint64_t))
 879                         return (EOVERFLOW);
 880 
 881                 mutex_enter(&svp->svp_lock);
 882                 if (svp->svp_uport == 0) {
 883                         *sizep = 0;
 884                 } else {
 885                         val = svp->svp_uport;
 886                         bcopy(&val, buf, sizeof (uint64_t));
 887                         *sizep = sizeof (uint64_t);
 888                 }
 889 
 890                 mutex_exit(&svp->svp_lock);
 891                 return (0);
 892         }
 893 
 894         /* svp/dcid */
 895         if (strcmp(pname, varpd_svp_props[4]) == 0) {
 896                 uint64_t val;
 897 
 898                 if (*sizep < sizeof (uint64_t))
 899                         return (EOVERFLOW);
 900 
 901                 mutex_enter(&svp->svp_lock);
  902                 if (svp->svp_dcid == 0) {
 903                         *sizep = 0;
 904                 } else {
 905                         val = svp->svp_dcid;
 906                         bcopy(&val, buf, sizeof (uint64_t));
 907                         *sizep = sizeof (uint64_t);
 908                 }
 909 
 910                 mutex_exit(&svp->svp_lock);
 911                 return (0);
 912         }
 913 
 914         /* svp/router_mac */
 915         if (strcmp(pname, varpd_svp_props[5]) == 0) {
 916                 if (*sizep < ETHERADDRL)
 917                         return (EOVERFLOW);
 918                 mutex_enter(&svp->svp_lock);
 919 
 920                 if (ether_is_zero(&svp->svp_router_mac)) {
 921                         *sizep = 0;
 922                 } else {
 923                         bcopy(&svp->svp_router_mac, buf, ETHERADDRL);
 924                         *sizep = ETHERADDRL;
 925                 }
 926 
 927                 mutex_exit(&svp->svp_lock);
 928                 return (0);
 929         }
 930         return (EINVAL);
 931 }
 932 
 933 static int
 934 varpd_svp_setprop(void *arg, const char *pname, const void *buf,
 935     const uint32_t size)
 936 {
 937         svp_t *svp = arg;
 938 
 939         /* svp/host */
 940         if (strcmp(pname, varpd_svp_props[0]) == 0) {
  941                 char *dup;
  942                 dup = umem_alloc(size, UMEM_DEFAULT);
  943                 if (dup == NULL)
  944                         return (ENOMEM);
  945                 (void) strlcpy(dup, buf, size);
 946                 mutex_enter(&svp->svp_lock);
 947                 if (svp->svp_host != NULL)
 948                         umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 949                 svp->svp_host = dup;
 950                 mutex_exit(&svp->svp_lock);
 951                 return (0);
 952         }
 953 
 954         /* svp/port */
 955         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 956                 const uint64_t *valp = buf;
 957                 if (size < sizeof (uint64_t))
 958                         return (EOVERFLOW);
 959 
 960                 if (*valp == 0 || *valp > UINT16_MAX)
 961                         return (EINVAL);
 962 
 963                 mutex_enter(&svp->svp_lock);
 964                 svp->svp_port = (uint16_t)*valp;
 965                 mutex_exit(&svp->svp_lock);
 966                 return (0);
 967         }
 968 
 969         /* svp/underlay_ip */
 970         if (strcmp(pname, varpd_svp_props[2]) == 0) {
 971                 const struct in6_addr *ipv6 = buf;
 972 
 973                 if (size < sizeof (struct in6_addr))
 974                         return (EOVERFLOW);
 975 
 976                 if (IN6_IS_ADDR_V4COMPAT(ipv6))
 977                         return (EINVAL);
 978 
 979                 if (IN6_IS_ADDR_MULTICAST(ipv6))
 980                         return (EINVAL);
 981 
 982                 if (IN6_IS_ADDR_6TO4(ipv6))
 983                         return (EINVAL);
 984 
 985                 if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
 986                         ipaddr_t v4;
 987                         IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
 988                         if (IN_MULTICAST(v4))
 989                                 return (EINVAL);
 990                 }
 991 
 992                 mutex_enter(&svp->svp_lock);
 993                 bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
 994                 svp->svp_huip = B_TRUE;
 995                 mutex_exit(&svp->svp_lock);
 996                 return (0);
 997         }
 998 
 999         /* svp/underlay_port */
1000         if (strcmp(pname, varpd_svp_props[3]) == 0) {
1001                 const uint64_t *valp = buf;
1002                 if (size < sizeof (uint64_t))
1003                         return (EOVERFLOW);
1004 
1005                 if (*valp == 0 || *valp > UINT16_MAX)
1006                         return (EINVAL);
1007 
1008                 mutex_enter(&svp->svp_lock);
1009                 svp->svp_uport = (uint16_t)*valp;
1010                 mutex_exit(&svp->svp_lock);
1011 
1012                 return (0);
1013         }
1014 
1015         /* svp/dcid */
1016         if (strcmp(pname, varpd_svp_props[4]) == 0) {
1017                 const uint64_t *valp = buf;
1018                 if (size < sizeof (uint64_t))
1019                         return (EOVERFLOW);
1020 
1021                 /* XXX KEBE ASKS, use UINT32_MAX instead? */
1022                 if (*valp == 0 || *valp > UINT32_MAX - 1)
1023                         return (EINVAL);
1024 
1025                 mutex_enter(&svp->svp_lock);
1026                 svp->svp_dcid = (uint32_t)*valp;
1027                 mutex_exit(&svp->svp_lock);
1028 
1029                 return (0);
1030         }
1031 
1032         /* svp/router_mac */
1033         if (strcmp(pname, varpd_svp_props[5]) == 0) {
1034                 if (size < ETHERADDRL)
1035                         return (EOVERFLOW);
1036                 mutex_enter(&svp->svp_lock);
1037                 bcopy(buf, &svp->svp_router_mac, ETHERADDRL);
1038                 mutex_exit(&svp->svp_lock);
1039                 return (0);
1040         }
1041 
1042         return (EINVAL);
1043 }
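
      /*
       * For illustration only: a sketch of how a consumer could round-trip the
       * svp/port property through the two entry points above. The svp_t would
       * normally come from varpd_svp_create(), and the values are
       * hypothetical; note that numeric properties travel as uint64_t buffers.
       *
       *      uint64_t port = 1296;
       *      uint32_t size = sizeof (port);
       *
       *      if (varpd_svp_setprop(svp, "svp/port", &port, size) == 0)
       *              (void) varpd_svp_getprop(svp, "svp/port", &port, &size);
       */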
1044 
1045 static int
1046 varpd_svp_save(void *arg, nvlist_t *nvp)
1047 {
1048         int ret;
1049         svp_t *svp = arg;
1050 
1051         mutex_enter(&svp->svp_lock);
1052         /* svp/host */
1053         if (svp->svp_host != NULL) {
1054                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1055                     svp->svp_host)) != 0) {
1056                         mutex_exit(&svp->svp_lock);
1057                         return (ret);
1058                 }
1059         }
1060 
1061         /* svp/port */
1062         if (svp->svp_port != 0) {
1063                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1064                     svp->svp_port)) != 0) {
1065                         mutex_exit(&svp->svp_lock);
1066                         return (ret);
1067                 }
1068         }
1069 
1070         /* svp/underlay_ip */
1071         if (svp->svp_huip == B_TRUE) {
1072                 char buf[INET6_ADDRSTRLEN];
1073 
1074                 if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1075                     NULL)
1076                         libvarpd_panic("unexpected inet_ntop failure: %d",
1077                             errno);
1078 
1079                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1080                     buf)) != 0) {
1081                         mutex_exit(&svp->svp_lock);
1082                         return (ret);
1083                 }
1084         }
1085 
1086         /* svp/underlay_port */
1087         if (svp->svp_uport != 0) {
1088                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1089                     svp->svp_uport)) != 0) {
1090                         mutex_exit(&svp->svp_lock);
1091                         return (ret);
1092                 }
1093         }
1094 
1095         /* svp/dcid */
1096         if (svp->svp_dcid != 0) {
1097                 if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1098                     svp->svp_dcid)) != 0) {
1099                         mutex_exit(&svp->svp_lock);
1100                         return (ret);
1101                 }
1102         }
1103 
1104         /* svp/router_mac */
1105         if (!ether_is_zero(&svp->svp_router_mac)) {
1106                 char buf[ETHERADDRSTRL];
1107 
1108                 /* XXX KEBE SAYS See underlay_ip... */
1109                 if (ether_ntoa_r(&svp->svp_router_mac, buf) == NULL) {
1110                         libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1111                             errno);
1112                 }
1113 
1114                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1115                     buf)) != 0) {
1116                         mutex_exit(&svp->svp_lock);
1117                         return (ret);
1118                 }
1119         }
1120 
1121         mutex_exit(&svp->svp_lock);
1122         return (0);
1123 }
1124 
1125 static int
1126 varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1127     overlay_plugin_dest_t dest, void **outp)
1128 {
1129         int ret;
1130         svp_t *svp;
1131         char *ipstr, *hstr, *etherstr;
1132 
1133         if (varpd_svp_valid_dest(dest) == B_FALSE)
1134                 return (ENOTSUP);
1135 
1136         if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1137                 return (ret);
1138 
1139         /* svp/host */
1140         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1141             &hstr)) != 0) {
1142                 if (ret != ENOENT) {
1143                         varpd_svp_destroy(svp);
1144                         return (ret);
1145                 }
1146                 svp->svp_host = NULL;
1147         } else {
 1148                 size_t blen = strlen(hstr) + 1;
 1149                 svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
                      if (svp->svp_host == NULL) {
                              varpd_svp_destroy(svp);
                              return (ENOMEM);
                      }
 1150                 (void) strlcpy(svp->svp_host, hstr, blen);
1151         }
1152 
1153         /* svp/port */
1154         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1155             &svp->svp_port)) != 0) {
1156                 if (ret != ENOENT) {
1157                         varpd_svp_destroy(svp);
1158                         return (ret);
1159                 }
1160                 svp->svp_port = 0;
1161         }
1162 
1163         /* svp/underlay_ip */
1164         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1165             &ipstr)) != 0) {
1166                 if (ret != ENOENT) {
1167                         varpd_svp_destroy(svp);
1168                         return (ret);
1169                 }
1170                 svp->svp_huip = B_FALSE;
1171         } else {
1172                 ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1173                 if (ret == -1) {
1174                         assert(errno == EAFNOSUPPORT);
1175                         libvarpd_panic("unexpected inet_pton failure: %d",
1176                             errno);
1177                 }
1178 
1179                 if (ret == 0) {
1180                         varpd_svp_destroy(svp);
1181                         return (EINVAL);
1182                 }
1183                 svp->svp_huip = B_TRUE;
1184         }
1185 
1186         /* svp/underlay_port */
1187         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1188             &svp->svp_uport)) != 0) {
1189                 if (ret != ENOENT) {
1190                         varpd_svp_destroy(svp);
1191                         return (ret);
1192                 }
1193                 svp->svp_uport = 0;
1194         }
1195 
1196         /* svp/dcid */
1197         if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1198             &svp->svp_dcid)) != 0) {
1199                 if (ret != ENOENT) {
1200                         varpd_svp_destroy(svp);
1201                         return (ret);
1202                 }
1203                 svp->svp_dcid = 0;
1204         }
1205 
1206         /* svp/router_mac */
1207         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1208             &etherstr)) != 0) {
1209                 if (ret != ENOENT) {
1210                         varpd_svp_destroy(svp);
1211                         return (ret);
1212                 }
1213                 bzero(&svp->svp_router_mac, ETHERADDRL);
1214         } else if (ether_aton_r(etherstr, &svp->svp_router_mac) == NULL) {
1215                 libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1216         }
1217 
1218         svp->svp_hdl = hdl;
1219         *outp = svp;
1220         return (0);
1221 }
1222 
1223 static void
1224 varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1225     const struct sockaddr *sock, uint8_t *out)
1226 {
1227         svp_t *svp = arg;
1228         svp_lookup_t *svl;
1229 
1230         if (type != VARPD_QTYPE_ETHERNET) {
1231                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1232                 return;
1233         }
1234 
1235         svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1236         if (svl == NULL) {
1237                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1238                 return;
1239         }
1240 
1241         svl->svl_type = SVP_L_VL3;
1242         svl->svl_u.svl_vl3.svl_vah = vah;
1243         svl->svl_u.svl_vl3.svl_out = out;
1244         svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1245 }
1246 
1247 static const varpd_plugin_ops_t varpd_svp_ops = {
1248         0,
1249         varpd_svp_create,
1250         varpd_svp_start,
1251         varpd_svp_stop,
1252         varpd_svp_destroy,
1253         NULL,
1254         varpd_svp_lookup,
1255         varpd_svp_nprops,
1256         varpd_svp_propinfo,
1257         varpd_svp_getprop,
1258         varpd_svp_setprop,
1259         varpd_svp_save,
1260         varpd_svp_restore,
1261         varpd_svp_arp,
1262         NULL
1263 };
1264 
1265 static int
1266 svp_bunyan_init(void)
1267 {
1268         int ret;
1269 
1270         if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1271                 return (ret);
1272         ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1273             bunyan_stream_fd, (void *)STDERR_FILENO);
1274         if (ret != 0)
1275                 bunyan_fini(svp_bunyan);
1276         return (ret);
1277 }
1278 
1279 static void
1280 svp_bunyan_fini(void)
1281 {
1282         if (svp_bunyan != NULL)
1283                 bunyan_fini(svp_bunyan);
1284 }
1285 
1286 #pragma init(varpd_svp_init)
1287 static void
1288 varpd_svp_init(void)
1289 {
1290         int err;
1291         varpd_plugin_register_t *vpr;
1292 
1293         if (svp_bunyan_init() != 0)
1294                 return;
1295 
1296         if ((err = svp_host_init()) != 0) {
1297                 (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1298                     BUNYAN_T_INT32, "error", err,
1299                     BUNYAN_T_END);
1300                 svp_bunyan_fini();
1301                 return;
1302         }
1303 
1304         svp_lookup_cache = umem_cache_create("svp_lookup",
1305             sizeof (svp_lookup_t),  0, NULL, NULL, NULL, NULL, NULL, 0);
1306         if (svp_lookup_cache == NULL) {
1307                 (void) bunyan_error(svp_bunyan,
1308                     "failed to create svp_lookup cache",
1309                     BUNYAN_T_INT32, "error", errno,
1310                     BUNYAN_T_END);
1311                 svp_bunyan_fini();
1312                 return;
1313         }
1314 
1315         if ((err = svp_event_init()) != 0) {
1316                 (void) bunyan_error(svp_bunyan,
1317                     "failed to init event subsystem",
1318                     BUNYAN_T_INT32, "error", err,
1319                     BUNYAN_T_END);
1320                 svp_bunyan_fini();
1321                 umem_cache_destroy(svp_lookup_cache);
1322                 return;
1323         }
1324 
1325         if ((err = svp_timer_init()) != 0) {
1326                 (void) bunyan_error(svp_bunyan,
1327                     "failed to init timer subsystem",
1328                     BUNYAN_T_INT32, "error", err,
1329                     BUNYAN_T_END);
1330                 svp_event_fini();
1331                 umem_cache_destroy(svp_lookup_cache);
1332                 svp_bunyan_fini();
1333                 return;
1334         }
1335 
1336         if ((err = svp_remote_init()) != 0) {
1337                 (void) bunyan_error(svp_bunyan,
1338                     "failed to init remote subsystem",
1339                     BUNYAN_T_INT32, "error", err,
1340                     BUNYAN_T_END);
1341                 svp_event_fini();
1342                 umem_cache_destroy(svp_lookup_cache);
1343                 svp_bunyan_fini();
1344                 return;
1345         }
1346 
1347         vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1348         if (vpr == NULL) {
1349                 (void) bunyan_error(svp_bunyan,
1350                     "failed to alloc varpd plugin",
1351                     BUNYAN_T_INT32, "error", err,
1352                     BUNYAN_T_END);
1353                 svp_remote_fini();
1354                 svp_event_fini();
1355                 umem_cache_destroy(svp_lookup_cache);
1356                 svp_bunyan_fini();
1357                 return;
1358         }
1359 
1360         vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1361         vpr->vpr_name = "svp";
1362         vpr->vpr_ops = &varpd_svp_ops;
1363 
1364         if ((err = libvarpd_plugin_register(vpr)) != 0) {
1365                 (void) bunyan_error(svp_bunyan,
1366                     "failed to register varpd plugin",
1367                     BUNYAN_T_INT32, "error", err,
1368                     BUNYAN_T_END);
1369                 svp_remote_fini();
1370                 svp_event_fini();
1371                 umem_cache_destroy(svp_lookup_cache);
1372                 svp_bunyan_fini();
 1373         }
 1374 
1375         libvarpd_plugin_free(vpr);
1376 }