1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018, Joyent, Inc.
  14  */
  15 
  16 /*
  17  * This plugin implements the SDC VXLAN Protocol (SVP).
  18  *
  19  * This plugin is designed to work with a broader distributed system that
 * maintains a database of mappings and provides a means of looking up data and
  21  * provides a stream of updates. While it is named after VXLAN, there isn't
  22  * anything specific to VXLAN baked into the protocol at this time, other than
  23  * that it requires both an IP address and a port; however, if there's a good
  24  * reason to support others here, we can modify that.
  25  *
  26  * -----------
  27  * Terminology
  28  * -----------
  29  *
  30  * Throughout this module we refer to a few different kinds of addresses:
  31  *
  32  *    VL3
  33  *
 *      A VL3 address, or virtual layer 3, refers to the layer three addresses
  35  *      that are used by entities on an overlay network. As far as we're
  36  *      concerned that means that this is the IP address of an interface on an
  37  *      overlay network.
  38  *
  39  *    VL2
  40  *
 *      A VL2 address, or a virtual layer 2, refers to the link-layer addresses
  42  *      that are used by entities on an overlay network. As far as we're
  43  *      concerned that means that this is the MAC addresses of an interface on
  44  *      an overlay network.
  45  *
  46  *    UL3
  47  *
  48  *      A UL3, or underlay layer 3, refers to the layer three (IP) address on
  49  *      the underlay network.
  50  *
  51  * The svp plugin provides lookups from VL3->VL2, eg. the equivalent of an ARP
  52  * or NDP query, and then also provides VL2->UL3 lookups.
  53  *
  54  * -------------------
  55  * Protocol Operations
  56  * -------------------
  57  *
  58  * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
  59  * defines the basic TCP protocol that we use to communicate to hosts. At this
  60  * time, it is not quite 100% implemented in both this plug-in and our primary
  61  * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
  62  *
  63  * At this time, we don't quite support everything that we need to. Including
  64  * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN.
  65  *
  66  * ---------------------------------
  67  * General Design and Considerations
  68  * ---------------------------------
  69  *
  70  * Every instance of the svp plugin requires the hostname and port of a server
  71  * to contact. Though, we have co-opted the port 1296 (the year of the oldest
  72  * extant portolan) as our default port.
  73  *
 * Each of the different instances of the plugin has a corresponding remote
  75  * backend. The remote backend represents the tuple of the [ host, port ].
  76  * Different instances that share the same host and port tuple will use the same
  77  * backend.
  78  *
  79  * The backend is actually in charge of performing lookups, resolving and
  80  * updating the set of remote hosts based on the DNS resolution we've been
  81  * provided, and taking care of things like shootdowns.
  82  *
  83  * The whole plugin itself maintains an event loop and a number of threads to
  84  * service that event loop. On top of that event loop, we have a simple timer
  85  * backend that ticks at one second intervals and performs various callbacks,
  86  * such as idle query timers, DNS resolution, connection backoff, etc. Each of
  87  * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
  88  * the connection state, reconnecting, etc.
  89  *
  90  * All in all, the general way that this all looks like is:
  91  *
  92  *  +----------------------------+
  93  *  | Plugin Instance            |
  94  *  | svp_t                      |
  95  *  |                            |
  96  *  | varpd_provider_handle_t * -+-> varpd handle
  97  *  | uint64_t               ----+-> varpd ID
  98  *  | char *                 ----+-> remote host
  99  *  | uint16_t               ----+-> remote port
 100  *  | svp_remote_t *   ---+------+-> remote backend
 101  *  +---------------------+------+
 102  *                        |
 103  *                        v
 104  *   +----------------------+                   +----------------+
 *   | Remote backend       |------------------>| Remote Backend |---> ...
 106  *   | svp_remote_t         |                   | svp_remote_t   |
 107  *   |                      |                   +----------------+
 108  *   | svp_remote_state_t --+-> state flags
 109  *   | svp_degrade_state_t -+-> degraded reason
 110  *   | struct addrinfo *  --+-> resolved hosts
 111  *   | uint_t            ---+-> active hosts
 112  *   | uint_t            ---+-> DNS generation
 113  *   | uint_t            ---+-> Reference count
 114  *   | uint_t            ---+-> active conns
 115  *   | uint_t            ---+-> degraded conns
 116  *   | list_t        ---+---+-> connection list
 117  *   +------------------+---+
 118  *                      |
 119  *                      +------------------------------+-----------------+
 120  *                      |                              |                 |
 121  *                      v                              v                 v
 122  *   +-------------------+                       +----------------
 123  *   | SVP Connection    |                       | SVP connection |     ...
 124  *   | svp_conn_t        |                       | svp_conn_t     |
 125  *   |                   |                       +----------------+
 126  *   | svp_event_t   ----+-> event loop handle
 127  *   | svp_timer_t   ----+-> backoff timer
 128  *   | svp_timer_t   ----+-> query timer
 129  *   | int           ----+-> socket fd
 130  *   | uint_t        ----+-> generation
 131  *   | uint_t        ----+-> current backoff
 132  *   | svp_conn_flags_t -+-> connection flags
 133  *   | svp_conn_state_t -+-> connection state
 134  *   | svp_conn_error_t -+-> connection error
 *   | int            ---+-> last errno
 136  *   | hrtime_t       ---+-> activity timestamp
 137  *   | svp_conn_out_t ---+-> outgoing data state
 138  *   | svp_conn_in_t  ---+-> incoming data state
 139  *   | list_t      ---+--+-> active queries
 140  *   +----------------+--+
 141  *                    |
 142  *                    +----------------------------------+-----------------+
 143  *                    |                                  |                 |
 144  *                    v                                  v                 v
 145  *   +--------------------+                       +-------------+
 146  *   | SVP Query          |                       | SVP Query   |         ...
 147  *   | svp_query_t        |                       | svp_query_t |
 148  *   |                    |                       +-------------+
 149  *   | svp_query_f     ---+-> callback function
 150  *   | void *          ---+-> callback arg
 151  *   | svp_query_state_t -+-> state flags
 152  *   | svp_req_t       ---+-> svp prot. header
 153  *   | svp_query_data_t --+-> read data
 154  *   | svp_query_data_t --+-> write data
 155  *   | svp_status_t    ---+-> request status
 156  *   +--------------------+
 157  *
 * The svp_t is the instance that we associate with varpd. The instance itself
 159  * maintains properties and then when it's started associates with an
 160  * svp_remote_t, which is the remote backend. The remote backend itself,
 * maintains the DNS state and spins up and tears down connections based on the
 162  * results from DNS. By default, we query DNS every 30 seconds. For more on the
 163  * connection life cycle, see the next section.
 164  *
 165  * By default, each connection maintains its own back off timer and list of
 166  * queries it's servicing. Only one request is generally outstanding at a time
 167  * and requests are round robined across the various connections.
 168  *
 169  * The query itself represents the svp request that's going on and keep track of
 170  * its state and is a place for data that's read and written to as part of the
 171  * request.
 172  *
 173  * Connections maintain a query timer such that if we have not received data on
 174  * a socket for a certain amount of time, we kill that socket and begin a
 175  * reconnection cycle with backoff.
 176  *
 177  * ------------------------
 178  * Connection State Machine
 179  * ------------------------
 180  *
 181  * We have a connection pool that's built upon DNS records. DNS describes the
 182  * membership of the set of remote peers that make up our pool and we maintain
 183  * one connection to each of them.  In addition, we maintain an exponential
 * backoff for each peer and will attempt to reconnect immediately before backing
 185  * off. The following are the valid states that a connection can be in:
 186  *
 187  *      SVP_CS_ERROR            An OS error has occurred on this connection,
 188  *                              such as failure to create a socket or associate
 189  *                              the socket with an event port. We also
 190  *                              transition all connections to this state before
 191  *                              we destroy them.
 192  *
 193  *      SVP_CS_INITIAL          This is the initial state of a connection, all
 194  *                              that should exist is an unbound socket.
 195  *
 196  *      SVP_CS_CONNECTING       A call to connect has been made and we are
 197  *                              polling for it to complete.
 198  *
 199  *      SVP_CS_BACKOFF          A connect attempt has failed and we are
 200  *                              currently backing off, waiting to try again.
 201  *
 202  *      SVP_CS_ACTIVE           We have successfully connected to the remote
 203  *                              system.
 204  *
 205  *      SVP_CS_WINDDOWN         This connection is going to valhalla. In other
 206  *                              words, a previously active connection is no
 207  *                              longer valid in DNS, so we should curb our use
 208  *                              of it, and reap it as soon as we have other
 209  *                              active connections.
 210  *
 211  * The following diagram attempts to describe our state transition scheme, and
 212  * when we transition from one state to the next.
 213  *
 214  *                               |
 215  *                               * New remote IP from DNS resolution,
 216  *                               | not currently active in the system.
 217  *                               |
 218  *                               v                                Socket Error,
 219  *                       +----------------+                       still in DNS
 220  *  +----------------<---| SVP_CS_INITIAL |<----------------------*-----+
 221  *  |                    +----------------+                             |
 222  *  |                            System  |                              |
 223  *  | Connection . . . . .       success *               Successful     |
 224  *  | failed             .               |               connect()      |
 225  *  |               +----*---------+     |        +-----------*--+      |
 226  *  |               |              |     |        |              |      |
 227  *  |               V              ^     v        ^              V      ^
 228  *  |  +----------------+         +-------------------+     +---------------+
 229  *  +<-| SVP_CS_BACKOFF |         | SVP_CS_CONNECTING |     | SVP_CS_ACTIVE |
 230  *  |  +----------------+         +-------------------+     +---------------+
 231  *  |               V              ^  V                       V  V
 232  *  | Backoff wait  *              |  |                       |  * Removed
 233  *  v interval      +--------------+  +-----------------<-----+  | from DNS
 234  *  | finished                        |                          |
 235  *  |                                 V                          |
 236  *  |                                 |                          V
 237  *  |                                 |            +-----------------+
 238  *  +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
 239  *                   |                             +-----------------+
 240  *                   * . . .   Fatal system, not
 241  *                   |         socket error or
 242  *                   V         quiesced after
 243  *           +--------------+  removal from DNS
 244  *           | SVP_CS_ERROR |
 245  *           +--------------+
 246  *                   |
 247  *                   * . . . Removed from DNS
 248  *                   v
 249  *            +------------+
 250  *            | Connection |
 251  *            | Destroyed  |
 252  *            +------------+
 253  *
 254  * --------------------------
 255  * Connection Event Injection
 256  * --------------------------
 257  *
 258  * For each connection that exists in the system, we have a timer in place that
 259  * is in charge of performing timeout activity. It fires once every thirty
 260  * seconds or so for a given connection and checks to ensure that we have had
 261  * activity for the most recent query on the connection. If not, it terminates
 262  * the connection. This is important as if we have sent all our data and are
 263  * waiting for the remote end to reply, without enabling something like TCP
 264  * keep-alive, we will not be notified that anything that has happened to the
 265  * remote connection, for example a panic. In addition, this also protects
 266  * against a server that is up, but a portolan that is not making forward
 267  * progress.
 268  *
 269  * When a timeout occurs, we first try to disassociate any active events, which
 270  * by definition must exist. Once that's done, we inject a port source user
 271  * event. Now, there is a small gotcha. Let's assume for a moment that we have a
 272  * pathological portolan. That means that it knows to inject activity right at
 273  * the time out window. That means, that the event may be disassociated before
 274  * we could get to it. If that's the case, we must _not_ inject the user event
 275  * and instead, we'll let the pending event take care of it. We know that the
 276  * pending event hasn't hit the main part of the loop yet, otherwise, it would
 277  * have released the lock protecting our state and associated the event.
 278  *
 279  * ------------
 280  * Notes on DNS
 281  * ------------
 282  *
 283  * Unfortunately, doing host name resolution in a way that allows us to leverage
 * the system's resolvers and the system's caching, requires us to make blocking
 285  * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
 286  * that will tie up a thread for quite some time. To work around that fact,
 287  * we're going to create a fixed number of threads and we'll use them to service
 288  * our DNS requests. While this isn't ideal, until we have a sane means of
 289  * integrating a DNS resolution into an event loop with say portfs, it's not
 290  * going to be a fun day no matter what we do.
 291  *
 292  * ------
 293  * Timers
 294  * ------
 295  *
 296  * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
 297  * every second. While we'd rather use CLOCK_HIGHRES just to alleviate ourselves
 298  * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
 299  * access, we don't want them to end up in there. The timer itself is just a
 300  * simple avl tree sorted by expiration time, which is stored as a tick in the
 301  * future, a tick is just one second.
 302  *
 303  * ----------
 304  * Shootdowns
 305  * ----------
 306  *
 307  * As part of the protocol, we need to be able to handle shootdowns that inform
 308  * us some of the information in the system is out of date. This information
 309  * needs to be processed promptly; however, the information is hopefully going
 310  * to be relatively infrequent relative to the normal flow of information.
 311  *
 312  * The shoot down information needs to be done on a per-backend basis. The
 313  * general design is that we'll have a single query for this which can fire on a
 * 5-10s period, we randomize the latter part to give us a bit more load
 315  * spreading. If we complete because there's no work to do, then we wait the
 316  * normal period. If we complete, but there's still work to do, we'll go again
 317  * after a second.
 318  *
 319  * A shootdown has a few different parts. We first receive a list of items to
 320  * shootdown. After performing all of those, we need to acknowledge them. When
 321  * that's been done successfully, we can move onto the next part. From a
 322  * protocol perspective, we make a SVP_R_LOG_REQ, we get a reply, and then after
 323  * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
 324  * continue.
 325  *
 326  * However, one of the challenges that we have is that these invalidations are
 327  * just that, an invalidation. For a virtual layer two request, that's fine,
 328  * because the kernel supports that. However, for virtual layer three
 329  * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
 330  * don't really support a notion of just an invalidation, instead you have to
 331  * inject the new data in a gratuitous fashion.
 332  *
 333  * To that end, what we instead do is when we receive a VL3 invalidation, we
 * turn that into a VL3 request. We hold the general request as outstanding
 335  * until we receive all of the callbacks for the VL3 invalidations, at which
 336  * point we go through and do the log removal request.
 337  */
 338 
 339 #include <umem.h>
 340 #include <errno.h>
 341 #include <stdlib.h>
 342 #include <sys/types.h>
 343 #include <sys/socket.h>
 344 #include <netinet/in.h>
 345 #include <arpa/inet.h>
 346 #include <libnvpair.h>
 347 #include <strings.h>
 348 #include <string.h>
 349 #include <assert.h>
 350 #include <unistd.h>
 351 
 352 #include <libvarpd_provider.h>
 353 #include "libvarpd_svp.h"
 354 
/* Shared bunyan logger handle for the whole plugin. */
bunyan_logger_t *svp_bunyan;

/*
 * Default remote and underlay ports; both are overridable through the
 * svp/port and svp/underlay_port properties below.
 */
static int svp_defport = 1296;
static int svp_defuport = 1339;

/* umem cache backing svp_lookup_t allocations. */
static umem_cache_t *svp_lookup_cache;

/*
 * Kind of lookup an svp_lookup_t represents; selects which arm of the
 * svl_u union below is valid.
 */
typedef enum svp_lookup_type {
	SVP_L_UNKNOWN	= 0x0,
	SVP_L_VL2	= 0x1,
	SVP_L_VL3	= 0x2,
	SVP_L_ROUTE	= 0x3
} svp_lookup_type_t;

/*
 * State for one outstanding lookup: the varpd handle to reply through,
 * the caller-owned output buffers to fill in, and the embedded svp
 * query that travels through the remote backend.
 */
typedef struct svp_lookup {
	int svl_type;		/* an svp_lookup_type_t value */
	union {
		struct svl_lookup_vl2 {
			varpd_query_handle_t	*svl_handle;
			overlay_target_point_t	*svl_point;
		} svl_vl2;
		struct svl_lookup_vl3 {
			varpd_arp_handle_t	*svl_vah;
			uint8_t			*svl_out;
		} svl_vl3;
		struct svl_lookup_route {
			varpd_query_handle_t	*svl_handle;
			overlay_target_point_t	*svl_point;
			overlay_target_route_t	*svl_route;
		} svl_route;
	} svl_u;
	svp_query_t				svl_query;
} svp_lookup_t;

/* Names of the properties this plugin exposes to varpd. */
static const char *varpd_svp_props[] = {
	"svp/host",
	"svp/port",
	"svp/underlay_ip",
	"svp/underlay_port",
	"svp/dcid",
	"svp/router_oui"
};

/* The Ethernet broadcast address. */
static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 397 
 398 int
 399 svp_comparator(const void *l, const void *r)
 400 {
 401         const svp_t *ls = l;
 402         const svp_t *rs = r;
 403 
 404         if (ls->svp_vid > rs->svp_vid)
 405                 return (1);
 406         if (ls->svp_vid < rs->svp_vid)
 407                 return (-1);
 408         return (0);
 409 }
 410 
 411 static void
 412 svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
 413     const uint16_t uport, void *arg)
 414 {
 415         svp_lookup_t *svl = arg;
 416         overlay_target_point_t *otp;
 417 
 418         assert(svp != NULL);
 419         assert(arg != NULL);
 420 
 421         if (status != SVP_S_OK) {
 422                 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 423                     VARPD_LOOKUP_DROP);
 424                 umem_cache_free(svp_lookup_cache, svl);
 425                 return;
 426         }
 427 
 428         otp = svl->svl_u.svl_vl2.svl_point;
 429         bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
 430         otp->otp_port = uport;
 431         libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 432             VARPD_LOOKUP_OK);
 433         umem_cache_free(svp_lookup_cache, svl);
 434 }
 435 
 436 static void
 437 svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
 438     const struct in6_addr *uip, const uint16_t uport, void *arg)
 439 {
 440         overlay_target_point_t point;
 441         svp_lookup_t *svl = arg;
 442         uint8_t nexthop_mac[6] = { 0, 0, 0, 0, 0, 0 };
 443 
 444         assert(svp != NULL);
 445         assert(svl != NULL);
 446 
 447         if (status != SVP_S_OK) {
 448                 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 449                     VARPD_LOOKUP_DROP);
 450                 umem_cache_free(svp_lookup_cache, svl);
 451                 return;
 452         }
 453 
 454         /* Inject the L2 mapping before the L3 */
 455         bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
 456         point.otp_port = uport;
 457         if (uport != 0) {
 458                 /* Normal L3 lookup result... */
 459                 libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
 460         } else {
 461                 /*
 462                  * Oh my, we have a next-hop router IP.
 463                  * Set the MAC to the ouid+vid concatenated
 464                  * special-router-MAC. Overlay down below will know
 465                  * that uport == 0 means the MAC is a special one.
 466                  */
 467                 if (bcmp(svp->svp_router_oui, nexthop_mac, ETHERADDRL) == 0) {
 468                         /*
 469                          * We don't have a router_oui, so we can't support
 470                          * special-router-MAC.  Drop it.
 471                          */
 472                         libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 473                             VARPD_LOOKUP_DROP);
 474                         umem_cache_free(svp_lookup_cache, svl);
 475                         return;
 476                 }
 477                 vl2mac = nexthop_mac;
 478                 bcopy(svp->svp_router_oui, vl2mac, 3);
 479                 vl2mac[3] = (svp->svp_vid >> 16) & 0xff;
 480                 vl2mac[4] = (svp->svp_vid >> 8) & 0xff;
 481                 vl2mac[5] = svp->svp_vid & 0xff;
 482         }
 483 
 484         bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
 485         libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 486             VARPD_LOOKUP_OK);
 487         umem_cache_free(svp_lookup_cache, svl);
 488 }
 489 
/*
 * VL2 invalidation callback. A NULL target point presumably tells the
 * kernel to discard any cached mapping for this MAC -- confirm against
 * libvarpd_inject_varp()'s contract.
 */
static void
svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
{
	libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
}
 495 
 496 static void
 497 svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
 498     const uint8_t *vl2mac, const uint8_t *targmac)
 499 {
 500         struct in_addr v4;
 501 
 502         /*
 503          * At the moment we don't support any IPv6 related log entries, this
 504          * will change soon as we develop a bit more of the IPv6 related
 505          * infrastructure so we can properly test the injection.
 506          */
 507         if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
 508                 return;
 509         } else {
 510                 IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
 511                 if (targmac == NULL)
 512                         targmac = svp_bcast;
 513                 libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
 514         }
 515 }
 516 
/* ARGSUSED */
/*
 * Shootdown callback: unconditionally invalidate the kernel's VL2 mapping
 * for this MAC. uip/uport are currently unused.
 */
static void
svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
    const uint16_t uport)
{
	/*
	 * We should probably do a conditional invalidation here, i.e. only
	 * when the current mapping still matches uip/uport.
	 */
	libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
}
 527 
 528 static void
 529 svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
 530     uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
 531     uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
 532     void *arg)
 533 {
 534         svp_lookup_t *svl = arg;
 535         overlay_target_point_t *otp;
 536         overlay_target_route_t *otr;
 537 
 538         if (status != SVP_S_OK) {
 539                 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 540                     VARPD_LOOKUP_DROP);
 541                 umem_cache_free(svp_lookup_cache, svl);
 542                 return;
 543         }
 544 
 545         otp = svl->svl_u.svl_route.svl_point;
 546         bcopy(dstmac, otp->otp_mac, ETHERADDRL);
 547         bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
 548         otp->otp_port = ul3_port;
 549 
 550         otr = svl->svl_u.svl_route.svl_route;
 551         otr->otr_vnet = vnetid;
 552         otr->otr_vlan = vlan;
 553         bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
 554         otr->otr_dcid = dcid;
 555         otr->otr_src_prefixlen = srcpfx;
 556         otr->otr_dst_prefixlen = dstpfx;
 557 
 558         libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 559             VARPD_LOOKUP_OK);
 560         umem_cache_free(svp_lookup_cache, svl);
 561 }
 562 
/*
 * Callback vector handed to the remote backend; it routes lookup
 * results, invalidations, injections, and shootdowns back into this
 * plugin. Order must match the svp_cb_t definition.
 */
static svp_cb_t svp_defops = {
	svp_vl2_lookup_cb,
	svp_vl3_lookup_cb,
	svp_vl2_invalidate_cb,
	svp_vl3_inject_cb,
	svp_shootdown_cb,
	svp_route_lookup_cb,
};
 571 
 572 static boolean_t
 573 varpd_svp_valid_dest(overlay_plugin_dest_t dest)
 574 {
 575         if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 576                 return (B_FALSE);
 577 
 578         return (B_TRUE);
 579 }
 580 
/*
 * Create a new plugin instance: allocate the svp_t, initialize its lock,
 * and fill in defaults (ports, callback vector, varpd handle, vnet id).
 * Returns 0 with *outp set on success, or an errno value on failure.
 */
static int
varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
    overlay_plugin_dest_t dest)
{
	int ret;
	svp_t *svp;

	/* SVP only supports IP + port underlay destinations. */
	if (varpd_svp_valid_dest(dest) == B_FALSE)
		return (ENOTSUP);

	svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
	if (svp == NULL)
		return (ENOMEM);

	if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
	    NULL)) != 0) {
		umem_free(svp, sizeof (svp_t));
		return (ret);
	}

	/* Default ports; the svp/port properties can override these later. */
	svp->svp_port = svp_defport;
	svp->svp_uport = svp_defuport;
	svp->svp_cb = svp_defops;
	svp->svp_hdl = hdl;
	svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
	*outp = svp;
	return (0);
}
 609 
/*
 * Start an instance: verify the required properties are configured, then
 * find (or create) the shared remote backend for our [ host, port ] tuple
 * and attach to it. Returns EAGAIN if configuration is incomplete so
 * varpd can retry once the properties are set.
 */
static int
varpd_svp_start(void *arg)
{
	int ret;
	svp_remote_t *srp;
	svp_t *svp = arg;

	/* Host, port, underlay IP, and underlay port must all be set. */
	mutex_enter(&svp->svp_lock);
	if (svp->svp_host == NULL || svp->svp_port == 0 ||
	    svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
		mutex_exit(&svp->svp_lock);
		return (EAGAIN);
	}
	mutex_exit(&svp->svp_lock);

	/*
	 * Instances sharing the same host/port tuple share a backend; this
	 * looks one up or creates it.
	 */
	if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
	    &srp)) != 0)
		return (ret);

	if ((ret = svp_remote_attach(srp, svp)) != 0) {
		/*
		 * NOTE(review): presumably this drops the reference that
		 * svp_remote_find() took -- verify against its contract.
		 */
		svp_remote_release(srp);
		return (ret);
	}

	return (0);
}
 636 
/*
 * Stop an instance by detaching it from its remote backend; the inverse
 * of varpd_svp_start().
 */
static void
varpd_svp_stop(void *arg)
{
	svp_t *svp = arg;

	svp_remote_detach(svp);
}
 644 
/*
 * Tear down an instance created by varpd_svp_create(): free the host
 * string, destroy the lock, and release the svp_t itself.
 */
static void
varpd_svp_destroy(void *arg)
{
	svp_t *svp = arg;

	if (svp->svp_host != NULL)
		umem_free(svp->svp_host, strlen(svp->svp_host) + 1);

	/* Failing to destroy an errorcheck mutex indicates a program bug. */
	if (mutex_destroy(&svp->svp_lock) != 0)
		libvarpd_panic("failed to destroy svp_t`svp_lock");

	umem_free(svp, sizeof (svp_t));
}
 658 
/*
 * Handle an L3 (routed) lookup: resolve the inner src/dst IP pair to a
 * destination point and route via an SVP_R_ROUTE_REQ to the remote
 * backend. The reply is delivered asynchronously through
 * svp_route_lookup_cb(), which fills in otp/otr and answers vqh.
 */
static void
varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
    const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
    overlay_target_route_t *otr)
{
	svp_lookup_t *slp;
	uint32_t type;
	const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
	    *dst = &otl->otl_addru.otlu_l3.otl3_dstip;

	/*
	 * otl is an L3 request, so we have src/dst IPs for the inner packet.
	 * We also have the vlan.
	 *
	 * Assume kernel's overlay module is caching well, so we are directly
	 * going to query (i.e. no caching up here of actual destinations).
	 *
	 * We reuse our existing remote server (svp_remote), but with the new
	 * message SVP_R_ROUTE_REQ.
	 */

	/* XXX KEBE SAYS DO SOME otl verification too... */

	/*
	 * The source and destination must share an address family; a mixed
	 * v4/v6 pair is dropped outright.
	 */
	if (IN6_IS_ADDR_V4MAPPED(src)) {
		if (!IN6_IS_ADDR_V4MAPPED(dst)) {
			libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
			return;
		}
		type = SVP_VL3_IP;
	} else {
		if (IN6_IS_ADDR_V4MAPPED(dst)) {
			libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
			return;
		}
		type = SVP_VL3_IPV6;
	}

	/* On allocation failure, dropping beats blocking the kernel. */
	slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
	if (slp == NULL) {
		libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
		return;
	}

	slp->svl_type = SVP_L_ROUTE;
	slp->svl_u.svl_route.svl_handle = vqh;
	slp->svl_u.svl_route.svl_point = otp;
	slp->svl_u.svl_route.svl_route = otr;

	svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
	    otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
}
 709 
 710 static void
 711 varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
 712     const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 713     overlay_target_route_t *otr)
 714 {
 715         svp_lookup_t *slp;
 716         svp_t *svp = arg;
 717 
 718         /*
 719          * Shuffle off L3 lookups to their own codepath.
 720          */
 721         if (otl->otl_l3req) {
 722                 varpd_svp_lookup_l3(svp, vqh, otl, otp, otr);
 723                 return;
 724         }
 725         /*
 726          * At this point, the traditional overlay_target_point_t is all that
 727          * needs filling in.  Zero-out the otr for safety.
 728          */
 729         bzero(otr, sizeof (*otr));
 730 
 731 
 732         /*
 733          * Check if this is something that we need to proxy, eg. arp or ndp.
 734          */
 735         if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
 736                 libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
 737                 return;
 738         }
 739 
 740         if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
 741             otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
 742                 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
 743                         libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
 744                 } else {
 745                         libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 746                 }
 747                 return;
 748         }
 749 
 750         /*
 751          * Watch out for various multicast and broadcast addresses. We've
 752          * already taken care of the IPv6 range above. Now we just need to
 753          * handle broadcast and if the multicast bit is set, lowest bit of the
 754          * first octet of the MAC, then we drop it now.
 755          */
 756         if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
 757             ETHERADDRL) == 0 ||
 758             (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
 759                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 760                 return;
 761         }
 762 
 763         /*
 764          * If we have a failure to allocate memory for this, that's not good.
 765          * However, telling the kernel to just drop this packet is much better
 766          * than the alternative at this moment. At least we'll try again and we
 767          * may have something more available to us in a little bit.
 768          */
 769         slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 770         if (slp == NULL) {
 771                 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 772                 return;
 773         }
 774 
 775         slp->svl_type = SVP_L_VL2;
 776         slp->svl_u.svl_vl2.svl_handle = vqh;
 777         slp->svl_u.svl_vl2.svl_point = otp;
 778 
 779         svp_remote_vl2_lookup(svp, &slp->svl_query,
 780             otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
 781 }
 782 
 783 /* ARGSUSED */
 784 static int
 785 varpd_svp_nprops(void *arg, uint_t *nprops)
 786 {
 787         *nprops = sizeof (varpd_svp_props) / sizeof (char *);
 788         return (0);
 789 }
 790 
 791 /* ARGSUSED */
 792 static int
 793 varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
 794 {
 795         switch (propid) {
 796         case 0:
 797                 /* svp/host */
 798                 libvarpd_prop_set_name(vph, varpd_svp_props[0]);
 799                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 800                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
 801                 libvarpd_prop_set_nodefault(vph);
 802                 break;
 803         case 1:
 804                 /* svp/port */
 805                 libvarpd_prop_set_name(vph, varpd_svp_props[1]);
 806                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 807                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 808                 (void) libvarpd_prop_set_default(vph, &svp_defport,
 809                     sizeof (svp_defport));
 810                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 811                 break;
 812         case 2:
 813                 /* svp/underlay_ip */
 814                 libvarpd_prop_set_name(vph, varpd_svp_props[2]);
 815                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 816                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
 817                 libvarpd_prop_set_nodefault(vph);
 818                 break;
 819         case 3:
 820                 /* svp/underlay_port */
 821                 libvarpd_prop_set_name(vph, varpd_svp_props[3]);
 822                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 823                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 824                 (void) libvarpd_prop_set_default(vph, &svp_defuport,
 825                     sizeof (svp_defuport));
 826                 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 827                 break;
 828         case 4:
 829                 /* svp/dcid */
 830                 libvarpd_prop_set_name(vph, varpd_svp_props[4]);
 831                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 832                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 833                 libvarpd_prop_set_nodefault(vph);
 834                 /* XXX KEBE ASKS should I just set high to UINT32_MAX? */
 835                 libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
 836                 break;
 837         case 5:
 838                 /* svp/router_oui */
 839                 libvarpd_prop_set_name(vph, varpd_svp_props[5]);
 840                 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 841                 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
 842                 libvarpd_prop_set_nodefault(vph);
 843                 break;
 844         default:
 845                 return (EINVAL);
 846         }
 847         return (0);
 848 }
 849 
 850 static int
 851 varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
 852 {
 853         svp_t *svp = arg;
 854 
 855         /* svp/host */
 856         if (strcmp(pname, varpd_svp_props[0]) == 0) {
 857                 size_t len;
 858 
 859                 mutex_enter(&svp->svp_lock);
 860                 if (svp->svp_host == NULL) {
 861                         *sizep = 0;
 862                 } else {
 863                         len = strlen(svp->svp_host) + 1;
 864                         if (*sizep < len) {
 865                                 mutex_exit(&svp->svp_lock);
 866                                 return (EOVERFLOW);
 867                         }
 868                         *sizep = len;
 869                         (void) strlcpy(buf, svp->svp_host, *sizep);
 870                 }
 871                 mutex_exit(&svp->svp_lock);
 872                 return (0);
 873         }
 874 
 875         /* svp/port */
 876         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 877                 uint64_t val;
 878 
 879                 if (*sizep < sizeof (uint64_t))
 880                         return (EOVERFLOW);
 881 
 882                 mutex_enter(&svp->svp_lock);
 883                 if (svp->svp_port == 0) {
 884                         *sizep = 0;
 885                 } else {
 886                         val = svp->svp_port;
 887                         bcopy(&val, buf, sizeof (uint64_t));
 888                         *sizep = sizeof (uint64_t);
 889                 }
 890                 mutex_exit(&svp->svp_lock);
 891                 return (0);
 892         }
 893 
 894         /* svp/underlay_ip */
 895         if (strcmp(pname, varpd_svp_props[2]) == 0) {
 896                 if (*sizep < sizeof (struct in6_addr))
 897                         return (EOVERFLOW);
 898                 mutex_enter(&svp->svp_lock);
 899                 if (svp->svp_huip == B_FALSE) {
 900                         *sizep = 0;
 901                 } else {
 902                         bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
 903                         *sizep = sizeof (struct in6_addr);
 904                 }
 905                 mutex_exit(&svp->svp_lock);
 906                 return (0);
 907         }
 908 
 909         /* svp/underlay_port */
 910         if (strcmp(pname, varpd_svp_props[3]) == 0) {
 911                 uint64_t val;
 912 
 913                 if (*sizep < sizeof (uint64_t))
 914                         return (EOVERFLOW);
 915 
 916                 mutex_enter(&svp->svp_lock);
 917                 if (svp->svp_uport == 0) {
 918                         *sizep = 0;
 919                 } else {
 920                         val = svp->svp_uport;
 921                         bcopy(&val, buf, sizeof (uint64_t));
 922                         *sizep = sizeof (uint64_t);
 923                 }
 924 
 925                 mutex_exit(&svp->svp_lock);
 926                 return (0);
 927         }
 928 
 929         /* svp/dcid */
 930         if (strcmp(pname, varpd_svp_props[4]) == 0) {
 931                 uint64_t val;
 932 
 933                 if (*sizep < sizeof (uint64_t))
 934                         return (EOVERFLOW);
 935 
 936                 mutex_enter(&svp->svp_lock);
 937                 if (svp->svp_uport == 0) {
 938                         *sizep = 0;
 939                 } else {
 940                         val = svp->svp_dcid;
 941                         bcopy(&val, buf, sizeof (uint64_t));
 942                         *sizep = sizeof (uint64_t);
 943                 }
 944 
 945                 mutex_exit(&svp->svp_lock);
 946                 return (0);
 947         }
 948 
 949         /* svp/router_oui */
 950         if (strcmp(pname, varpd_svp_props[5]) == 0) {
 951                 if (*sizep < ETHERADDRL)
 952                         return (EOVERFLOW);
 953                 mutex_enter(&svp->svp_lock);
 954 
 955                 if (ether_is_zero(&svp->svp_router_oui)) {
 956                         *sizep = 0;
 957                 } else {
 958                         bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
 959                         *sizep = ETHERADDRL;
 960                 }
 961 
 962                 mutex_exit(&svp->svp_lock);
 963                 return (0);
 964         }
 965         return (EINVAL);
 966 }
 967 
 968 static int
 969 varpd_svp_setprop(void *arg, const char *pname, const void *buf,
 970     const uint32_t size)
 971 {
 972         svp_t *svp = arg;
 973 
 974         /* svp/host */
 975         if (strcmp(pname, varpd_svp_props[0]) == 0) {
 976                 char *dup;
 977                 dup = umem_alloc(size, UMEM_DEFAULT);
 978                 (void) strlcpy(dup, buf, size);
 979                 if (dup == NULL)
 980                         return (ENOMEM);
 981                 mutex_enter(&svp->svp_lock);
 982                 if (svp->svp_host != NULL)
 983                         umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 984                 svp->svp_host = dup;
 985                 mutex_exit(&svp->svp_lock);
 986                 return (0);
 987         }
 988 
 989         /* svp/port */
 990         if (strcmp(pname, varpd_svp_props[1]) == 0) {
 991                 const uint64_t *valp = buf;
 992                 if (size < sizeof (uint64_t))
 993                         return (EOVERFLOW);
 994 
 995                 if (*valp == 0 || *valp > UINT16_MAX)
 996                         return (EINVAL);
 997 
 998                 mutex_enter(&svp->svp_lock);
 999                 svp->svp_port = (uint16_t)*valp;
1000                 mutex_exit(&svp->svp_lock);
1001                 return (0);
1002         }
1003 
1004         /* svp/underlay_ip */
1005         if (strcmp(pname, varpd_svp_props[2]) == 0) {
1006                 const struct in6_addr *ipv6 = buf;
1007 
1008                 if (size < sizeof (struct in6_addr))
1009                         return (EOVERFLOW);
1010 
1011                 if (IN6_IS_ADDR_V4COMPAT(ipv6))
1012                         return (EINVAL);
1013 
1014                 if (IN6_IS_ADDR_MULTICAST(ipv6))
1015                         return (EINVAL);
1016 
1017                 if (IN6_IS_ADDR_6TO4(ipv6))
1018                         return (EINVAL);
1019 
1020                 if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
1021                         ipaddr_t v4;
1022                         IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
1023                         if (IN_MULTICAST(v4))
1024                                 return (EINVAL);
1025                 }
1026 
1027                 mutex_enter(&svp->svp_lock);
1028                 bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1029                 svp->svp_huip = B_TRUE;
1030                 mutex_exit(&svp->svp_lock);
1031                 return (0);
1032         }
1033 
1034         /* svp/underlay_port */
1035         if (strcmp(pname, varpd_svp_props[3]) == 0) {
1036                 const uint64_t *valp = buf;
1037                 if (size < sizeof (uint64_t))
1038                         return (EOVERFLOW);
1039 
1040                 if (*valp == 0 || *valp > UINT16_MAX)
1041                         return (EINVAL);
1042 
1043                 mutex_enter(&svp->svp_lock);
1044                 svp->svp_uport = (uint16_t)*valp;
1045                 mutex_exit(&svp->svp_lock);
1046 
1047                 return (0);
1048         }
1049 
1050         /* svp/dcid */
1051         if (strcmp(pname, varpd_svp_props[4]) == 0) {
1052                 const uint64_t *valp = buf;
1053                 if (size < sizeof (uint64_t))
1054                         return (EOVERFLOW);
1055 
1056                 /* XXX KEBE ASKS, use UINT32_MAX instead? */
1057                 if (*valp == 0 || *valp > UINT32_MAX - 1)
1058                         return (EINVAL);
1059 
1060                 mutex_enter(&svp->svp_lock);
1061                 svp->svp_dcid = (uint32_t)*valp;
1062                 mutex_exit(&svp->svp_lock);
1063 
1064                 return (0);
1065         }
1066 
1067         /* svp/router_oui */
1068         if (strcmp(pname, varpd_svp_props[5]) == 0) {
1069                 if (size < ETHERADDRL)
1070                         return (EOVERFLOW);
1071                 mutex_enter(&svp->svp_lock);
1072                 bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
1073                 /* Zero-out the low three bytes. */
1074                 svp->svp_router_oui[3] = 0;
1075                 svp->svp_router_oui[4] = 0;
1076                 svp->svp_router_oui[5] = 0;
1077                 mutex_exit(&svp->svp_lock);
1078                 return (0);
1079         }
1080 
1081         return (EINVAL);
1082 }
1083 
1084 static int
1085 varpd_svp_save(void *arg, nvlist_t *nvp)
1086 {
1087         int ret;
1088         svp_t *svp = arg;
1089 
1090         mutex_enter(&svp->svp_lock);
1091         /* svp/host */
1092         if (svp->svp_host != NULL) {
1093                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1094                     svp->svp_host)) != 0) {
1095                         mutex_exit(&svp->svp_lock);
1096                         return (ret);
1097                 }
1098         }
1099 
1100         /* svp/port */
1101         if (svp->svp_port != 0) {
1102                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1103                     svp->svp_port)) != 0) {
1104                         mutex_exit(&svp->svp_lock);
1105                         return (ret);
1106                 }
1107         }
1108 
1109         /* svp/underlay_ip */
1110         if (svp->svp_huip == B_TRUE) {
1111                 char buf[INET6_ADDRSTRLEN];
1112 
1113                 if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1114                     NULL)
1115                         libvarpd_panic("unexpected inet_ntop failure: %d",
1116                             errno);
1117 
1118                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1119                     buf)) != 0) {
1120                         mutex_exit(&svp->svp_lock);
1121                         return (ret);
1122                 }
1123         }
1124 
1125         /* svp/underlay_port */
1126         if (svp->svp_uport != 0) {
1127                 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1128                     svp->svp_uport)) != 0) {
1129                         mutex_exit(&svp->svp_lock);
1130                         return (ret);
1131                 }
1132         }
1133 
1134         /* svp/dcid */
1135         if (svp->svp_dcid != 0) {
1136                 if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1137                     svp->svp_dcid)) != 0) {
1138                         mutex_exit(&svp->svp_lock);
1139                         return (ret);
1140                 }
1141         }
1142 
1143         /* svp/router_oui */
1144         if (!ether_is_zero(&svp->svp_router_oui)) {
1145                 char buf[ETHERADDRSTRL];
1146 
1147                 /* XXX KEBE SAYS See underlay_ip... */
1148                 if (ether_ntoa_r(&svp->svp_router_oui, buf) == NULL) {
1149                         libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1150                             errno);
1151                 }
1152 
1153                 if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1154                     buf)) != 0) {
1155                         mutex_exit(&svp->svp_lock);
1156                         return (ret);
1157                 }
1158         }
1159 
1160         mutex_exit(&svp->svp_lock);
1161         return (0);
1162 }
1163 
1164 static int
1165 varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1166     overlay_plugin_dest_t dest, void **outp)
1167 {
1168         int ret;
1169         svp_t *svp;
1170         char *ipstr, *hstr, *etherstr;
1171 
1172         if (varpd_svp_valid_dest(dest) == B_FALSE)
1173                 return (ENOTSUP);
1174 
1175         if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1176                 return (ret);
1177 
1178         /* svp/host */
1179         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1180             &hstr)) != 0) {
1181                 if (ret != ENOENT) {
1182                         varpd_svp_destroy(svp);
1183                         return (ret);
1184                 }
1185                 svp->svp_host = NULL;
1186         } else {
1187                 size_t blen = strlen(hstr) + 1;
1188                 svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
1189                 (void) strlcpy(svp->svp_host, hstr, blen);
1190         }
1191 
1192         /* svp/port */
1193         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1194             &svp->svp_port)) != 0) {
1195                 if (ret != ENOENT) {
1196                         varpd_svp_destroy(svp);
1197                         return (ret);
1198                 }
1199                 svp->svp_port = 0;
1200         }
1201 
1202         /* svp/underlay_ip */
1203         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1204             &ipstr)) != 0) {
1205                 if (ret != ENOENT) {
1206                         varpd_svp_destroy(svp);
1207                         return (ret);
1208                 }
1209                 svp->svp_huip = B_FALSE;
1210         } else {
1211                 ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1212                 if (ret == -1) {
1213                         assert(errno == EAFNOSUPPORT);
1214                         libvarpd_panic("unexpected inet_pton failure: %d",
1215                             errno);
1216                 }
1217 
1218                 if (ret == 0) {
1219                         varpd_svp_destroy(svp);
1220                         return (EINVAL);
1221                 }
1222                 svp->svp_huip = B_TRUE;
1223         }
1224 
1225         /* svp/underlay_port */
1226         if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1227             &svp->svp_uport)) != 0) {
1228                 if (ret != ENOENT) {
1229                         varpd_svp_destroy(svp);
1230                         return (ret);
1231                 }
1232                 svp->svp_uport = 0;
1233         }
1234 
1235         /* svp/dcid */
1236         if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1237             &svp->svp_dcid)) != 0) {
1238                 if (ret != ENOENT) {
1239                         varpd_svp_destroy(svp);
1240                         return (ret);
1241                 }
1242                 svp->svp_dcid = 0;
1243         }
1244 
1245         /* svp/router_oui */
1246         if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1247             ðerstr)) != 0) {
1248                 if (ret != ENOENT) {
1249                         varpd_svp_destroy(svp);
1250                         return (ret);
1251                 }
1252                 bzero(&svp->svp_router_oui, ETHERADDRL);
1253         } else if (ether_aton_r(etherstr, &svp->svp_router_oui) == NULL) {
1254                 libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1255         }
1256 
1257         svp->svp_hdl = hdl;
1258         *outp = svp;
1259         return (0);
1260 }
1261 
1262 static void
1263 varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1264     const struct sockaddr *sock, uint8_t *out)
1265 {
1266         svp_t *svp = arg;
1267         svp_lookup_t *svl;
1268 
1269         if (type != VARPD_QTYPE_ETHERNET) {
1270                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1271                 return;
1272         }
1273 
1274         svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1275         if (svl == NULL) {
1276                 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1277                 return;
1278         }
1279 
1280         svl->svl_type = SVP_L_VL3;
1281         svl->svl_u.svl_vl3.svl_vah = vah;
1282         svl->svl_u.svl_vl3.svl_out = out;
1283         svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1284 }
1285 
/*
 * Plugin operations vector handed to libvarpd.  Positional initializer, so
 * the slot meanings below are inferred from the registered functions'
 * names — confirm against varpd_plugin_ops_t if reordering.  The NULL
 * slots are optional operations this plugin does not implement.
 */
static const varpd_plugin_ops_t varpd_svp_ops = {
	0,			/* presumably version/flags — confirm */
	varpd_svp_create,
	varpd_svp_start,
	varpd_svp_stop,
	varpd_svp_destroy,
	NULL,			/* optional op, unused by svp */
	varpd_svp_lookup,
	varpd_svp_nprops,
	varpd_svp_propinfo,
	varpd_svp_getprop,
	varpd_svp_setprop,
	varpd_svp_save,
	varpd_svp_restore,
	varpd_svp_arp,
	NULL			/* optional op, unused by svp */
};
1303 
1304 static int
1305 svp_bunyan_init(void)
1306 {
1307         int ret;
1308 
1309         if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1310                 return (ret);
1311         ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1312             bunyan_stream_fd, (void *)STDERR_FILENO);
1313         if (ret != 0)
1314                 bunyan_fini(svp_bunyan);
1315         return (ret);
1316 }
1317 
/*
 * Tear down the plugin's bunyan logger, if it was ever successfully
 * created (svp_bunyan stays NULL when svp_bunyan_init() failed).
 */
static void
svp_bunyan_fini(void)
{
	if (svp_bunyan != NULL)
		bunyan_fini(svp_bunyan);
}
1324 
1325 #pragma init(varpd_svp_init)
1326 static void
1327 varpd_svp_init(void)
1328 {
1329         int err;
1330         varpd_plugin_register_t *vpr;
1331 
1332         if (svp_bunyan_init() != 0)
1333                 return;
1334 
1335         if ((err = svp_host_init()) != 0) {
1336                 (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1337                     BUNYAN_T_INT32, "error", err,
1338                     BUNYAN_T_END);
1339                 svp_bunyan_fini();
1340                 return;
1341         }
1342 
1343         svp_lookup_cache = umem_cache_create("svp_lookup",
1344             sizeof (svp_lookup_t),  0, NULL, NULL, NULL, NULL, NULL, 0);
1345         if (svp_lookup_cache == NULL) {
1346                 (void) bunyan_error(svp_bunyan,
1347                     "failed to create svp_lookup cache",
1348                     BUNYAN_T_INT32, "error", errno,
1349                     BUNYAN_T_END);
1350                 svp_bunyan_fini();
1351                 return;
1352         }
1353 
1354         if ((err = svp_event_init()) != 0) {
1355                 (void) bunyan_error(svp_bunyan,
1356                     "failed to init event subsystem",
1357                     BUNYAN_T_INT32, "error", err,
1358                     BUNYAN_T_END);
1359                 svp_bunyan_fini();
1360                 umem_cache_destroy(svp_lookup_cache);
1361                 return;
1362         }
1363 
1364         if ((err = svp_timer_init()) != 0) {
1365                 (void) bunyan_error(svp_bunyan,
1366                     "failed to init timer subsystem",
1367                     BUNYAN_T_INT32, "error", err,
1368                     BUNYAN_T_END);
1369                 svp_event_fini();
1370                 umem_cache_destroy(svp_lookup_cache);
1371                 svp_bunyan_fini();
1372                 return;
1373         }
1374 
1375         if ((err = svp_remote_init()) != 0) {
1376                 (void) bunyan_error(svp_bunyan,
1377                     "failed to init remote subsystem",
1378                     BUNYAN_T_INT32, "error", err,
1379                     BUNYAN_T_END);
1380                 svp_event_fini();
1381                 umem_cache_destroy(svp_lookup_cache);
1382                 svp_bunyan_fini();
1383                 return;
1384         }
1385 
1386         vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1387         if (vpr == NULL) {
1388                 (void) bunyan_error(svp_bunyan,
1389                     "failed to alloc varpd plugin",
1390                     BUNYAN_T_INT32, "error", err,
1391                     BUNYAN_T_END);
1392                 svp_remote_fini();
1393                 svp_event_fini();
1394                 umem_cache_destroy(svp_lookup_cache);
1395                 svp_bunyan_fini();
1396                 return;
1397         }
1398 
1399         vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1400         vpr->vpr_name = "svp";
1401         vpr->vpr_ops = &varpd_svp_ops;
1402 
1403         if ((err = libvarpd_plugin_register(vpr)) != 0) {
1404                 (void) bunyan_error(svp_bunyan,
1405                     "failed to register varpd plugin",
1406                     BUNYAN_T_INT32, "error", err,
1407                     BUNYAN_T_END);
1408                 svp_remote_fini();
1409                 svp_event_fini();
1410                 umem_cache_destroy(svp_lookup_cache);
1411                 svp_bunyan_fini();
1412 
1413         }
1414         libvarpd_plugin_free(vpr);
1415 }