Interpret sl3a_uport == 0 in SVP_R_VL3_ACK to indicate the VL3 IP is a
next-hop router.
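
A minimal sketch of what this convention implies for a consumer (hypothetical,
self-contained code; svp_router_mac() is an illustrative helper, not part of
the plugin): when sl3a_uport == 0 the reply carries no usable VL2 address, so
the plugin synthesizes a special router MAC by concatenating the configured
3-byte svp/router_oui with the 24-bit virtual network ID.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define ETHERADDRL 6

    /*
     * Build the special next-hop router MAC: high 3 bytes are the
     * configured OUI, low 3 bytes are the 24-bit vnet ID.
     */
    static void
    svp_router_mac(const uint8_t oui[3], uint32_t vid,
        uint8_t mac[ETHERADDRL])
    {
            (void) memcpy(mac, oui, 3);
            mac[3] = (vid >> 16) & 0xff;
            mac[4] = (vid >> 8) & 0xff;
            mac[5] = vid & 0xff;
    }

    int
    main(void)
    {
            const uint8_t oui[3] = { 0x02, 0x08, 0x20 };    /* example OUI */
            uint8_t mac[ETHERADDRL];

            /* vnet ID 0x123456 yields 02:08:20:12:34:56. */
            svp_router_mac(oui, 0x123456, mac);
            (void) printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
                mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
            return (0);
    }

The zero underlay port is then the signal to the overlay device below that the
returned MAC is this synthetic one rather than a real VL2 address.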
    
      
    
          --- old/usr/src/lib/varpd/svp/common/libvarpd_svp.c
          +++ new/usr/src/lib/varpd/svp/common/libvarpd_svp.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2018, Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * This plugin implements the SDC VXLAN Protocol (SVP).
  18   18   *
  19   19   * This plugin is designed to work with a broader distributed system that
  20   20   * maintains a database of mappings and provides a means of looking up data
  21   21   * and a stream of updates. While it is named after VXLAN, there isn't
  22   22   * anything specific to VXLAN baked into the protocol at this time, other than
  23   23   * that it requires both an IP address and a port; however, if there's a good
  24   24   * reason to support others here, we can modify that.
  25   25   *
  26   26   * -----------
  27   27   * Terminology
  28   28   * -----------
  29   29   *
  30   30   * Throughout this module we refer to a few different kinds of addresses:
  31   31   *
  32   32   *    VL3
  33   33   *
  34   34   *      A VL3 address, or virtual layer 3, refers to the layer three addresses
  35   35   *      that are used by entities on an overlay network. As far as we're
  36   36   *      concerned that means that this is the IP address of an interface on an
  37   37   *      overlay network.
  38   38   *
  39   39   *    VL2
  40   40   *
  41   41   *      A VL2 address, or a virtual layer 2, refers to the link-layer addresses
  42   42   *      that are used by entities on an overlay network. As far as we're
  43   43   *      concerned that means that this is the MAC address of an interface on
  44   44   *      an overlay network.
  45   45   *
  46   46   *    UL3
  47   47   *
  48   48   *      A UL3, or underlay layer 3, refers to the layer three (IP) address on
  49   49   *      the underlay network.
  50   50   *
  51   51   * The svp plugin provides lookups from VL3->VL2, e.g. the equivalent of an ARP
  52   52   * or NDP query, and then also provides VL2->UL3 lookups.
  53   53   *
  54   54   * -------------------
  55   55   * Protocol Operations
  56   56   * -------------------
  57   57   *
  58   58   * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
  59   59   * defines the basic TCP protocol that we use to communicate to hosts. At this
  60   60   * time, it is not quite 100% implemented in both this plug-in and our primary
  61   61   * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
  62   62   *
  63   63   * At this time, we don't quite support everything that we need to, including
  64   64   * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN operations.
  65   65   *
  66   66   * ---------------------------------
  67   67   * General Design and Considerations
  68   68   * ---------------------------------
  69   69   *
  70   70   * Every instance of the svp plugin requires the hostname and port of a server
  71   71   * to contact, though we have co-opted port 1296 (the year of the oldest
  72   72   * extant portolan) as our default port.
  73   73   *
  74   74   * Each of the different instances of the plugin has a corresponding remote
  75   75   * backend. The remote backend represents the tuple of the [ host, port ].
  76   76   * Different instances that share the same host and port tuple will use the same
  77   77   * backend.
  78   78   *
  79   79   * The backend is actually in charge of performing lookups, resolving and
  80   80   * updating the set of remote hosts based on the DNS resolution we've been
  81   81   * provided, and taking care of things like shootdowns.
  82   82   *
  83   83   * The whole plugin itself maintains an event loop and a number of threads to
  84   84   * service that event loop. On top of that event loop, we have a simple timer
  85   85   * backend that ticks at one second intervals and performs various callbacks,
  86   86   * such as idle query timers, DNS resolution, connection backoff, etc. Each of
  87   87   * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
  88   88   * the connection state, reconnecting, etc.
  89   89   *
  90   90   * All in all, the general way that this all looks is:
  91   91   *
  92   92   *  +----------------------------+
  93   93   *  | Plugin Instance            |
  94   94   *  | svp_t                      |
  95   95   *  |                            |
  96   96   *  | varpd_provider_handle_t * -+-> varpd handle
  97   97   *  | uint64_t               ----+-> varpd ID
  98   98   *  | char *                 ----+-> remote host
  99   99   *  | uint16_t               ----+-> remote port
 100  100   *  | svp_remote_t *   ---+------+-> remote backend
 101  101   *  +---------------------+------+
 102  102   *                        |
 103  103   *                        v
 104  104   *   +----------------------+                   +----------------+
 105  105   *   | Remote backend       |------------------>| Remote Backend |---> ...
 106  106   *   | svp_remote_t         |                   | svp_remote_t   |
 107  107   *   |                      |                   +----------------+
 108  108   *   | svp_remote_state_t --+-> state flags
 109  109   *   | svp_degrade_state_t -+-> degraded reason
 110  110   *   | struct addrinfo *  --+-> resolved hosts
 111  111   *   | uint_t            ---+-> active hosts
 112  112   *   | uint_t            ---+-> DNS generation
 113  113   *   | uint_t            ---+-> Reference count
 114  114   *   | uint_t            ---+-> active conns
 115  115   *   | uint_t            ---+-> degraded conns
 116  116   *   | list_t        ---+---+-> connection list
 117  117   *   +------------------+---+
 118  118   *                      |
 119  119   *                      +------------------------------+-----------------+
 120  120   *                      |                              |                 |
 121  121   *                      v                              v                 v
 122  122   *   +-------------------+                       +----------------+
 123  123   *   | SVP Connection    |                       | SVP connection |     ...
 124  124   *   | svp_conn_t        |                       | svp_conn_t     |
 125  125   *   |                   |                       +----------------+
 126  126   *   | svp_event_t   ----+-> event loop handle
 127  127   *   | svp_timer_t   ----+-> backoff timer
 128  128   *   | svp_timer_t   ----+-> query timer
 129  129   *   | int           ----+-> socket fd
 130  130   *   | uint_t        ----+-> generation
 131  131   *   | uint_t        ----+-> current backoff
 132  132   *   | svp_conn_flags_t -+-> connection flags
 133  133   *   | svp_conn_state_t -+-> connection state
 134  134   *   | svp_conn_error_t -+-> connection error
 135  135   *   | int            ---+-> last errno
 136  136   *   | hrtime_t       ---+-> activity timestamp
 137  137   *   | svp_conn_out_t ---+-> outgoing data state
 138  138   *   | svp_conn_in_t  ---+-> incoming data state
 139  139   *   | list_t      ---+--+-> active queries
 140  140   *   +----------------+--+
 141  141   *                    |
 142  142   *                    +----------------------------------+-----------------+
 143  143   *                    |                                  |                 |
 144  144   *                    v                                  v                 v
 145  145   *   +--------------------+                       +-------------+
 146  146   *   | SVP Query          |                       | SVP Query   |         ...
 147  147   *   | svp_query_t        |                       | svp_query_t |
 148  148   *   |                    |                       +-------------+
 149  149   *   | svp_query_f     ---+-> callback function
 150  150   *   | void *          ---+-> callback arg
 151  151   *   | svp_query_state_t -+-> state flags
 152  152   *   | svp_req_t       ---+-> svp prot. header
 153  153   *   | svp_query_data_t --+-> read data
 154  154   *   | svp_query_data_t --+-> write data
 155  155   *   | svp_status_t    ---+-> request status
 156  156   *   +--------------------+
 157  157   *
 158  158   * The svp_t is the instance that we associate with varpd. The instance itself
 159  159   * maintains properties and then when it's started associates with an
 160  160   * svp_remote_t, which is the remote backend. The remote backend itself
 161  161   * maintains the DNS state and spins connections up and down based on the
 162  162   * results from DNS. By default, we query DNS every 30 seconds. For more on the
 163  163   * connection life cycle, see the next section.
 164  164   *
 165  165   * By default, each connection maintains its own backoff timer and list of
 166  166   * queries it's servicing. Only one request is generally outstanding at a time
 167  167   * and requests are round-robined across the various connections.
 168  168   *
 169  169   * The query itself represents the svp request that's in flight, keeps track
 170  170   * of its state, and holds the data that's read and written as part of the
 171  171   * request.
 172  172   *
 173  173   * Connections maintain a query timer such that if we have not received data on
 174  174   * a socket for a certain amount of time, we kill that socket and begin a
 175  175   * reconnection cycle with backoff.
 176  176   *
 177  177   * ------------------------
 178  178   * Connection State Machine
 179  179   * ------------------------
 180  180   *
 181  181   * We have a connection pool that's built upon DNS records. DNS describes the
 182  182   * membership of the set of remote peers that make up our pool and we maintain
 183  183   * one connection to each of them.  In addition, we maintain an exponential
 184  184   * backoff for each peer and will attempt to reconnect immediately before backing
 185  185   * off. The following are the valid states that a connection can be in:
 186  186   *
 187  187   *      SVP_CS_ERROR            An OS error has occurred on this connection,
 188  188   *                              such as failure to create a socket or associate
 189  189   *                              the socket with an event port. We also
 190  190   *                              transition all connections to this state before
 191  191   *                              we destroy them.
 192  192   *
 193  193   *      SVP_CS_INITIAL          This is the initial state of a connection, all
 194  194   *                              that should exist is an unbound socket.
 195  195   *
 196  196   *      SVP_CS_CONNECTING       A call to connect has been made and we are
 197  197   *                              polling for it to complete.
 198  198   *
 199  199   *      SVP_CS_BACKOFF          A connect attempt has failed and we are
 200  200   *                              currently backing off, waiting to try again.
 201  201   *
 202  202   *      SVP_CS_ACTIVE           We have successfully connected to the remote
 203  203   *                              system.
 204  204   *
 205  205   *      SVP_CS_WINDDOWN         This connection is going to valhalla. In other
 206  206   *                              words, a previously active connection is no
 207  207   *                              longer valid in DNS, so we should curb our use
 208  208   *                              of it, and reap it as soon as we have other
 209  209   *                              active connections.
 210  210   *
 211  211   * The following diagram attempts to describe our state transition scheme, and
 212  212   * when we transition from one state to the next.
 213  213   *
 214  214   *                               |
 215  215   *                               * New remote IP from DNS resolution,
 216  216   *                               | not currently active in the system.
 217  217   *                               |
 218  218   *                               v                                Socket Error,
 219  219   *                       +----------------+                       still in DNS
 220  220   *  +----------------<---| SVP_CS_INITIAL |<----------------------*-----+
 221  221   *  |                    +----------------+                             |
 222  222   *  |                            System  |                              |
 223  223   *  | Connection . . . . .       success *               Successful     |
 224  224   *  | failed             .               |               connect()      |
 225  225   *  |               +----*---------+     |        +-----------*--+      |
 226  226   *  |               |              |     |        |              |      |
 227  227   *  |               V              ^     v        ^              V      ^
 228  228   *  |  +----------------+         +-------------------+     +---------------+
 229  229   *  +<-| SVP_CS_BACKOFF |         | SVP_CS_CONNECTING |     | SVP_CS_ACTIVE |
 230  230   *  |  +----------------+         +-------------------+     +---------------+
 231  231   *  |               V              ^  V                       V  V
 232  232   *  | Backoff wait  *              |  |                       |  * Removed
 233  233   *  v interval      +--------------+  +-----------------<-----+  | from DNS
 234  234   *  | finished                        |                          |
 235  235   *  |                                 V                          |
 236  236   *  |                                 |                          V
 237  237   *  |                                 |            +-----------------+
 238  238   *  +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
 239  239   *                   |                             +-----------------+
 240  240   *                   * . . .   Fatal system, not
 241  241   *                   |         socket error or
 242  242   *                   V         quiesced after
 243  243   *           +--------------+  removal from DNS
 244  244   *           | SVP_CS_ERROR |
 245  245   *           +--------------+
 246  246   *                   |
 247  247   *                   * . . . Removed from DNS
 248  248   *                   v
 249  249   *            +------------+
 250  250   *            | Connection |
 251  251   *            | Destroyed  |
 252  252   *            +------------+
 253  253   *
 254  254   * --------------------------
 255  255   * Connection Event Injection
 256  256   * --------------------------
 257  257   *
 258  258   * For each connection that exists in the system, we have a timer in place that
 259  259   * is in charge of performing timeout activity. It fires once every thirty
 260  260   * seconds or so for a given connection and checks to ensure that we have had
 261  261   * activity for the most recent query on the connection. If not, it terminates
 262  262   * the connection. This is important: if we have sent all our data and are
 263  263   * waiting for the remote end to reply, without enabling something like TCP
 264  264   * keep-alive, we will not be notified of anything that has happened to the
 265  265   * remote connection, for example a panic. In addition, this also protects
 266  266   * against a server that is up, but a portolan that is not making forward
 267  267   * progress.
 268  268   *
 269  269   * When a timeout occurs, we first try to disassociate any active events, which
 270  270   * by definition must exist. Once that's done, we inject a port source user
 271  271   * event. Now, there is a small gotcha. Let's assume for a moment that we have a
 272  272   * pathological portolan. That means that it knows to inject activity right at
 273  273   * the timeout window. That means that the event may be disassociated before
 274  274   * we could get to it. If that's the case, we must _not_ inject the user event
 275  275   * and instead, we'll let the pending event take care of it. We know that the
 276  276   * pending event hasn't hit the main part of the loop yet, otherwise, it would
 277  277   * have released the lock protecting our state and associated the event.
 278  278   *
 279  279   * ------------
 280  280   * Notes on DNS
 281  281   * ------------
 282  282   *
 283  283   * Unfortunately, doing host name resolution in a way that allows us to leverage
 284  284   * the system's resolvers and the system's caching requires us to make blocking
 285  285   * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
 286  286   * that will tie up a thread for quite some time. To work around that fact,
 287  287   * we're going to create a fixed number of threads and we'll use them to service
 288  288   * our DNS requests. While this isn't ideal, until we have a sane means of
 289  289   * integrating DNS resolution into an event loop with say portfs, it's not
 290  290   * going to be a fun day no matter what we do.
 291  291   *
 292  292   * ------
 293  293   * Timers
 294  294   * ------
 295  295   *
 296  296   * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
 297  297   * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves
 298  298   * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
 299  299   * access, we don't rely on it. The timer itself is just a simple avl tree
 300  300   * sorted by expiration time, which is stored as a tick in the future; a tick
 301  301   * is just one second (see the comparator sketch after this comment block).
 302  302   *
 303  303   * ----------
 304  304   * Shootdowns
 305  305   * ----------
 306  306   *
 307  307   * As part of the protocol, we need to be able to handle shootdowns that inform
 308  308   * us that some of the information in the system is out of date. This information
 309  309   * needs to be processed promptly; however, the information is hopefully going
 310  310   * to be relatively infrequent relative to the normal flow of information.
 311  311   *
 312  312   * The shootdown information needs to be processed on a per-backend basis. The
 313  313   * general design is that we'll have a single query for this which can fire on a
 314  314   * 5-10s period; we randomize the latter part to give us a bit more load
 315  315   * spreading (see the delay sketch after this comment block). If we complete
 316  316   * because there's no work to do, then we wait the normal period. If we
 317  317   * complete, but there's still work to do, we'll go again after a second.
 318  318   *
 319  319   * A shootdown has a few different parts. We first receive a list of items to
 320  320   * shoot down. After performing all of those, we need to acknowledge them. When
 321  321   * that's been done successfully, we can move onto the next part. From a
 322  322   * protocol perspective, we make an SVP_R_LOG_REQ, we get a reply, and then after
 323  323   * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
 324  324   * continue.
 325  325   *
 326  326   * However, one of the challenges that we have is that these invalidations are
 327  327   * just that, an invalidation. For a virtual layer two request, that's fine,
 328  328   * because the kernel supports that. However, for virtual layer three
 329  329   * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
 330  330   * don't really support a notion of just an invalidation; instead, you have to
 331  331   * inject the new data in a gratuitous fashion.
 332  332   *
 333  333   * To that end, what we instead do is when we receive a VL3 invalidation, we
 334  334   * turn that into a VL3 request. We hold the general request as outstanding
 335  335   * until we receive all of the callbacks for the VL3 invalidations, at which
 336  336   * point we go through and do the log removal request.
 337  337   */
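
The tick-sorted avl tree described in the Timers section reduces to an
ordering on absolute expiration ticks. A minimal, self-contained comparator
sketch follows; the ex_timer_t type and its field are hypothetical stand-ins,
not the plugin's real svp_timer_t.

    #include <stdio.h>
    #include <stdint.h>

    typedef struct ex_timer {
            uint64_t        ext_tick;       /* absolute expiration, in ticks */
    } ex_timer_t;

    static int
    ex_timer_comparator(const void *l, const void *r)
    {
            const ex_timer_t *lt = l;
            const ex_timer_t *rt = r;

            if (lt->ext_tick > rt->ext_tick)
                    return (1);
            if (lt->ext_tick < rt->ext_tick)
                    return (-1);
            /* A real tree must also break ties, e.g. by comparing addresses. */
            return (0);
    }

    int
    main(void)
    {
            ex_timer_t a = { 5 }, b = { 9 };

            (void) printf("%d\n", ex_timer_comparator(&a, &b));     /* -1 */
            return (0);
    }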
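
Likewise, the randomized shootdown period from the Shootdowns section can be
sketched as below; svp_log_delay() is a hypothetical helper, not the plugin's
API. An empty log waits the normal, jittered 5-10 second period, while a log
with work remaining retries after one second.

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static unsigned int
    svp_log_delay(int work_remaining)
    {
            if (work_remaining)
                    return (1);     /* still draining: go again soon */
            /* Randomize across 5-10s to spread load on the server. */
            return (5 + (unsigned int)(rand() % 6));
    }

    int
    main(void)
    {
            srand((unsigned int)time(NULL));
            (void) printf("idle: %us, busy: %us\n",
                svp_log_delay(0), svp_log_delay(1));
            return (0);
    }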
 338  338  
 339  339  #include <umem.h>
 340  340  #include <errno.h>
 341  341  #include <stdlib.h>
 342  342  #include <sys/types.h>
 343  343  #include <sys/socket.h>
 344  344  #include <netinet/in.h>
 345  345  #include <arpa/inet.h>
 346  346  #include <libnvpair.h>
 347  347  #include <strings.h>
 348  348  #include <string.h>
 349  349  #include <assert.h>
 350  350  #include <unistd.h>
 351  351  
 352  352  #include <libvarpd_provider.h>
 353  353  #include "libvarpd_svp.h"
 354  354  
 355  355  bunyan_logger_t *svp_bunyan;
 356  356  static int svp_defport = 1296;
 357  357  static int svp_defuport = 1339;
 358  358  static umem_cache_t *svp_lookup_cache;
 359  359  
 360  360  typedef enum svp_lookup_type {
 361  361          SVP_L_UNKNOWN   = 0x0,
 362  362          SVP_L_VL2       = 0x1,
 363  363          SVP_L_VL3       = 0x2,
 364  364          SVP_L_ROUTE     = 0x3
 365  365  } svp_lookup_type_t;
 366  366  
 367  367  typedef struct svp_lookup {
 368  368          int svl_type;
 369  369          union {
 370  370                  struct svl_lookup_vl2 {
 371  371                          varpd_query_handle_t    *svl_handle;
 372  372                          overlay_target_point_t  *svl_point;
 373  373                  } svl_vl2;
 374  374                  struct svl_lookup_vl3 {
 375  375                          varpd_arp_handle_t      *svl_vah;
 376  376                          uint8_t                 *svl_out;
 377  377                  } svl_vl3;
 378  378                  struct svl_lookup_route {
 379  379                          varpd_query_handle_t    *svl_handle;
 380  380                          overlay_target_point_t  *svl_point;
 381  381                          overlay_target_route_t  *svl_route;
 382  382                  } svl_route;
  
 383  383          } svl_u;
 384  384          svp_query_t                             svl_query;
 385  385  } svp_lookup_t;
 386  386  
 387  387  static const char *varpd_svp_props[] = {
 388  388          "svp/host",
 389  389          "svp/port",
 390  390          "svp/underlay_ip",
 391  391          "svp/underlay_port",
 392  392          "svp/dcid",
 393      -        "svp/router_mac"
      393 +        "svp/router_oui"
 394  394  };
 395  395  
 396  396  static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 397  397  
 398  398  int
 399  399  svp_comparator(const void *l, const void *r)
 400  400  {
 401  401          const svp_t *ls = l;
 402  402          const svp_t *rs = r;
 403  403  
 404  404          if (ls->svp_vid > rs->svp_vid)
 405  405                  return (1);
 406  406          if (ls->svp_vid < rs->svp_vid)
 407  407                  return (-1);
 408  408          return (0);
 409  409  }
 410  410  
 411  411  static void
 412  412  svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
 413  413      const uint16_t uport, void *arg)
 414  414  {
 415  415          svp_lookup_t *svl = arg;
 416  416          overlay_target_point_t *otp;
 417  417  
 418  418          assert(svp != NULL);
 419  419          assert(arg != NULL);
 420  420  
 421  421          if (status != SVP_S_OK) {
 422  422                  libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 423  423                      VARPD_LOOKUP_DROP);
 424  424                  umem_cache_free(svp_lookup_cache, svl);
 425  425                  return;
 426  426          }
 427  427  
 428  428          otp = svl->svl_u.svl_vl2.svl_point;
 429  429          bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
 430  430          otp->otp_port = uport;
 431  431          libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
  
 432  432              VARPD_LOOKUP_OK);
 433  433          umem_cache_free(svp_lookup_cache, svl);
 434  434  }
 435  435  
 436  436  static void
 437  437  svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
 438  438      const struct in6_addr *uip, const uint16_t uport, void *arg)
 439  439  {
 440  440          overlay_target_point_t point;
 441  441          svp_lookup_t *svl = arg;
      442 +        uint8_t nexthop_mac[ETHERADDRL] = { 0, 0, 0, 0, 0, 0 };
 442  443  
 443  444          assert(svp != NULL);
 444  445          assert(svl != NULL);
 445  446  
 446  447          if (status != SVP_S_OK) {
 447  448                  libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 448  449                      VARPD_LOOKUP_DROP);
 449  450                  umem_cache_free(svp_lookup_cache, svl);
 450  451                  return;
 451  452          }
 452  453  
 453  454          /* Inject the L2 mapping before the L3 */
 454  455          bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
 455  456          point.otp_port = uport;
 456      -        libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
      457 +        if (uport != 0) {
      458 +                /* Normal L3 lookup result... */
      459 +                libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
      460 +        } else {
      461 +                /*
      462 +                 * Oh my, we have a next-hop router IP.
      463 +                 * Set the MAC to the oui+vid concatenated
      464 +                 * special-router-MAC. Overlay down below will know
      465 +                 * that uport == 0 means the MAC is a special one.
      466 +                 */
      467 +                if (bcmp(svp->svp_router_oui, nexthop_mac, ETHERADDRL) == 0) {
      468 +                        /*
      469 +                         * We don't have a router_oui, so we can't support
      470 +                         * special-router-MAC.  Drop it.
      471 +                         */
      472 +                        libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
      473 +                            VARPD_LOOKUP_DROP);
      474 +                        umem_cache_free(svp_lookup_cache, svl);
      475 +                        return;
      476 +                }
      477 +                bcopy(svp->svp_router_oui, nexthop_mac, 3);
      478 +                nexthop_mac[3] = (svp->svp_vid >> 16) & 0xff;
      479 +                nexthop_mac[4] = (svp->svp_vid >> 8) & 0xff;
      480 +                nexthop_mac[5] = svp->svp_vid & 0xff;
      481 +                vl2mac = nexthop_mac;
      482 +        }
 457  483  
 458  484          bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
 459  485          libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 460  486              VARPD_LOOKUP_OK);
 461  487          umem_cache_free(svp_lookup_cache, svl);
 462  488  }
 463  489  
 464  490  static void
 465  491  svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
 466  492  {
 467  493          libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 468  494  }
 469  495  
 470  496  static void
 471  497  svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
 472  498      const uint8_t *vl2mac, const uint8_t *targmac)
 473  499  {
 474  500          struct in_addr v4;
 475  501  
 476  502          /*
 477  503           * At the moment we don't support any IPv6 related log entries, this
 478  504           * will change soon as we develop a bit more of the IPv6 related
 479  505           * infrastructure so we can properly test the injection.
 480  506           */
 481  507          if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
 482  508                  return;
 483  509          } else {
 484  510                  IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
 485  511                  if (targmac == NULL)
 486  512                          targmac = svp_bcast;
 487  513                  libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
 488  514          }
 489  515  }
 490  516  
 491  517  /* ARGSUSED */
 492  518  static void
 493  519  svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
 494  520      const uint16_t uport)
 495  521  {
 496  522          /*
 497  523           * We should probably do a conditional invalidation here.
 498  524           */
 499  525          libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 500  526  }
 501  527  
 502  528  static void
 503  529  svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
 504  530      uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
 505  531      uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
 506  532      void *arg)
 507  533  {
 508  534          svp_lookup_t *svl = arg;
 509  535          overlay_target_point_t *otp;
 510  536          overlay_target_route_t *otr;
 511  537  
 512  538          if (status != SVP_S_OK) {
 513  539                  libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 514  540                      VARPD_LOOKUP_DROP);
 515  541                  umem_cache_free(svp_lookup_cache, svl);
 516  542                  return;
 517  543          }
 518  544  
 519  545          otp = svl->svl_u.svl_route.svl_point;
 520  546          bcopy(dstmac, otp->otp_mac, ETHERADDRL);
 521  547          bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
 522  548          otp->otp_port = ul3_port;
 523  549  
 524  550          otr = svl->svl_u.svl_route.svl_route;
 525  551          otr->otr_vnet = vnetid;
 526  552          otr->otr_vlan = vlan;
 527  553          bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
 528  554          otr->otr_dcid = dcid;
 529  555          otr->otr_src_prefixlen = srcpfx;
 530  556          otr->otr_dst_prefixlen = dstpfx;
 531  557  
 532  558          libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 533  559              VARPD_LOOKUP_OK);
 534  560          umem_cache_free(svp_lookup_cache, svl);
 535  561  }
 536  562  
 537  563  static svp_cb_t svp_defops = {
 538  564          svp_vl2_lookup_cb,
 539  565          svp_vl3_lookup_cb,
 540  566          svp_vl2_invalidate_cb,
 541  567          svp_vl3_inject_cb,
 542  568          svp_shootdown_cb,
 543  569          svp_route_lookup_cb,
 544  570  };
 545  571  
 546  572  static boolean_t
 547  573  varpd_svp_valid_dest(overlay_plugin_dest_t dest)
 548  574  {
 549  575          if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 550  576                  return (B_FALSE);
 551  577  
 552  578          return (B_TRUE);
 553  579  }
 554  580  
 555  581  static int
 556  582  varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
 557  583      overlay_plugin_dest_t dest)
 558  584  {
 559  585          int ret;
 560  586          svp_t *svp;
 561  587  
 562  588          if (varpd_svp_valid_dest(dest) == B_FALSE)
 563  589                  return (ENOTSUP);
 564  590  
 565  591          svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
 566  592          if (svp == NULL)
 567  593                  return (ENOMEM);
 568  594  
 569  595          if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 570  596              NULL)) != 0) {
 571  597                  umem_free(svp, sizeof (svp_t));
 572  598                  return (ret);
 573  599          }
 574  600  
 575  601          svp->svp_port = svp_defport;
 576  602          svp->svp_uport = svp_defuport;
 577  603          svp->svp_cb = svp_defops;
 578  604          svp->svp_hdl = hdl;
 579  605          svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
 580  606          *outp = svp;
 581  607          return (0);
 582  608  }
 583  609  
 584  610  static int
 585  611  varpd_svp_start(void *arg)
 586  612  {
 587  613          int ret;
 588  614          svp_remote_t *srp;
 589  615          svp_t *svp = arg;
 590  616  
 591  617          mutex_enter(&svp->svp_lock);
 592  618          if (svp->svp_host == NULL || svp->svp_port == 0 ||
 593  619              svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
 594  620                  mutex_exit(&svp->svp_lock);
 595  621                  return (EAGAIN);
 596  622          }
 597  623          mutex_exit(&svp->svp_lock);
 598  624  
 599  625          if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
 600  626              &srp)) != 0)
 601  627                  return (ret);
 602  628  
 603  629          if ((ret = svp_remote_attach(srp, svp)) != 0) {
 604  630                  svp_remote_release(srp);
 605  631                  return (ret);
 606  632          }
 607  633  
 608  634          return (0);
 609  635  }
 610  636  
 611  637  static void
 612  638  varpd_svp_stop(void *arg)
 613  639  {
 614  640          svp_t *svp = arg;
 615  641  
 616  642          svp_remote_detach(svp);
 617  643  }
 618  644  
 619  645  static void
 620  646  varpd_svp_destroy(void *arg)
 621  647  {
 622  648          svp_t *svp = arg;
 623  649  
 624  650          if (svp->svp_host != NULL)
 625  651                  umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 626  652  
 627  653          if (mutex_destroy(&svp->svp_lock) != 0)
 628  654                  libvarpd_panic("failed to destroy svp_t`svp_lock");
 629  655  
 630  656          umem_free(svp, sizeof (svp_t));
 631  657  }
 632  658  
 633  659  static void
 634  660  varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
 635  661      const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 636  662      overlay_target_route_t *otr)
 637  663  {
 638  664          svp_lookup_t *slp;
 639  665          uint32_t type;
 640  666          const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
 641  667              *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 642  668  
 643  669          /*
 644  670           * otl is an L3 request, so we have src/dst IPs for the inner packet.
 645  671           * We also have the vlan.
 646  672           *
 647  673           * Assume the kernel's overlay module is caching well, so we query
 648  674           * directly (i.e. no caching of actual destinations up here).
 649  675           *
 650  676           * We use our existing remote server (svp_remote), but with the new
 651  677           * message SVP_R_ROUTE_REQ.
 652  678           */
 653  679  
 654  680          /* XXX KEBE SAYS DO SOME otl verification too... */
 655  681          if (IN6_IS_ADDR_V4MAPPED(src)) {
 656  682                  if (!IN6_IS_ADDR_V4MAPPED(dst)) {
 657  683                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 658  684                          return;
 659  685                  }
 660  686                  type = SVP_VL3_IP;
 661  687          } else {
 662  688                  if (IN6_IS_ADDR_V4MAPPED(dst)) {
 663  689                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 664  690                          return;
 665  691                  }
 666  692                  type = SVP_VL3_IPV6;
 667  693          }
 668  694  
 669  695          slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 670  696          if (slp == NULL) {
 671  697                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 672  698                  return;
 673  699          }
 674  700  
 675  701          slp->svl_type = SVP_L_ROUTE;
 676  702          slp->svl_u.svl_route.svl_handle = vqh;
 677  703          slp->svl_u.svl_route.svl_point = otp;
 678  704          slp->svl_u.svl_route.svl_route = otr;
 679  705  
 680  706          svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
 681  707              otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
 682  708  }
 683  709  
 684  710  static void
 685  711  varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
 686  712      const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 687  713      overlay_target_route_t *otr)
 688  714  {
 689  715          svp_lookup_t *slp;
 690  716          svp_t *svp = arg;
 691  717  
 692  718          /*
 693  719           * Shuffle off L3 lookups to their own codepath.
 694  720           */
 695  721          if (otl->otl_l3req) {
 696  722                  varpd_svp_lookup_l3(svp, vqh, otl, otp, otr);
 697  723                  return;
 698  724          }
 699  725          /*
 700  726           * At this point, the traditional overlay_target_point_t is all that
 701  727           * needs filling in.  Zero-out the otr for safety.
 702  728           */
 703  729          bzero(otr, sizeof (*otr));
 704  730  
 705  731  
 706  732          /*
 707  733           * Check if this is something that we need to proxy, eg. arp or ndp.
 708  734           */
 709  735          if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
 710  736                  libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
 711  737                  return;
 712  738          }
 713  739  
 714  740          if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
 715  741              otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
 716  742                  if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
 717  743                          libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
 718  744                  } else {
 719  745                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 720  746                  }
 721  747                  return;
 722  748          }
 723  749  
 724  750          /*
 725  751           * Watch out for various multicast and broadcast addresses. We've
 726  752           * already taken care of the IPv6 range above. Now we just need to drop
 727  753           * broadcast, and anything with the multicast bit (lowest bit of the
 728  754           * first octet of the MAC) set; see the classifier sketch after this function.
 729  755           */
 730  756          if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
 731  757              ETHERADDRL) == 0 ||
 732  758              (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
 733  759                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 734  760                  return;
 735  761          }
 736  762  
 737  763          /*
 738  764           * If we have a failure to allocate memory for this, that's not good.
 739  765           * However, telling the kernel to just drop this packet is much better
 740  766           * than the alternative at this moment. At least we'll try again and we
 741  767           * may have something more available to us in a little bit.
 742  768           */
 743  769          slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 744  770          if (slp == NULL) {
 745  771                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 746  772                  return;
 747  773          }
 748  774  
 749  775          slp->svl_type = SVP_L_VL2;
 750  776          slp->svl_u.svl_vl2.svl_handle = vqh;
 751  777          slp->svl_u.svl_vl2.svl_point = otp;
 752  778  
 753  779          svp_remote_vl2_lookup(svp, &slp->svl_query,
 754  780              otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
 755  781  }
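
A minimal, self-contained sketch of the destination-MAC screening performed
above; drop_dst_mac() is a hypothetical name. Broadcast, or any address with
the multicast (group) bit of the first octet set, is dropped rather than
looked up.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define ETHERADDRL 6

    static const uint8_t bcast[ETHERADDRL] =
        { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

    /* Return nonzero if a destination MAC should be dropped, not looked up. */
    static int
    drop_dst_mac(const uint8_t mac[ETHERADDRL])
    {
            return (memcmp(mac, bcast, ETHERADDRL) == 0 ||
                (mac[0] & 0x01) == 0x01);
    }

    int
    main(void)
    {
            const uint8_t ucast[ETHERADDRL] = { 0x02, 0x08, 0x20, 1, 2, 3 };
            const uint8_t mcast[ETHERADDRL] = { 0x01, 0x00, 0x5e, 0, 0, 1 };

            /* Prints "0 1": unicast passes, multicast is dropped. */
            (void) printf("%d %d\n", drop_dst_mac(ucast), drop_dst_mac(mcast));
            return (0);
    }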
 756  782  
 757  783  /* ARGSUSED */
 758  784  static int
 759  785  varpd_svp_nprops(void *arg, uint_t *nprops)
 760  786  {
 761  787          *nprops = sizeof (varpd_svp_props) / sizeof (char *);
 762  788          return (0);
 763  789  }
 764  790  
 765  791  /* ARGSUSED */
 766  792  static int
 767  793  varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
 768  794  {
 769  795          switch (propid) {
 770  796          case 0:
 771  797                  /* svp/host */
 772  798                  libvarpd_prop_set_name(vph, varpd_svp_props[0]);
 773  799                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 774  800                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
 775  801                  libvarpd_prop_set_nodefault(vph);
 776  802                  break;
 777  803          case 1:
 778  804                  /* svp/port */
 779  805                  libvarpd_prop_set_name(vph, varpd_svp_props[1]);
 780  806                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 781  807                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 782  808                  (void) libvarpd_prop_set_default(vph, &svp_defport,
 783  809                      sizeof (svp_defport));
 784  810                  libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 785  811                  break;
 786  812          case 2:
 787  813                  /* svp/underlay_ip */
 788  814                  libvarpd_prop_set_name(vph, varpd_svp_props[2]);
 789  815                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 790  816                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
 791  817                  libvarpd_prop_set_nodefault(vph);
 792  818                  break;
 793  819          case 3:
 794  820                  /* svp/underlay_port */
 795  821                  libvarpd_prop_set_name(vph, varpd_svp_props[3]);
 796  822                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 797  823                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 798  824                  (void) libvarpd_prop_set_default(vph, &svp_defuport,
 799  825                      sizeof (svp_defuport));
 800  826                  libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 801  827                  break;
  
 802  828          case 4:
 803  829                  /* svp/dcid */
 804  830                  libvarpd_prop_set_name(vph, varpd_svp_props[4]);
 805  831                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 806  832                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 807  833                  libvarpd_prop_set_nodefault(vph);
 808  834                  /* XXX KEBE ASKS should I just set high to UINT32_MAX? */
 809  835                  libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
 810  836                  break;
 811  837          case 5:
 812      -                /* svp/router_mac */
      838 +                /* svp/router_oui */
 813  839                  libvarpd_prop_set_name(vph, varpd_svp_props[5]);
 814  840                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 815  841                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
 816  842                  libvarpd_prop_set_nodefault(vph);
 817  843                  break;
 818  844          default:
 819  845                  return (EINVAL);
 820  846          }
 821  847          return (0);
 822  848  }
 823  849  
 824  850  static int
 825  851  varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
 826  852  {
 827  853          svp_t *svp = arg;
 828  854  
 829  855          /* svp/host */
 830  856          if (strcmp(pname, varpd_svp_props[0]) == 0) {
 831  857                  size_t len;
 832  858  
 833  859                  mutex_enter(&svp->svp_lock);
 834  860                  if (svp->svp_host == NULL) {
 835  861                          *sizep = 0;
 836  862                  } else {
 837  863                          len = strlen(svp->svp_host) + 1;
 838  864                          if (*sizep < len) {
 839  865                                  mutex_exit(&svp->svp_lock);
 840  866                                  return (EOVERFLOW);
 841  867                          }
 842  868                          *sizep = len;
 843  869                          (void) strlcpy(buf, svp->svp_host, *sizep);
 844  870                  }
 845  871                  mutex_exit(&svp->svp_lock);
 846  872                  return (0);
 847  873          }
 848  874  
 849  875          /* svp/port */
 850  876          if (strcmp(pname, varpd_svp_props[1]) == 0) {
 851  877                  uint64_t val;
 852  878  
 853  879                  if (*sizep < sizeof (uint64_t))
 854  880                          return (EOVERFLOW);
 855  881  
 856  882                  mutex_enter(&svp->svp_lock);
 857  883                  if (svp->svp_port == 0) {
 858  884                          *sizep = 0;
 859  885                  } else {
 860  886                          val = svp->svp_port;
 861  887                          bcopy(&val, buf, sizeof (uint64_t));
 862  888                          *sizep = sizeof (uint64_t);
 863  889                  }
 864  890                  mutex_exit(&svp->svp_lock);
 865  891                  return (0);
 866  892          }
 867  893  
 868  894          /* svp/underlay_ip */
 869  895          if (strcmp(pname, varpd_svp_props[2]) == 0) {
 870  896                  if (*sizep < sizeof (struct in6_addr))
 871  897                          return (EOVERFLOW);
 872  898                  mutex_enter(&svp->svp_lock);
 873  899                  if (svp->svp_huip == B_FALSE) {
 874  900                          *sizep = 0;
 875  901                  } else {
 876  902                          bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
 877  903                          *sizep = sizeof (struct in6_addr);
 878  904                  }
 879  905                  mutex_exit(&svp->svp_lock);
 880  906                  return (0);
 881  907          }
 882  908  
 883  909          /* svp/underlay_port */
 884  910          if (strcmp(pname, varpd_svp_props[3]) == 0) {
 885  911                  uint64_t val;
 886  912  
 887  913                  if (*sizep < sizeof (uint64_t))
 888  914                          return (EOVERFLOW);
 889  915  
 890  916                  mutex_enter(&svp->svp_lock);
 891  917                  if (svp->svp_uport == 0) {
 892  918                          *sizep = 0;
 893  919                  } else {
 894  920                          val = svp->svp_uport;
 895  921                          bcopy(&val, buf, sizeof (uint64_t));
 896  922                          *sizep = sizeof (uint64_t);
 897  923                  }
 898  924  
 899  925                  mutex_exit(&svp->svp_lock);
 900  926                  return (0);
 901  927          }
 902  928  
 903  929          /* svp/dcid */
 904  930          if (strcmp(pname, varpd_svp_props[4]) == 0) {
 905  931                  uint64_t val;
 906  932  
 907  933                  if (*sizep < sizeof (uint64_t))
 908  934                          return (EOVERFLOW);
 909  935  
 910  936                  mutex_enter(&svp->svp_lock);
 911  937                  if (svp->svp_dcid == 0) {
 912  938                          *sizep = 0;
  
 913  939                  } else {
 914  940                          val = svp->svp_dcid;
 915  941                          bcopy(&val, buf, sizeof (uint64_t));
 916  942                          *sizep = sizeof (uint64_t);
 917  943                  }
 918  944  
 919  945                  mutex_exit(&svp->svp_lock);
 920  946                  return (0);
 921  947          }
 922  948  
 923      -        /* svp/router_mac */
      949 +        /* svp/router_oui */
 924  950          if (strcmp(pname, varpd_svp_props[5]) == 0) {
 925  951                  if (*sizep < ETHERADDRL)
 926  952                          return (EOVERFLOW);
 927  953                  mutex_enter(&svp->svp_lock);
 928  954  
 929      -                if (ether_is_zero(&svp->svp_router_mac)) {
      955 +                if (ether_is_zero(&svp->svp_router_oui)) {
 930  956                          *sizep = 0;
 931  957                  } else {
 932      -                        bcopy(&svp->svp_router_mac, buf, ETHERADDRL);
      958 +                        bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
 933  959                          *sizep = ETHERADDRL;
 934  960                  }
 935  961  
 936  962                  mutex_exit(&svp->svp_lock);
 937  963                  return (0);
 938  964          }
 939  965          return (EINVAL);
 940  966  }
 941  967  
 942  968  static int
 943  969  varpd_svp_setprop(void *arg, const char *pname, const void *buf,
 944  970      const uint32_t size)
 945  971  {
 946  972          svp_t *svp = arg;
 947  973  
 948  974          /* svp/host */
 949  975          if (strcmp(pname, varpd_svp_props[0]) == 0) {
 950  976                  char *dup;
 951  977                  dup = umem_alloc(size, UMEM_DEFAULT);
 952  978                  if (dup == NULL)
 953  979                          return (ENOMEM);
 954  980                  (void) strlcpy(dup, buf, size);
 955  981                  mutex_enter(&svp->svp_lock);
 956  982                  if (svp->svp_host != NULL)
 957  983                          umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 958  984                  svp->svp_host = dup;
 959  985                  mutex_exit(&svp->svp_lock);
 960  986                  return (0);
 961  987          }
 962  988  
 963  989          /* svp/port */
 964  990          if (strcmp(pname, varpd_svp_props[1]) == 0) {
 965  991                  const uint64_t *valp = buf;
 966  992                  if (size < sizeof (uint64_t))
 967  993                          return (EOVERFLOW);
 968  994  
 969  995                  if (*valp == 0 || *valp > UINT16_MAX)
 970  996                          return (EINVAL);
 971  997  
 972  998                  mutex_enter(&svp->svp_lock);
 973  999                  svp->svp_port = (uint16_t)*valp;
 974 1000                  mutex_exit(&svp->svp_lock);
 975 1001                  return (0);
 976 1002          }
 977 1003  
 978 1004          /* svp/underlay_ip */
 979 1005          if (strcmp(pname, varpd_svp_props[2]) == 0) {
 980 1006                  const struct in6_addr *ipv6 = buf;
 981 1007  
 982 1008                  if (size < sizeof (struct in6_addr))
 983 1009                          return (EOVERFLOW);
 984 1010  
 985 1011                  if (IN6_IS_ADDR_V4COMPAT(ipv6))
 986 1012                          return (EINVAL);
 987 1013  
 988 1014                  if (IN6_IS_ADDR_MULTICAST(ipv6))
 989 1015                          return (EINVAL);
 990 1016  
 991 1017                  if (IN6_IS_ADDR_6TO4(ipv6))
 992 1018                          return (EINVAL);
 993 1019  
 994 1020                  if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
 995 1021                          ipaddr_t v4;
 996 1022                          IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
 997 1023                          if (IN_MULTICAST(v4))
 998 1024                                  return (EINVAL);
 999 1025                  }
1000 1026  
1001 1027                  mutex_enter(&svp->svp_lock);
1002 1028                  bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1003 1029                  svp->svp_huip = B_TRUE;
1004 1030                  mutex_exit(&svp->svp_lock);
1005 1031                  return (0);
1006 1032          }
1007 1033  
1008 1034          /* svp/underlay_port */
1009 1035          if (strcmp(pname, varpd_svp_props[3]) == 0) {
1010 1036                  const uint64_t *valp = buf;
1011 1037                  if (size < sizeof (uint64_t))
1012 1038                          return (EOVERFLOW);
1013 1039  
1014 1040                  if (*valp == 0 || *valp > UINT16_MAX)
1015 1041                          return (EINVAL);
1016 1042  
1017 1043                  mutex_enter(&svp->svp_lock);
1018 1044                  svp->svp_uport = (uint16_t)*valp;
1019 1045                  mutex_exit(&svp->svp_lock);
1020 1046  
1021 1047                  return (0);
1022 1048          }
1023 1049  
1024 1050          /* svp/dcid */
1025 1051          if (strcmp(pname, varpd_svp_props[4]) == 0) {
1026 1052                  const uint64_t *valp = buf;
1027 1053                  if (size < sizeof (uint64_t))
1028 1054                          return (EOVERFLOW);
1029 1055  
1030 1056                  /* XXX KEBE ASKS, use UINT32_MAX instead? */
  
1031 1057                  if (*valp == 0 || *valp > UINT32_MAX - 1)
1032 1058                          return (EINVAL);
1033 1059  
1034 1060                  mutex_enter(&svp->svp_lock);
1035 1061                  svp->svp_dcid = (uint32_t)*valp;
1036 1062                  mutex_exit(&svp->svp_lock);
1037 1063  
1038 1064                  return (0);
1039 1065          }
1040 1066  
1041      -        /* svp/router_mac */
     1067 +        /* svp/router_oui */
1042 1068          if (strcmp(pname, varpd_svp_props[5]) == 0) {
1043 1069                  if (size < ETHERADDRL)
1044 1070                          return (EOVERFLOW);
1045 1071                  mutex_enter(&svp->svp_lock);
1046      -                bcopy(buf, &svp->svp_router_mac, ETHERADDRL);
     1072 +                bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
     1073 +                /* Zero-out the low three bytes. */
     1074 +                svp->svp_router_oui[3] = 0;
     1075 +                svp->svp_router_oui[4] = 0;
     1076 +                svp->svp_router_oui[5] = 0;
1047 1077                  mutex_exit(&svp->svp_lock);
1048 1078                  return (0);
1049 1079          }
1050 1080  
1051 1081          return (EINVAL);
1052 1082  }
1053 1083  
1054 1084  static int
1055 1085  varpd_svp_save(void *arg, nvlist_t *nvp)
1056 1086  {
1057 1087          int ret;
1058 1088          svp_t *svp = arg;
1059 1089  
1060 1090          mutex_enter(&svp->svp_lock);
1061 1091          /* svp/host */
1062 1092          if (svp->svp_host != NULL) {
1063 1093                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1064 1094                      svp->svp_host)) != 0) {
1065 1095                          mutex_exit(&svp->svp_lock);
1066 1096                          return (ret);
1067 1097                  }
1068 1098          }
1069 1099  
1070 1100          /* svp/port */
1071 1101          if (svp->svp_port != 0) {
1072 1102                  if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1073 1103                      svp->svp_port)) != 0) {
1074 1104                          mutex_exit(&svp->svp_lock);
1075 1105                          return (ret);
1076 1106                  }
1077 1107          }
1078 1108  
1079 1109          /* svp/underlay_ip */
1080 1110          if (svp->svp_huip == B_TRUE) {
1081 1111                  char buf[INET6_ADDRSTRLEN];
1082 1112  
1083 1113                  if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1084 1114                      NULL)
1085 1115                          libvarpd_panic("unexpected inet_ntop failure: %d",
1086 1116                              errno);
1087 1117  
1088 1118                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1089 1119                      buf)) != 0) {
1090 1120                          mutex_exit(&svp->svp_lock);
1091 1121                          return (ret);
1092 1122                  }
1093 1123          }
1094 1124  
1095 1125          /* svp/underlay_port */
1096 1126          if (svp->svp_uport != 0) {
1097 1127                  if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1098 1128                      svp->svp_uport)) != 0) {
1099 1129                          mutex_exit(&svp->svp_lock);
1100 1130                          return (ret);
1101 1131                  }
1102 1132          }
  
1103 1133  
1104 1134          /* svp/dcid */
1105 1135          if (svp->svp_dcid != 0) {
1106 1136                  if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1107 1137                      svp->svp_dcid)) != 0) {
1108 1138                          mutex_exit(&svp->svp_lock);
1109 1139                          return (ret);
1110 1140                  }
1111 1141          }
1112 1142  
1113      -        /* svp/router_mac */
1114      -        if (!ether_is_zero(&svp->svp_router_mac)) {
     1143 +        /* svp/router_oui */
     1144 +        if (!ether_is_zero(&svp->svp_router_oui)) {
1115 1145                  char buf[ETHERADDRSTRL];
1116 1146  
1117 1147                  /* XXX KEBE SAYS See underlay_ip... */
1118      -                if (ether_ntoa_r(&svp->svp_router_mac, buf) == NULL) {
     1148 +                if (ether_ntoa_r(&svp->svp_router_oui, buf) == NULL) {
1119 1149                          libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1120 1150                              errno);
1121 1151                  }
1122 1152  
1123 1153                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1124 1154                      buf)) != 0) {
1125 1155                          mutex_exit(&svp->svp_lock);
1126 1156                          return (ret);
1127 1157                  }
1128 1158          }
1129 1159  
1130 1160          mutex_exit(&svp->svp_lock);
1131 1161          return (0);
1132 1162  }
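
varpd_svp_save() above writes each configured property into the caller's
nvlist under the names shown in the comments (svp/host, svp/port, and so on).
A minimal sketch of the libnvpair add/lookup pattern it relies on; the
property value here is hypothetical (link with -lnvpair):

	#include <stdio.h>
	#include <libnvpair.h>

	int
	main(void)
	{
		nvlist_t *nvp;
		char *val;

		if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
			return (1);

		/* Save side: add the string property. */
		if (nvlist_add_string(nvp, "svp/host",
		    "portolan.example.com") != 0) {
			nvlist_free(nvp);
			return (1);
		}

		/* Restore side: look it back up. */
		if (nvlist_lookup_string(nvp, "svp/host", &val) == 0)
			(void) printf("svp/host = %s\n", val);

		nvlist_free(nvp);
		return (0);
	}
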
1133 1163  
1134 1164  static int
1135 1165  varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1136 1166      overlay_plugin_dest_t dest, void **outp)
1137 1167  {
1138 1168          int ret;
1139 1169          svp_t *svp;
1140 1170          char *ipstr, *hstr, *etherstr;
1141 1171  
1142 1172          if (varpd_svp_valid_dest(dest) == B_FALSE)
1143 1173                  return (ENOTSUP);
1144 1174  
1145 1175          if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1146 1176                  return (ret);
1147 1177  
1148 1178          /* svp/host */
1149 1179          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1150 1180              &hstr)) != 0) {
1151 1181                  if (ret != ENOENT) {
1152 1182                          varpd_svp_destroy(svp);
1153 1183                          return (ret);
1154 1184                  }
1155 1185                  svp->svp_host = NULL;
1156 1186          } else {
1157 1187                  size_t blen = strlen(hstr) + 1;
1158 1188                  svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
1159 1189                  (void) strlcpy(svp->svp_host, hstr, blen);
1160 1190          }
1161 1191  
1162 1192          /* svp/port */
1163 1193          if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1164 1194              &svp->svp_port)) != 0) {
1165 1195                  if (ret != ENOENT) {
1166 1196                          varpd_svp_destroy(svp);
1167 1197                          return (ret);
1168 1198                  }
1169 1199                  svp->svp_port = 0;
1170 1200          }
1171 1201  
1172 1202          /* svp/underlay_ip */
1173 1203          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1174 1204              &ipstr)) != 0) {
1175 1205                  if (ret != ENOENT) {
1176 1206                          varpd_svp_destroy(svp);
1177 1207                          return (ret);
1178 1208                  }
1179 1209                  svp->svp_huip = B_FALSE;
1180 1210          } else {
1181 1211                  ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1182 1212                  if (ret == -1) {
1183 1213                          assert(errno == EAFNOSUPPORT);
1184 1214                          libvarpd_panic("unexpected inet_pton failure: %d",
1185 1215                              errno);
1186 1216                  }
1187 1217  
1188 1218                  if (ret == 0) {
1189 1219                          varpd_svp_destroy(svp);
1190 1220                          return (EINVAL);
1191 1221                  }
1192 1222                  svp->svp_huip = B_TRUE;
1193 1223          }
1194 1224  
1195 1225          /* svp/underlay_port */
1196 1226          if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1197 1227              &svp->svp_uport)) != 0) {
1198 1228                  if (ret != ENOENT) {
1199 1229                          varpd_svp_destroy(svp);
1200 1230                          return (ret);
1201 1231                  }
1202 1232                  svp->svp_uport = 0;
1203 1233          }
1204 1234  
  
    [ 76 lines elided ]
  
1205 1235          /* svp/dcid */
1206 1236          if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1207 1237              &svp->svp_dcid)) != 0) {
1208 1238                  if (ret != ENOENT) {
1209 1239                          varpd_svp_destroy(svp);
1210 1240                          return (ret);
1211 1241                  }
1212 1242                  svp->svp_dcid = 0;
1213 1243          }
1214 1244  
1215      -        /* svp/router_mac */
     1245 +        /* svp/router_oui */
1216 1246          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1217 1247              &etherstr)) != 0) {
1218 1248                  if (ret != ENOENT) {
1219 1249                          varpd_svp_destroy(svp);
1220 1250                          return (ret);
1221 1251                  }
1222      -                bzero(&svp->svp_router_mac, ETHERADDRL);
1223      -        } else if (ether_aton_r(etherstr, &svp->svp_router_mac) == NULL) {
     1252 +                bzero(&svp->svp_router_oui, ETHERADDRL);
     1253 +        } else if (ether_aton_r(etherstr, &svp->svp_router_oui) == NULL) {
1224 1254                  libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1225 1255          }
1226 1256  
1227 1257          svp->svp_hdl = hdl;
1228 1258          *outp = svp;
1229 1259          return (0);
1230 1260  }
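
Between them, varpd_svp_save() and varpd_svp_restore() round-trip
svp/router_oui through its string form: ether_ntoa_r() formats the address
into the nvlist and ether_aton_r() parses it back. A small sketch of that
round trip, assuming illumos's ethers(3SOCKET) routines (declared via
<sys/ethernet.h>; link with -lsocket -lnsl) and a hypothetical OUI:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/ethernet.h>

	int
	main(void)
	{
		struct ether_addr oui, parsed;
		char buf[ETHERADDRSTRL];

		/* 90:b8:d0 is a hypothetical OUI; the low bytes stay zero. */
		if (ether_aton_r("90:b8:d0:0:0:0", &oui) == NULL)
			return (1);

		/* varpd_svp_save() stores the string form... */
		if (ether_ntoa_r(&oui, buf) == NULL)
			return (1);

		/* ...and varpd_svp_restore() parses it back. */
		if (ether_aton_r(buf, &parsed) == NULL)
			return (1);

		(void) printf("round-tripped: %s\n", buf);
		return (0);
	}
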
1231 1261  
1232 1262  static void
1233 1263  varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1234 1264      const struct sockaddr *sock, uint8_t *out)
1235 1265  {
1236 1266          svp_t *svp = arg;
1237 1267          svp_lookup_t *svl;
1238 1268  
1239 1269          if (type != VARPD_QTYPE_ETHERNET) {
1240 1270                  libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1241 1271                  return;
1242 1272          }
1243 1273  
1244 1274          svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1245 1275          if (svl == NULL) {
1246 1276                  libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1247 1277                  return;
1248 1278          }
1249 1279  
1250 1280          svl->svl_type = SVP_L_VL3;
1251 1281          svl->svl_u.svl_vl3.svl_vah = vah;
1252 1282          svl->svl_u.svl_vl3.svl_out = out;
1253 1283          svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1254 1284  }
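
varpd_svp_arp() above allocates its per-query svp_lookup_t from the
svp_lookup_cache object cache and replies VARPD_LOOKUP_DROP if the allocation
fails; the lookup structure carries the ARP handle and output buffer until the
remote lookup completes. A self-contained sketch of the same libumem
object-cache pattern (hypothetical type and names; link with -lumem):

	#include <stdio.h>
	#include <umem.h>

	typedef struct lookup {
		int	l_type;
		void	*l_cookie;
	} lookup_t;

	int
	main(void)
	{
		umem_cache_t *cache;
		lookup_t *lp;

		/* Same constructor-less cache shape as svp_lookup_cache. */
		cache = umem_cache_create("lookup", sizeof (lookup_t), 0,
		    NULL, NULL, NULL, NULL, NULL, 0);
		if (cache == NULL)
			return (1);

		if ((lp = umem_cache_alloc(cache, UMEM_DEFAULT)) == NULL) {
			umem_cache_destroy(cache);
			return (1);
		}
		lp->l_type = 1;

		umem_cache_free(cache, lp);
		umem_cache_destroy(cache);
		return (0);
	}
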
1255 1285  
1256 1286  static const varpd_plugin_ops_t varpd_svp_ops = {
1257 1287          0,
1258 1288          varpd_svp_create,
1259 1289          varpd_svp_start,
1260 1290          varpd_svp_stop,
1261 1291          varpd_svp_destroy,
1262 1292          NULL,
1263 1293          varpd_svp_lookup,
1264 1294          varpd_svp_nprops,
1265 1295          varpd_svp_propinfo,
1266 1296          varpd_svp_getprop,
1267 1297          varpd_svp_setprop,
1268 1298          varpd_svp_save,
1269 1299          varpd_svp_restore,
1270 1300          varpd_svp_arp,
1271 1301          NULL
1272 1302  };
1273 1303  
1274 1304  static int
1275 1305  svp_bunyan_init(void)
1276 1306  {
1277 1307          int ret;
1278 1308  
1279 1309          if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1280 1310                  return (ret);
1281 1311          ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1282 1312              bunyan_stream_fd, (void *)STDERR_FILENO);
1283 1313          if (ret != 0)
1284 1314                  bunyan_fini(svp_bunyan);
1285 1315          return (ret);
1286 1316  }
1287 1317  
1288 1318  static void
1289 1319  svp_bunyan_fini(void)
1290 1320  {
1291 1321          if (svp_bunyan != NULL)
1292 1322                  bunyan_fini(svp_bunyan);
1293 1323  }
1294 1324  
1295 1325  #pragma init(varpd_svp_init)
1296 1326  static void
1297 1327  varpd_svp_init(void)
1298 1328  {
1299 1329          int err;
1300 1330          varpd_plugin_register_t *vpr;
1301 1331  
1302 1332          if (svp_bunyan_init() != 0)
1303 1333                  return;
1304 1334  
1305 1335          if ((err = svp_host_init()) != 0) {
1306 1336                  (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1307 1337                      BUNYAN_T_INT32, "error", err,
1308 1338                      BUNYAN_T_END);
1309 1339                  svp_bunyan_fini();
1310 1340                  return;
1311 1341          }
1312 1342  
1313 1343          svp_lookup_cache = umem_cache_create("svp_lookup",
1314 1344              sizeof (svp_lookup_t),  0, NULL, NULL, NULL, NULL, NULL, 0);
1315 1345          if (svp_lookup_cache == NULL) {
1316 1346                  (void) bunyan_error(svp_bunyan,
1317 1347                      "failed to create svp_lookup cache",
1318 1348                      BUNYAN_T_INT32, "error", errno,
1319 1349                      BUNYAN_T_END);
1320 1350                  svp_bunyan_fini();
1321 1351                  return;
1322 1352          }
1323 1353  
1324 1354          if ((err = svp_event_init()) != 0) {
1325 1355                  (void) bunyan_error(svp_bunyan,
1326 1356                      "failed to init event subsystem",
1327 1357                      BUNYAN_T_INT32, "error", err,
1328 1358                      BUNYAN_T_END);
1329 1359                  svp_bunyan_fini();
1330 1360                  umem_cache_destroy(svp_lookup_cache);
1331 1361                  return;
1332 1362          }
1333 1363  
1334 1364          if ((err = svp_timer_init()) != 0) {
1335 1365                  (void) bunyan_error(svp_bunyan,
1336 1366                      "failed to init timer subsystem",
1337 1367                      BUNYAN_T_INT32, "error", err,
1338 1368                      BUNYAN_T_END);
1339 1369                  svp_event_fini();
1340 1370                  umem_cache_destroy(svp_lookup_cache);
1341 1371                  svp_bunyan_fini();
1342 1372                  return;
1343 1373          }
1344 1374  
1345 1375          if ((err = svp_remote_init()) != 0) {
1346 1376                  (void) bunyan_error(svp_bunyan,
1347 1377                      "failed to init remote subsystem",
1348 1378                      BUNYAN_T_INT32, "error", err,
1349 1379                      BUNYAN_T_END);
1350 1380                  svp_event_fini();
1351 1381                  umem_cache_destroy(svp_lookup_cache);
1352 1382                  svp_bunyan_fini();
1353 1383                  return;
1354 1384          }
1355 1385  
1356 1386          vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1357 1387          if (vpr == NULL) {
1358 1388                  (void) bunyan_error(svp_bunyan,
1359 1389                      "failed to alloc varpd plugin",
1360 1390                      BUNYAN_T_INT32, "error", err,
1361 1391                      BUNYAN_T_END);
1362 1392                  svp_remote_fini();
1363 1393                  svp_event_fini();
1364 1394                  umem_cache_destroy(svp_lookup_cache);
1365 1395                  svp_bunyan_fini();
1366 1396                  return;
1367 1397          }
1368 1398  
1369 1399          vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1370 1400          vpr->vpr_name = "svp";
1371 1401          vpr->vpr_ops = &varpd_svp_ops;
1372 1402  
1373 1403          if ((err = libvarpd_plugin_register(vpr)) != 0) {
1374 1404                  (void) bunyan_error(svp_bunyan,
1375 1405                      "failed to register varpd plugin",
1376 1406                      BUNYAN_T_INT32, "error", err,
1377 1407                      BUNYAN_T_END);
1378 1408                  svp_remote_fini();
1379 1409                  svp_event_fini();
1380 1410                  umem_cache_destroy(svp_lookup_cache);
1381 1411                  svp_bunyan_fini();
1382 1412  
1383 1413          }
1384 1414          libvarpd_plugin_free(vpr);
1385 1415  }
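
varpd_svp_init() tears down every already-initialized subsystem, in reverse
order, at each failure site, repeating the cleanup calls inline. A common
alternative shape for init paths like this is a cascade of goto labels; a
small self-contained illustration with stub subsystems (not the plugin's
code):

	#include <stdio.h>

	/* Stubbed subsystems for illustration. */
	static int init_a(void) { return (0); }
	static int init_b(void) { return (0); }
	static int init_c(void) { return (-1); }	/* simulate failure */
	static void fini_a(void) { (void) puts("fini_a"); }
	static void fini_b(void) { (void) puts("fini_b"); }

	int
	main(void)
	{
		if (init_a() != 0)
			return (1);
		if (init_b() != 0)
			goto fail_a;
		if (init_c() != 0)
			goto fail_b;
		return (0);

	fail_b:
		/* Unwind in reverse order of initialization. */
		fini_b();
	fail_a:
		fini_a();
		return (1);
	}
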
  
    [ 152 lines elided ]
  