Support route deletion entries in SVP_R_LOG_ACK.
    
          --- old/usr/src/lib/varpd/svp/common/libvarpd_svp.c
          +++ new/usr/src/lib/varpd/svp/common/libvarpd_svp.c
   1    1  /*
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13   13   * Copyright 2018, Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * This plugin implements the SDC VXLAN Protocol (SVP).
  18   18   *
  19   19   * This plugin is designed to work with a broader distributed system that
  20   20   * mainains a database of mappings and provides a means of looking up data and
  21   21   * provides a stream of updates. While it is named after VXLAN, there isn't
  22   22   * anything specific to VXLAN baked into the protocol at this time, other than
  23   23   * that it requires both an IP address and a port; however, if there's a good
  24   24   * reason to support others here, we can modify that.
  25   25   *
  26   26   * -----------
  27   27   * Terminology
  28   28   * -----------
  29   29   *
  30   30   * Throughout this module we refer to a few different kinds of addresses:
  31   31   *
  32   32   *    VL3
  33   33   *
   34   34   *      A VL3 address, or virtual layer 3, refers to the layer three addresses
  35   35   *      that are used by entities on an overlay network. As far as we're
  36   36   *      concerned that means that this is the IP address of an interface on an
  37   37   *      overlay network.
  38   38   *
  39   39   *    VL2
  40   40   *
   41   41   *      A VL2 address, or a virtual layer 2, refers to the link-layer addresses
  42   42   *      that are used by entities on an overlay network. As far as we're
   43   43   *      concerned that means that this is the MAC address of an interface on
  44   44   *      an overlay network.
  45   45   *
  46   46   *    UL3
  47   47   *
  48   48   *      A UL3, or underlay layer 3, refers to the layer three (IP) address on
  49   49   *      the underlay network.
  50   50   *
   51   51   * The svp plugin provides lookups from VL3->VL2, e.g. the equivalent of an ARP
  52   52   * or NDP query, and then also provides VL2->UL3 lookups.
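             *
             * As a hedged sketch (these prototypes are assumptions inferred from
             * how this file uses them; the authoritative declarations live in
             * libvarpd_svp.h), the two lookups are driven through the remote
             * backend roughly as:
             *
             *      void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
             *          const struct sockaddr *, void *);
             *      void svp_remote_vl2_lookup(svp_t *, svp_query_t *,
             *          const uint8_t *, void *);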
  53   53   *
  54   54   * -------------------
  55   55   * Protocol Operations
  56   56   * -------------------
  57   57   *
  58   58   * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
  59   59   * defines the basic TCP protocol that we use to communicate to hosts. At this
  60   60   * time, it is not quite 100% implemented in both this plug-in and our primary
  61   61   * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
  62   62   *
   63   63   * At this time, we don't quite support everything that we need to, including
   64   64   * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN operations.
  65   65   *
  66   66   * ---------------------------------
  67   67   * General Design and Considerations
  68   68   * ---------------------------------
  69   69   *
  70   70   * Every instance of the svp plugin requires the hostname and port of a server
   71   71   * to contact, though we have co-opted port 1296 (the year of the oldest
  72   72   * extant portolan) as our default port.
  73   73   *
   74   74   * Each of the different instances of the plugin has a corresponding remote
  75   75   * backend. The remote backend represents the tuple of the [ host, port ].
  76   76   * Different instances that share the same host and port tuple will use the same
  77   77   * backend.
  78   78   *
  79   79   * The backend is actually in charge of performing lookups, resolving and
  80   80   * updating the set of remote hosts based on the DNS resolution we've been
  81   81   * provided, and taking care of things like shootdowns.
  82   82   *
  83   83   * The whole plugin itself maintains an event loop and a number of threads to
  84   84   * service that event loop. On top of that event loop, we have a simple timer
  85   85   * backend that ticks at one second intervals and performs various callbacks,
  86   86   * such as idle query timers, DNS resolution, connection backoff, etc. Each of
  87   87   * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
  88   88   * the connection state, reconnecting, etc.
  89   89   *
   90   90   * All in all, the general way that this all looks is:
  91   91   *
  92   92   *  +----------------------------+
  93   93   *  | Plugin Instance            |
  94   94   *  | svp_t                      |
  95   95   *  |                            |
  96   96   *  | varpd_provider_handle_t * -+-> varpd handle
  97   97   *  | uint64_t               ----+-> varpd ID
  98   98   *  | char *                 ----+-> remote host
  99   99   *  | uint16_t               ----+-> remote port
 100  100   *  | svp_remote_t *   ---+------+-> remote backend
 101  101   *  +---------------------+------+
 102  102   *                        |
 103  103   *                        v
 104  104   *   +----------------------+                   +----------------+
  105  105   *   | Remote backend       |------------------>| Remote Backend |---> ...
 106  106   *   | svp_remote_t         |                   | svp_remote_t   |
 107  107   *   |                      |                   +----------------+
 108  108   *   | svp_remote_state_t --+-> state flags
 109  109   *   | svp_degrade_state_t -+-> degraded reason
 110  110   *   | struct addrinfo *  --+-> resolved hosts
 111  111   *   | uint_t            ---+-> active hosts
 112  112   *   | uint_t            ---+-> DNS generation
 113  113   *   | uint_t            ---+-> Reference count
 114  114   *   | uint_t            ---+-> active conns
 115  115   *   | uint_t            ---+-> degraded conns
 116  116   *   | list_t        ---+---+-> connection list
 117  117   *   +------------------+---+
 118  118   *                      |
 119  119   *                      +------------------------------+-----------------+
 120  120   *                      |                              |                 |
 121  121   *                      v                              v                 v
  122  122   *   +-------------------+                       +----------------+
 123  123   *   | SVP Connection    |                       | SVP connection |     ...
 124  124   *   | svp_conn_t        |                       | svp_conn_t     |
 125  125   *   |                   |                       +----------------+
 126  126   *   | svp_event_t   ----+-> event loop handle
 127  127   *   | svp_timer_t   ----+-> backoff timer
 128  128   *   | svp_timer_t   ----+-> query timer
 129  129   *   | int           ----+-> socket fd
 130  130   *   | uint_t        ----+-> generation
 131  131   *   | uint_t        ----+-> current backoff
 132  132   *   | svp_conn_flags_t -+-> connection flags
 133  133   *   | svp_conn_state_t -+-> connection state
 134  134   *   | svp_conn_error_t -+-> connection error
  135  135   *   | int            ---+-> last errno
 136  136   *   | hrtime_t       ---+-> activity timestamp
 137  137   *   | svp_conn_out_t ---+-> outgoing data state
 138  138   *   | svp_conn_in_t  ---+-> incoming data state
 139  139   *   | list_t      ---+--+-> active queries
 140  140   *   +----------------+--+
 141  141   *                    |
 142  142   *                    +----------------------------------+-----------------+
 143  143   *                    |                                  |                 |
 144  144   *                    v                                  v                 v
 145  145   *   +--------------------+                       +-------------+
 146  146   *   | SVP Query          |                       | SVP Query   |         ...
 147  147   *   | svp_query_t        |                       | svp_query_t |
 148  148   *   |                    |                       +-------------+
 149  149   *   | svp_query_f     ---+-> callback function
 150  150   *   | void *          ---+-> callback arg
 151  151   *   | svp_query_state_t -+-> state flags
 152  152   *   | svp_req_t       ---+-> svp prot. header
 153  153   *   | svp_query_data_t --+-> read data
 154  154   *   | svp_query_data_t --+-> write data
 155  155   *   | svp_status_t    ---+-> request status
 156  156   *   +--------------------+
 157  157   *
  158  158   * The svp_t is the instance that we associate with varpd. The instance itself
  159  159   * maintains properties and then when it's started associates with an
  160  160   * svp_remote_t, which is the remote backend. The remote backend itself
  161  161   * maintains the DNS state and spins connections up and down based on the
 162  162   * results from DNS. By default, we query DNS every 30 seconds. For more on the
 163  163   * connection life cycle, see the next section.
 164  164   *
  165  165   * By default, each connection maintains its own backoff timer and list of
  166  166   * queries it's servicing. Only one request is generally outstanding at a time
  167  167   * and requests are round-robined across the various connections.
 168  168   *
  169  169   * The query itself represents the svp request that's going on, keeps track of
  170  170   * its state, and is a place for the data that's read and written as part of
  171  171   * the request.
 172  172   *
 173  173   * Connections maintain a query timer such that if we have not received data on
 174  174   * a socket for a certain amount of time, we kill that socket and begin a
 175  175   * reconnection cycle with backoff.
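             *
             * A minimal sketch of that query-timer check (the field and function
             * names here are hypothetical, for illustration only):
             *
             *      if (!list_is_empty(&scp->sc_queries) &&
             *          gethrtime() - scp->sc_lastact > svp_conn_query_timeout) {
             *              svp_conn_restart(scp);  ...kill, back off, retry
             *      }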
 176  176   *
 177  177   * ------------------------
 178  178   * Connection State Machine
 179  179   * ------------------------
 180  180   *
 181  181   * We have a connection pool that's built upon DNS records. DNS describes the
 182  182   * membership of the set of remote peers that make up our pool and we maintain
 183  183   * one connection to each of them.  In addition, we maintain an exponential
  184  184   * backoff for each peer and will attempt to reconnect immediately before backing
 185  185   * off. The following are the valid states that a connection can be in:
 186  186   *
 187  187   *      SVP_CS_ERROR            An OS error has occurred on this connection,
 188  188   *                              such as failure to create a socket or associate
 189  189   *                              the socket with an event port. We also
 190  190   *                              transition all connections to this state before
 191  191   *                              we destroy them.
 192  192   *
 193  193   *      SVP_CS_INITIAL          This is the initial state of a connection, all
 194  194   *                              that should exist is an unbound socket.
 195  195   *
 196  196   *      SVP_CS_CONNECTING       A call to connect has been made and we are
 197  197   *                              polling for it to complete.
 198  198   *
 199  199   *      SVP_CS_BACKOFF          A connect attempt has failed and we are
 200  200   *                              currently backing off, waiting to try again.
 201  201   *
 202  202   *      SVP_CS_ACTIVE           We have successfully connected to the remote
 203  203   *                              system.
 204  204   *
 205  205   *      SVP_CS_WINDDOWN         This connection is going to valhalla. In other
 206  206   *                              words, a previously active connection is no
 207  207   *                              longer valid in DNS, so we should curb our use
 208  208   *                              of it, and reap it as soon as we have other
 209  209   *                              active connections.
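             *
             * For reference, a minimal sketch of the corresponding enumeration
             * (ordering and values here are illustrative; the authoritative
             * definition, which also covers the SVP_CS_VERSIONING state shown in
             * the diagram below, lives in libvarpd_svp.h):
             *
             *      typedef enum svp_conn_state {
             *              SVP_CS_ERROR,
             *              SVP_CS_INITIAL,
             *              SVP_CS_CONNECTING,
             *              SVP_CS_BACKOFF,
             *              SVP_CS_ACTIVE,
             *              SVP_CS_WINDDOWN
             *      } svp_conn_state_t;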
 210  210   *
 211  211   * The following diagram attempts to describe our state transition scheme, and
 212  212   * when we transition from one state to the next.
 213  213   *
 214  214   *                               |
 215  215   *                               * New remote IP from DNS resolution,
 216  216   *                               | not currently active in the system.
 217  217   *                               |
 218  218   *                               v                                Socket Error,
 219  219   *                       +----------------+                       still in DNS
 220  220   *  +----------------<---| SVP_CS_INITIAL |<----------------------*--------+
 221  221   *  |                    +----------------+                                |
 222  222   *  |                            System  |                                 |
 223  223   *  | Connection . . . . .       success *     Successful                  |
 224  224   *  | failed             .               |     connect()                   |
 225  225   *  |                    .               |        +-------------------+    |
 226  226   *  |               +----*---------+     |    +-*>| SVP_CS_VERSIONING +    |
 227  227   *  |               |              |     |    |   +-------------------+    |
 228  228   *  |               |              |     |    |          V   V Set version |
 229  229   *  |               |              |     |    |          |   * based on    |
 230  230   *  |               |              |     |    |          |   | SVP_R_PONG  |
 231  231   *  |               V              ^     v    ^          |   V             ^
 232  232   *  |  +----------------+         +-------------------+  |  +---------------+
 233  233   *  +<-| SVP_CS_BACKOFF |         | SVP_CS_CONNECTING |  |  | SVP_CS_ACTIVE |
 234  234   *  |  +----------------+         +-------------------+  |  +---------------+
 235  235   *  |               V              ^  V                  |    V  V
 236  236   *  | Backoff wait  *              |  |                  |    |  * Removed
 237  237   *  v interval      +--------------+  +-----------------<+----+  | from DNS
 238  238   *  | finished                        |                          |
 239  239   *  |                                 V                          |
 240  240   *  |                                 |                          V
 241  241   *  |                                 |            +-----------------+
 242  242   *  +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
 243  243   *                   |                             +-----------------+
 244  244   *                   * . . .   Fatal system, not
 245  245   *                   |         socket error or
 246  246   *                   V         quiesced after
 247  247   *           +--------------+  removal from DNS
 248  248   *           | SVP_CS_ERROR |
 249  249   *           +--------------+
 250  250   *                   |
 251  251   *                   * . . . Removed from DNS
 252  252   *                   v
 253  253   *            +------------+
 254  254   *            | Connection |
 255  255   *            | Destroyed  |
 256  256   *            +------------+
 257  257   *
 258  258   * --------------------------
 259  259   * Connection Event Injection
 260  260   * --------------------------
 261  261   *
 262  262   * For each connection that exists in the system, we have a timer in place that
 263  263   * is in charge of performing timeout activity. It fires once every thirty
 264  264   * seconds or so for a given connection and checks to ensure that we have had
 265  265   * activity for the most recent query on the connection. If not, it terminates
 266  266   * the connection. This is important as if we have sent all our data and are
 267  267   * waiting for the remote end to reply, without enabling something like TCP
  268  268   * keep-alive, we will not be notified of anything that has happened to the
 269  269   * remote connection, for example a panic. In addition, this also protects
 270  270   * against a server that is up, but a portolan that is not making forward
 271  271   * progress.
 272  272   *
 273  273   * When a timeout occurs, we first try to disassociate any active events, which
 274  274   * by definition must exist. Once that's done, we inject a port source user
 275  275   * event. Now, there is a small gotcha. Let's assume for a moment that we have a
 276  276   * pathological portolan. That means that it knows to inject activity right at
  277  277   * the timeout window. That means that the event may be disassociated before
 278  278   * we could get to it. If that's the case, we must _not_ inject the user event
 279  279   * and instead, we'll let the pending event take care of it. We know that the
 280  280   * pending event hasn't hit the main part of the loop yet, otherwise, it would
 281  281   * have released the lock protecting our state and associated the event.
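             *
             * A hedged sketch of that injection, assuming the socket is
             * associated with the event port via PORT_SOURCE_FD (the variable
             * names are illustrative):
             *
             *      if (port_dissociate(efd, PORT_SOURCE_FD, scp->sc_socket) == 0)
             *              (void) port_send(efd, 0, &scp->sc_event);
             *
             * On failure, the already-pending event performs the teardown.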
 282  282   *
 283  283   * ------------
 284  284   * Notes on DNS
 285  285   * ------------
 286  286   *
 287  287   * Unfortunately, doing host name resolution in a way that allows us to leverage
  288  288   * the system's resolvers and the system's caching requires us to make blocking
 289  289   * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
 290  290   * that will tie up a thread for quite some time. To work around that fact,
 291  291   * we're going to create a fixed number of threads and we'll use them to service
 292  292   * our DNS requests. While this isn't ideal, until we have a sane means of
  293  293   * integrating DNS resolution into an event loop with, say, portfs, it's not
 294  294   * going to be a fun day no matter what we do.
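             *
             * Each worker thread simply services blocking resolutions along
             * these lines (a sketch only; the hostname field and the queueing
             * around it are simplified):
             *
             *      struct addrinfo hints, *res = NULL;
             *      bzero(&hints, sizeof (hints));
             *      hints.ai_family = AF_UNSPEC;
             *      hints.ai_socktype = SOCK_STREAM;
             *      if (getaddrinfo(srp->sr_hostname, NULL, &hints, &res) == 0) {
             *              ...hand res to the backend, bump the DNS generation
             *      }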
 295  295   *
 296  296   * ------
 297  297   * Timers
 298  298   * ------
 299  299   *
 300  300   * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
  301  301   * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves of
  302  302   * timer drift; however, as zones may not actually have CLOCK_HIGHRES
  303  303   * access, we don't rely on it. The timer itself is just a simple avl tree
  304  304   * sorted by expiration time, which is stored as a tick in the future; a tick
  305  305   * is just one second.
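             *
             * A hedged sketch of arming that timer (the sigevent setup is
             * elided; see the svp timer code for the real implementation):
             *
             *      timer_t tid;
             *      struct itimerspec its = { { 1, 0 }, { 1, 0 } };
             *      (void) timer_create(CLOCK_REALTIME, &evp, &tid);
             *      (void) timer_settime(tid, 0, &its, NULL);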
 306  306   *
  307  307   * ----------
  308  308   * Shootdowns
  309  309   * ----------
 310  310   *
 311  311   * As part of the protocol, we need to be able to handle shootdowns that inform
 312  312   * us some of the information in the system is out of date. This information
 313  313   * needs to be processed promptly; however, the information is hopefully going
 314  314   * to be relatively infrequent relative to the normal flow of information.
 315  315   *
  316  316   * Shootdown processing needs to be done on a per-backend basis. The
 317  317   * general design is that we'll have a single query for this which can fire on a
 318      - * 5-10s period, we randmoize the latter part to give us a bit more load
      318 + * 5-10s period, we randomize the latter part to give us a bit more load
 319  319   * spreading. If we complete because there's no work to do, then we wait the
 320  320   * normal period. If we complete, but there's still work to do, we'll go again
 321  321   * after a second.
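             *
             * A sketch of that rescheduling decision (the names here are
             * illustrative):
             *
             *      if (more_log_entries)
             *              next_fire = 1;                  ...go again in 1s
             *      else
             *              next_fire = 5 + arc4random_uniform(6);  ...5-10s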
 322  322   *
 323  323   * A shootdown has a few different parts. We first receive a list of items to
 324  324   * shootdown. After performing all of those, we need to acknowledge them. When
  325  325   * that's been done successfully, we can move on to the next part. From a
 326  326   * protocol perspective, we make a SVP_R_LOG_REQ, we get a reply, and then after
 327  327   * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
 328  328   * continue.
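             *
             * In protocol terms, the exchange looks like this (SVP_R_LOG_RM_ACK
             * being the acknowledgement defined in libvarpd_svp_prot.h):
             *
             *      varpd                                   portolan
             *        |---------- SVP_R_LOG_REQ ---------------->|
             *        |<--------- SVP_R_LOG_ACK (entries) -------|
             *        |          ... process each entry ...      |
             *        |---------- SVP_R_LOG_RM ----------------->|
             *        |<--------- SVP_R_LOG_RM_ACK --------------|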
 329  329   *
 330  330   * However, one of the challenges that we have is that these invalidations are
 331  331   * just that, an invalidation. For a virtual layer two request, that's fine,
 332  332   * because the kernel supports that. However, for virtual layer three
 333  333   * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
  334  334   * don't really support a notion of just an invalidation; instead you have to
 335  335   * inject the new data in a gratuitous fashion.
 336  336   *
 337  337   * To that end, what we instead do is when we receive a VL3 invalidation, we
  338  338   * turn that into a VL3 request. We hold the general request as outstanding
 339  339   * until we receive all of the callbacks for the VL3 invalidations, at which
 340  340   * point we go through and do the log removal request.
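             *
             * Schematically (a sketch of the flow, not the literal code):
             *
             *      for each VL3 invalidation in the log:
             *              issue a VL3 lookup; its callback injects the fresh
             *              mapping gratuitously (ARP/NDP injection)
             *      once every VL3 callback has fired:
             *              issue the SVP_R_LOG_RM for the entire batch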
 341  341   */
 342  342  
 343  343  #include <umem.h>
 344  344  #include <errno.h>
 345  345  #include <stdlib.h>
 346  346  #include <sys/types.h>
 347  347  #include <sys/socket.h>
 348  348  #include <netinet/in.h>
 349  349  #include <arpa/inet.h>
 350  350  #include <libnvpair.h>
 351  351  #include <strings.h>
 352  352  #include <string.h>
 353  353  #include <assert.h>
 354  354  #include <unistd.h>
 355  355  
 356  356  #include <libvarpd_provider.h>
 357  357  #include "libvarpd_svp.h"
 358  358  
 359  359  bunyan_logger_t *svp_bunyan;
 360  360  static int svp_defport = 1296;
 361  361  static int svp_defuport = 1339;
 362  362  static umem_cache_t *svp_lookup_cache;
 363  363  
 364  364  typedef enum svp_lookup_type {
 365  365          SVP_L_UNKNOWN   = 0x0,
 366  366          SVP_L_VL2       = 0x1,
 367  367          SVP_L_VL3       = 0x2,
 368  368          SVP_L_ROUTE     = 0x3
 369  369  } svp_lookup_type_t;
 370  370  
 371  371  typedef struct svp_lookup {
 372  372          int svl_type;
 373  373          union {
 374  374                  struct svl_lookup_vl2 {
 375  375                          varpd_query_handle_t    *svl_handle;
 376  376                          overlay_target_point_t  *svl_point;
 377  377                  } svl_vl2;
 378  378                  struct svl_lookup_vl3 {
 379  379                          varpd_arp_handle_t      *svl_vah;
 380  380                          uint8_t                 *svl_out;
 381  381                  } svl_vl3;
 382  382                  struct svl_lookup_route {
 383  383                          varpd_query_handle_t    *svl_handle;
 384  384                          overlay_target_point_t  *svl_point;
 385  385                          overlay_target_route_t  *svl_route;
 386  386                          overlay_target_mac_t    *svl_mac;
 387  387                  } svl_route;
 388  388          } svl_u;
 389  389          svp_query_t                             svl_query;
 390  390  } svp_lookup_t;
 391  391  
 392  392  static const char *varpd_svp_props[] = {
 393  393          "svp/host",
 394  394          "svp/port",
 395  395          "svp/underlay_ip",
 396  396          "svp/underlay_port",
 397  397          "svp/dcid",
 398  398          "svp/router_oui"
 399  399  };
 400  400  
 401  401  static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 402  402  
 403  403  int
 404  404  svp_comparator(const void *l, const void *r)
 405  405  {
 406  406          const svp_t *ls = l;
 407  407          const svp_t *rs = r;
 408  408  
 409  409          if (ls->svp_vid > rs->svp_vid)
 410  410                  return (1);
 411  411          if (ls->svp_vid < rs->svp_vid)
 412  412                  return (-1);
 413  413          return (0);
 414  414  }
 415  415  
 416  416  static void
 417  417  svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
 418  418      const uint16_t uport, void *arg)
 419  419  {
 420  420          svp_lookup_t *svl = arg;
 421  421          overlay_target_point_t *otp;
 422  422  
 423  423          assert(svp != NULL);
 424  424          assert(arg != NULL);
 425  425  
 426  426          if (status != SVP_S_OK) {
 427  427                  libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 428  428                      VARPD_LOOKUP_DROP);
 429  429                  umem_cache_free(svp_lookup_cache, svl);
 430  430                  return;
 431  431          }
 432  432  
 433  433          otp = svl->svl_u.svl_vl2.svl_point;
 434  434          bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
 435  435          otp->otp_port = uport;
 436  436          libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
 437  437              VARPD_LOOKUP_OK);
 438  438          umem_cache_free(svp_lookup_cache, svl);
 439  439  }
 440  440  
 441  441  static void
 442  442  svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
 443  443      const struct in6_addr *uip, const uint16_t uport, void *arg)
 444  444  {
 445  445          /* Initialize address-holders to 0 for comparisons-to-zeroes later. */
 446  446          overlay_target_point_t point = { 0 };
 447  447          svp_lookup_t *svl = arg;
 448  448          uint8_t nexthop_mac[6] = { 0, 0, 0, 0, 0, 0 };
 449  449  
 450  450          assert(svp != NULL);
 451  451          assert(svl != NULL);
 452  452  
 453  453          if (status != SVP_S_OK) {
 454  454                  libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 455  455                      VARPD_LOOKUP_DROP);
 456  456                  umem_cache_free(svp_lookup_cache, svl);
 457  457                  return;
 458  458          }
 459  459  
 460  460          /* Inject the L2 mapping before the L3 */
 461  461          bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
 462  462          point.otp_port = uport;
 463  463          libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
 464  464  
 465  465          bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
 466  466          libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
 467  467              VARPD_LOOKUP_OK);
 468  468          umem_cache_free(svp_lookup_cache, svl);
 469  469  }
 470  470  
 471  471  static void
 472  472  svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
 473  473  {
 474  474          libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 475  475  }
 476  476  
 477  477  static void
 478  478  svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
 479  479      const uint8_t *vl2mac, const uint8_t *targmac)
 480  480  {
 481  481          struct in_addr v4;
 482  482  
 483  483          /*
  484  484           * At the moment we don't support any IPv6 related log entries; this
 485  485           * will change soon as we develop a bit more of the IPv6 related
 486  486           * infrastructure so we can properly test the injection.
 487  487           */
 488  488          if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
 489  489                  return;
 490  490          } else {
 491  491                  IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
 492  492                  if (targmac == NULL)
 493  493                          targmac = svp_bcast;
 494  494                  libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
 495  495          }
 496  496  }
 497  497  
 498  498  /* ARGSUSED */
 499  499  static void
 500  500  svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
 501  501      const uint16_t uport)
 502  502  {
 503  503          /*
 504  504           * We should probably do a conditional invalidation here.
 505  505           */
 506  506          libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
 507  507  }
 508  508  
 509  509  static void
 510  510  svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
 511  511      uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
 512  512      uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
 513  513      void *arg)
 514  514  {
 515  515          svp_lookup_t *svl = arg;
 516  516          overlay_target_point_t *otp;
 517  517          overlay_target_route_t *otr;
 518  518          overlay_target_mac_t *otm;
 519  519  
 520  520          if (status != SVP_S_OK) {
 521  521                  libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 522  522                      VARPD_LOOKUP_DROP);
 523  523                  umem_cache_free(svp_lookup_cache, svl);
 524  524                  return;
 525  525          }
 526  526  
 527  527          otp = svl->svl_u.svl_route.svl_point;
 528  528          bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
 529  529          otp->otp_port = ul3_port;
 530  530  
 531  531          otr = svl->svl_u.svl_route.svl_route;
 532  532          otr->otr_vnet = vnetid;
 533  533          otr->otr_vlan = vlan;
  534  534          bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
  535  535  
 536  536          otm = svl->svl_u.svl_route.svl_mac;
 537  537          otm->otm_dcid = dcid;
 538  538          bcopy(dstmac, otm->otm_mac, ETHERADDRL);
 539  539  
 540  540          libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
 541  541              VARPD_LOOKUP_OK);
 542  542          umem_cache_free(svp_lookup_cache, svl);
 543  543  }
 544  544  
      545 +/*
      546 + * Tell the overlay instance to flush out entries matching this route.
      547 + * See libvarpd_route_flush() for more.
      548 + */
      549 +static void
      550 +svp_route_shootdown_cb(svp_t *svp, uint8_t *srcip, uint8_t *dstip,
      551 +    uint8_t src_prefixlen, uint8_t dst_prefixlen, uint16_t vlan_id)
      552 +{
      553 +        libvarpd_route_flush(svp->svp_hdl, srcip, dstip, src_prefixlen,
      554 +            dst_prefixlen, vlan_id);
      555 +}
      556 +
 545  557  static svp_cb_t svp_defops = {
 546  558          svp_vl2_lookup_cb,
 547  559          svp_vl3_lookup_cb,
 548  560          svp_vl2_invalidate_cb,
 549  561          svp_vl3_inject_cb,
 550  562          svp_shootdown_cb,
 551  563          svp_route_lookup_cb,
      564 +        svp_route_shootdown_cb
 552  565  };
 553  566  
 554  567  static boolean_t
 555  568  varpd_svp_valid_dest(overlay_plugin_dest_t dest)
 556  569  {
 557  570          if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
 558  571                  return (B_FALSE);
 559  572  
 560  573          return (B_TRUE);
 561  574  }
 562  575  
 563  576  static int
 564  577  varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
 565  578      overlay_plugin_dest_t dest)
 566  579  {
 567  580          int ret;
 568  581          svp_t *svp;
 569  582  
 570  583          if (varpd_svp_valid_dest(dest) == B_FALSE)
 571  584                  return (ENOTSUP);
 572  585  
 573  586          svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
 574  587          if (svp == NULL)
 575  588                  return (ENOMEM);
 576  589  
 577  590          if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
 578  591              NULL)) != 0) {
 579  592                  umem_free(svp, sizeof (svp_t));
 580  593                  return (ret);
 581  594          }
 582  595  
 583  596          svp->svp_port = svp_defport;
 584  597          svp->svp_uport = svp_defuport;
 585  598          svp->svp_cb = svp_defops;
 586  599          svp->svp_hdl = hdl;
 587  600          svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
 588  601          *outp = svp;
 589  602          return (0);
 590  603  }
 591  604  
 592  605  static int
 593  606  varpd_svp_start(void *arg)
 594  607  {
 595  608          int ret;
 596  609          svp_remote_t *srp;
 597  610          svp_t *svp = arg;
 598  611  
 599  612          mutex_enter(&svp->svp_lock);
 600  613          if (svp->svp_host == NULL || svp->svp_port == 0 ||
 601  614              svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
 602  615                  mutex_exit(&svp->svp_lock);
 603  616                  return (EAGAIN);
 604  617          }
 605  618          mutex_exit(&svp->svp_lock);
 606  619  
 607  620          if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
 608  621              &srp)) != 0)
 609  622                  return (ret);
 610  623  
 611  624          if ((ret = svp_remote_attach(srp, svp)) != 0) {
 612  625                  svp_remote_release(srp);
 613  626                  return (ret);
 614  627          }
 615  628  
 616  629          return (0);
 617  630  }
 618  631  
 619  632  static void
 620  633  varpd_svp_stop(void *arg)
 621  634  {
 622  635          svp_t *svp = arg;
 623  636  
 624  637          svp_remote_detach(svp);
 625  638  }
 626  639  
 627  640  static void
 628  641  varpd_svp_destroy(void *arg)
 629  642  {
 630  643          svp_t *svp = arg;
 631  644  
 632  645          if (svp->svp_host != NULL)
 633  646                  umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 634  647  
 635  648          if (mutex_destroy(&svp->svp_lock) != 0)
 636  649                  libvarpd_panic("failed to destroy svp_t`svp_lock");
 637  650  
 638  651          umem_free(svp, sizeof (svp_t));
 639  652  }
 640  653  
 641  654  static void
 642  655  varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
 643  656      const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 644  657      overlay_target_route_t *otr, overlay_target_mac_t *otm)
 645  658  {
 646  659          svp_lookup_t *slp;
 647  660          uint32_t type;
 648  661          const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
 649  662              *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
 650  663  
 651  664          /*
 652  665           * otl is an L3 request, so we have src/dst IPs for the inner packet.
 653  666           * We also have the vlan.
 654  667           *
  655  668           * Assume the kernel's overlay module is caching well, so we query
  656  669           * directly (i.e. no caching up here of actual destinations).
 657  670           *
  658  671           * We use our existing remote server (svp_remote), but with the new
  659  672           * message SVP_R_ROUTE_REQ.
 660  673           */
 661  674  
 662  675          if (IN6_IS_ADDR_V4MAPPED(src)) {
 663  676                  if (!IN6_IS_ADDR_V4MAPPED(dst)) {
 664  677                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 665  678                          return;
 666  679                  }
 667  680                  type = SVP_VL3_IP;
 668  681          } else {
 669  682                  if (IN6_IS_ADDR_V4MAPPED(dst)) {
 670  683                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 671  684                          return;
 672  685                  }
 673  686                  type = SVP_VL3_IPV6;
 674  687          }
 675  688  
 676  689          slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 677  690          if (slp == NULL) {
 678  691                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 679  692                  return;
 680  693          }
 681  694  
 682  695          slp->svl_type = SVP_L_ROUTE;
 683  696          slp->svl_u.svl_route.svl_handle = vqh;
 684  697          slp->svl_u.svl_route.svl_point = otp;
 685  698          slp->svl_u.svl_route.svl_route = otr;
 686  699          slp->svl_u.svl_route.svl_mac = otm;
 687  700  
 688  701          svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
 689  702              otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
 690  703  }
 691  704  
 692  705  static void
 693  706  varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
 694  707      const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
 695  708      overlay_target_route_t *otr, overlay_target_mac_t *otm)
 696  709  {
 697  710          svp_lookup_t *slp;
 698  711          svp_t *svp = arg;
 699  712  
 700  713          /*
 701  714           * Shuffle off L3 lookups to their own codepath.
 702  715           */
 703  716          if (otl->otl_l3req) {
 704  717                  varpd_svp_lookup_l3(svp, vqh, otl, otp, otr, otm);
 705  718                  return;
 706  719          }
 707  720          /*
 708  721           * At this point, the traditional overlay_target_point_t is all that
 709  722           * needs filling in.  Zero-out the otr for safety.
 710  723           */
 711  724          bzero(otr, sizeof (*otr));
 712  725  
 713  726  
 714  727          /*
  715  728           * Check if this is something that we need to proxy, e.g. ARP or NDP.
 716  729           */
 717  730          if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
 718  731                  libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
 719  732                  return;
 720  733          }
 721  734  
 722  735          if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
 723  736              otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
 724  737                  if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
 725  738                          libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
 726  739                  } else {
 727  740                          libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 728  741                  }
 729  742                  return;
 730  743          }
 731  744  
 732  745          /*
 733  746           * Watch out for various multicast and broadcast addresses. We've
 734  747           * already taken care of the IPv6 range above. Now we just need to
  735  748           * handle broadcast and, if the multicast bit (the lowest bit of the
  736  749           * first octet of the MAC) is set, drop the packet now.
 737  750           */
 738  751          if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
 739  752              ETHERADDRL) == 0 ||
 740  753              (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
 741  754                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 742  755                  return;
 743  756          }
 744  757  
 745  758          /*
 746  759           * If we have a failure to allocate memory for this, that's not good.
 747  760           * However, telling the kernel to just drop this packet is much better
 748  761           * than the alternative at this moment. At least we'll try again and we
 749  762           * may have something more available to us in a little bit.
 750  763           */
 751  764          slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
 752  765          if (slp == NULL) {
 753  766                  libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
 754  767                  return;
 755  768          }
 756  769  
 757  770          slp->svl_type = SVP_L_VL2;
 758  771          slp->svl_u.svl_vl2.svl_handle = vqh;
 759  772          slp->svl_u.svl_vl2.svl_point = otp;
 760  773  
 761  774          svp_remote_vl2_lookup(svp, &slp->svl_query,
 762  775              otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
 763  776  }
 764  777  
 765  778  /* ARGSUSED */
 766  779  static int
 767  780  varpd_svp_nprops(void *arg, uint_t *nprops)
 768  781  {
 769  782          *nprops = sizeof (varpd_svp_props) / sizeof (char *);
 770  783          return (0);
 771  784  }
 772  785  
 773  786  /* ARGSUSED */
 774  787  static int
 775  788  varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
 776  789  {
 777  790          switch (propid) {
 778  791          case 0:
 779  792                  /* svp/host */
 780  793                  libvarpd_prop_set_name(vph, varpd_svp_props[0]);
 781  794                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 782  795                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
 783  796                  libvarpd_prop_set_nodefault(vph);
 784  797                  break;
 785  798          case 1:
 786  799                  /* svp/port */
 787  800                  libvarpd_prop_set_name(vph, varpd_svp_props[1]);
 788  801                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 789  802                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 790  803                  (void) libvarpd_prop_set_default(vph, &svp_defport,
 791  804                      sizeof (svp_defport));
 792  805                  libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 793  806                  break;
 794  807          case 2:
 795  808                  /* svp/underlay_ip */
 796  809                  libvarpd_prop_set_name(vph, varpd_svp_props[2]);
 797  810                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 798  811                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
 799  812                  libvarpd_prop_set_nodefault(vph);
 800  813                  break;
 801  814          case 3:
 802  815                  /* svp/underlay_port */
 803  816                  libvarpd_prop_set_name(vph, varpd_svp_props[3]);
 804  817                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 805  818                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 806  819                  (void) libvarpd_prop_set_default(vph, &svp_defuport,
 807  820                      sizeof (svp_defuport));
 808  821                  libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
 809  822                  break;
 810  823          case 4:
 811  824                  /* svp/dcid */
 812  825                  libvarpd_prop_set_name(vph, varpd_svp_props[4]);
 813  826                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 814  827                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
 815  828                  libvarpd_prop_set_nodefault(vph);
 816  829                  libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
 817  830                  break;
 818  831          case 5:
 819  832                  /* svp/router_oui */
 820  833                  libvarpd_prop_set_name(vph, varpd_svp_props[5]);
 821  834                  libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
 822  835                  libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
 823  836                  libvarpd_prop_set_nodefault(vph);
 824  837                  break;
 825  838          default:
 826  839                  return (EINVAL);
 827  840          }
 828  841          return (0);
 829  842  }
 830  843  
 831  844  static int
 832  845  varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
 833  846  {
 834  847          svp_t *svp = arg;
 835  848  
 836  849          /* svp/host */
 837  850          if (strcmp(pname, varpd_svp_props[0]) == 0) {
 838  851                  size_t len;
 839  852  
 840  853                  mutex_enter(&svp->svp_lock);
 841  854                  if (svp->svp_host == NULL) {
 842  855                          *sizep = 0;
 843  856                  } else {
 844  857                          len = strlen(svp->svp_host) + 1;
 845  858                          if (*sizep < len) {
 846  859                                  mutex_exit(&svp->svp_lock);
 847  860                                  return (EOVERFLOW);
 848  861                          }
 849  862                          *sizep = len;
 850  863                          (void) strlcpy(buf, svp->svp_host, *sizep);
 851  864                  }
 852  865                  mutex_exit(&svp->svp_lock);
 853  866                  return (0);
 854  867          }
 855  868  
 856  869          /* svp/port */
 857  870          if (strcmp(pname, varpd_svp_props[1]) == 0) {
 858  871                  uint64_t val;
 859  872  
 860  873                  if (*sizep < sizeof (uint64_t))
 861  874                          return (EOVERFLOW);
 862  875  
 863  876                  mutex_enter(&svp->svp_lock);
 864  877                  if (svp->svp_port == 0) {
 865  878                          *sizep = 0;
 866  879                  } else {
 867  880                          val = svp->svp_port;
 868  881                          bcopy(&val, buf, sizeof (uint64_t));
 869  882                          *sizep = sizeof (uint64_t);
 870  883                  }
 871  884                  mutex_exit(&svp->svp_lock);
 872  885                  return (0);
 873  886          }
 874  887  
 875  888          /* svp/underlay_ip */
 876  889          if (strcmp(pname, varpd_svp_props[2]) == 0) {
 877  890                  if (*sizep < sizeof (struct in6_addr))
 878  891                          return (EOVERFLOW);
 879  892                  mutex_enter(&svp->svp_lock);
 880  893                  if (svp->svp_huip == B_FALSE) {
 881  894                          *sizep = 0;
 882  895                  } else {
 883  896                          bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
 884  897                          *sizep = sizeof (struct in6_addr);
 885  898                  }
 886  899                  mutex_exit(&svp->svp_lock);
 887  900                  return (0);
 888  901          }
 889  902  
 890  903          /* svp/underlay_port */
 891  904          if (strcmp(pname, varpd_svp_props[3]) == 0) {
 892  905                  uint64_t val;
 893  906  
 894  907                  if (*sizep < sizeof (uint64_t))
 895  908                          return (EOVERFLOW);
 896  909  
 897  910                  mutex_enter(&svp->svp_lock);
 898  911                  if (svp->svp_uport == 0) {
 899  912                          *sizep = 0;
 900  913                  } else {
 901  914                          val = svp->svp_uport;
 902  915                          bcopy(&val, buf, sizeof (uint64_t));
 903  916                          *sizep = sizeof (uint64_t);
 904  917                  }
 905  918  
 906  919                  mutex_exit(&svp->svp_lock);
 907  920                  return (0);
 908  921          }
 909  922  
 910  923          /* svp/dcid */
 911  924          if (strcmp(pname, varpd_svp_props[4]) == 0) {
 912  925                  uint64_t val;
 913  926  
 914  927                  if (*sizep < sizeof (uint64_t))
 915  928                          return (EOVERFLOW);
 916  929  
 917  930                  mutex_enter(&svp->svp_lock);
  918  931                  if (svp->svp_dcid == 0) {
 919  932                          *sizep = 0;
 920  933                  } else {
 921  934                          val = svp->svp_dcid;
 922  935                          bcopy(&val, buf, sizeof (uint64_t));
 923  936                          *sizep = sizeof (uint64_t);
 924  937                  }
 925  938  
 926  939                  mutex_exit(&svp->svp_lock);
 927  940                  return (0);
 928  941          }
 929  942  
 930  943          /* svp/router_oui */
 931  944          if (strcmp(pname, varpd_svp_props[5]) == 0) {
 932  945                  if (*sizep < ETHERADDRL)
 933  946                          return (EOVERFLOW);
 934  947                  mutex_enter(&svp->svp_lock);
 935  948  
 936  949                  if (ether_is_zero(&svp->svp_router_oui)) {
 937  950                          *sizep = 0;
 938  951                  } else {
 939  952                          bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
 940  953                          *sizep = ETHERADDRL;
 941  954                  }
 942  955  
 943  956                  mutex_exit(&svp->svp_lock);
 944  957                  return (0);
 945  958          }
 946  959          return (EINVAL);
 947  960  }
 948  961  
 949  962  static int
 950  963  varpd_svp_setprop(void *arg, const char *pname, const void *buf,
 951  964      const uint32_t size)
 952  965  {
 953  966          svp_t *svp = arg;
 954  967  
 955  968          /* svp/host */
 956  969          if (strcmp(pname, varpd_svp_props[0]) == 0) {
 957  970                  char *dup;
  958  971                  dup = umem_alloc(size, UMEM_DEFAULT);
  959  972                  if (dup == NULL)
  960  973                          return (ENOMEM);
  961  974                  (void) strlcpy(dup, buf, size);
 962  975                  mutex_enter(&svp->svp_lock);
 963  976                  if (svp->svp_host != NULL)
 964  977                          umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
 965  978                  svp->svp_host = dup;
 966  979                  mutex_exit(&svp->svp_lock);
 967  980                  return (0);
 968  981          }
 969  982  
 970  983          /* svp/port */
 971  984          if (strcmp(pname, varpd_svp_props[1]) == 0) {
 972  985                  const uint64_t *valp = buf;
 973  986                  if (size < sizeof (uint64_t))
 974  987                          return (EOVERFLOW);
 975  988  
 976  989                  if (*valp == 0 || *valp > UINT16_MAX)
 977  990                          return (EINVAL);
 978  991  
 979  992                  mutex_enter(&svp->svp_lock);
 980  993                  svp->svp_port = (uint16_t)*valp;
 981  994                  mutex_exit(&svp->svp_lock);
 982  995                  return (0);
 983  996          }
 984  997  
 985  998          /* svp/underlay_ip */
 986  999          if (strcmp(pname, varpd_svp_props[2]) == 0) {
 987 1000                  const struct in6_addr *ipv6 = buf;
 988 1001  
 989 1002                  if (size < sizeof (struct in6_addr))
 990 1003                          return (EOVERFLOW);
 991 1004  
 992 1005                  if (IN6_IS_ADDR_V4COMPAT(ipv6))
 993 1006                          return (EINVAL);
 994 1007  
 995 1008                  if (IN6_IS_ADDR_MULTICAST(ipv6))
 996 1009                          return (EINVAL);
 997 1010  
 998 1011                  if (IN6_IS_ADDR_6TO4(ipv6))
 999 1012                          return (EINVAL);
1000 1013  
1001 1014                  if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
1002 1015                          ipaddr_t v4;
1003 1016                          IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
1004 1017                          if (IN_MULTICAST(v4))
1005 1018                                  return (EINVAL);
1006 1019                  }
1007 1020  
1008 1021                  mutex_enter(&svp->svp_lock);
1009 1022                  bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1010 1023                  svp->svp_huip = B_TRUE;
1011 1024                  mutex_exit(&svp->svp_lock);
1012 1025                  return (0);
1013 1026          }
1014 1027  
1015 1028          /* svp/underlay_port */
1016 1029          if (strcmp(pname, varpd_svp_props[3]) == 0) {
1017 1030                  const uint64_t *valp = buf;
1018 1031                  if (size < sizeof (uint64_t))
1019 1032                          return (EOVERFLOW);
1020 1033  
1021 1034                  if (*valp == 0 || *valp > UINT16_MAX)
1022 1035                          return (EINVAL);
1023 1036  
1024 1037                  mutex_enter(&svp->svp_lock);
1025 1038                  svp->svp_uport = (uint16_t)*valp;
1026 1039                  mutex_exit(&svp->svp_lock);
1027 1040  
1028 1041                  return (0);
1029 1042          }
1030 1043  
1031 1044          /* svp/dcid */
1032 1045          if (strcmp(pname, varpd_svp_props[4]) == 0) {
1033 1046                  const uint64_t *valp = buf;
1034 1047                  if (size < sizeof (uint64_t))
1035 1048                          return (EOVERFLOW);
1036 1049  
1037 1050                  if (*valp == 0 || *valp > UINT32_MAX - 1)
1038 1051                          return (EINVAL);
1039 1052  
1040 1053                  mutex_enter(&svp->svp_lock);
1041 1054                  svp->svp_dcid = (uint32_t)*valp;
1042 1055                  mutex_exit(&svp->svp_lock);
1043 1056  
1044 1057                  return (0);
1045 1058          }
1046 1059  
1047 1060          /* svp/router_oui */
1048 1061          if (strcmp(pname, varpd_svp_props[5]) == 0) {
1049 1062                  if (size < ETHERADDRL)
1050 1063                          return (EOVERFLOW);
1051 1064                  mutex_enter(&svp->svp_lock);
1052 1065                  bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
1053 1066                  /* Zero-out the low three bytes. */
1054 1067                  svp->svp_router_oui[3] = 0;
1055 1068                  svp->svp_router_oui[4] = 0;
1056 1069                  svp->svp_router_oui[5] = 0;
1057 1070                  mutex_exit(&svp->svp_lock);
1058 1071                  return (0);
1059 1072          }
1060 1073  
1061 1074          return (EINVAL);
1062 1075  }
1063 1076  
1064 1077  static int
1065 1078  varpd_svp_save(void *arg, nvlist_t *nvp)
1066 1079  {
1067 1080          int ret;
1068 1081          svp_t *svp = arg;
1069 1082  
1070 1083          mutex_enter(&svp->svp_lock);
1071 1084          /* svp/host */
1072 1085          if (svp->svp_host != NULL) {
1073 1086                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1074 1087                      svp->svp_host)) != 0) {
1075 1088                          mutex_exit(&svp->svp_lock);
1076 1089                          return (ret);
1077 1090                  }
1078 1091          }
1079 1092  
1080 1093          /* svp/port */
1081 1094          if (svp->svp_port != 0) {
1082 1095                  if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1083 1096                      svp->svp_port)) != 0) {
1084 1097                          mutex_exit(&svp->svp_lock);
1085 1098                          return (ret);
1086 1099                  }
1087 1100          }
1088 1101  
1089 1102          /* svp/underlay_ip */
1090 1103          if (svp->svp_huip == B_TRUE) {
1091 1104                  char buf[INET6_ADDRSTRLEN];
1092 1105  
1093 1106                  if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1094 1107                      NULL)
1095 1108                          libvarpd_panic("unexpected inet_ntop failure: %d",
1096 1109                              errno);
1097 1110  
1098 1111                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1099 1112                      buf)) != 0) {
1100 1113                          mutex_exit(&svp->svp_lock);
1101 1114                          return (ret);
1102 1115                  }
1103 1116          }
1104 1117  
1105 1118          /* svp/underlay_port */
1106 1119          if (svp->svp_uport != 0) {
1107 1120                  if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1108 1121                      svp->svp_uport)) != 0) {
1109 1122                          mutex_exit(&svp->svp_lock);
1110 1123                          return (ret);
1111 1124                  }
1112 1125          }
1113 1126  
1114 1127          /* svp/dcid */
1115 1128          if (svp->svp_dcid != 0) {
1116 1129                  if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1117 1130                      svp->svp_dcid)) != 0) {
1118 1131                          mutex_exit(&svp->svp_lock);
1119 1132                          return (ret);
1120 1133                  }
1121 1134          }
1122 1135  
1123 1136          /* svp/router_oui */
1124 1137          if (!ether_is_zero(&svp->svp_router_oui)) {
1125 1138                  char buf[ETHERADDRSTRL];
1126 1139  
1127 1140                  if (ether_ntoa_r((struct ether_addr *)&svp->svp_router_oui,
1128 1141                      buf) == NULL) {
1129 1142                          libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1130 1143                              errno);
1131 1144                  }
1132 1145  
1133 1146                  if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1134 1147                      buf)) != 0) {
1135 1148                          mutex_exit(&svp->svp_lock);
1136 1149                          return (ret);
1137 1150                  }
1138 1151          }
1139 1152  
1140 1153          mutex_exit(&svp->svp_lock);
1141 1154          return (0);
1142 1155  }
1143 1156  
1144 1157  static int
1145 1158  varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1146 1159      overlay_plugin_dest_t dest, void **outp)
1147 1160  {
1148 1161          int ret;
1149 1162          svp_t *svp;
1150 1163          char *ipstr, *hstr, *etherstr;
1151 1164  
1152 1165          if (varpd_svp_valid_dest(dest) == B_FALSE)
1153 1166                  return (ENOTSUP);
1154 1167  
1155 1168          if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1156 1169                  return (ret);
1157 1170  
1158 1171          /* svp/host */
1159 1172          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1160 1173              &hstr)) != 0) {
1161 1174                  if (ret != ENOENT) {
1162 1175                          varpd_svp_destroy(svp);
1163 1176                          return (ret);
1164 1177                  }
1165 1178                  svp->svp_host = NULL;
1166 1179          } else {
1167 1180                  size_t blen = strlen(hstr) + 1;
1168 1181                  svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
1169 1182                  (void) strlcpy(svp->svp_host, hstr, blen);
1170 1183          }
1171 1184  
1172 1185          /* svp/port */
1173 1186          if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1174 1187              &svp->svp_port)) != 0) {
1175 1188                  if (ret != ENOENT) {
1176 1189                          varpd_svp_destroy(svp);
1177 1190                          return (ret);
1178 1191                  }
1179 1192                  svp->svp_port = 0;
1180 1193          }
1181 1194  
1182 1195          /* svp/underlay_ip */
1183 1196          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1184 1197              &ipstr)) != 0) {
1185 1198                  if (ret != ENOENT) {
1186 1199                          varpd_svp_destroy(svp);
1187 1200                          return (ret);
1188 1201                  }
1189 1202                  svp->svp_huip = B_FALSE;
1190 1203          } else {
1191 1204                  ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1192 1205                  if (ret == -1) {
1193 1206                          assert(errno == EAFNOSUPPORT);
1194 1207                          libvarpd_panic("unexpected inet_pton failure: %d",
1195 1208                              errno);
1196 1209                  }
1197 1210  
1198 1211                  if (ret == 0) {
1199 1212                          varpd_svp_destroy(svp);
1200 1213                          return (EINVAL);
1201 1214                  }
1202 1215                  svp->svp_huip = B_TRUE;
1203 1216          }
1204 1217  
1205 1218          /* svp/underlay_port */
1206 1219          if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1207 1220              &svp->svp_uport)) != 0) {
1208 1221                  if (ret != ENOENT) {
1209 1222                          varpd_svp_destroy(svp);
1210 1223                          return (ret);
1211 1224                  }
1212 1225                  svp->svp_uport = 0;
1213 1226          }
1214 1227  
1215 1228          /* svp/dcid */
1216 1229          if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1217 1230              &svp->svp_dcid)) != 0) {
1218 1231                  if (ret != ENOENT) {
1219 1232                          varpd_svp_destroy(svp);
1220 1233                          return (ret);
1221 1234                  }
1222 1235                  svp->svp_dcid = 0;
1223 1236          }
1224 1237  
1225 1238          /* svp/router_oui */
1226 1239          if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1227 1240              &etherstr)) != 0) {
1228 1241                  if (ret != ENOENT) {
1229 1242                          varpd_svp_destroy(svp);
1230 1243                          return (ret);
1231 1244                  }
1232 1245                  bzero(&svp->svp_router_oui, ETHERADDRL);
1233 1246          } else if (ether_aton_r(etherstr,
1234 1247              (struct ether_addr *)&svp->svp_router_oui) == NULL) {
1235 1248                  libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1236 1249          }
1237 1250  
1238 1251          svp->svp_hdl = hdl;
1239 1252          *outp = svp;
1240 1253          return (0);
1241 1254  }
1242 1255  
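           /*
            * Reviewer note: this is the VL3 (ARP/NDP-style) query entry
            * point. Anything other than an Ethernet query is dropped
            * outright, as is a query we cannot allocate lookup state for.
            * Otherwise the reply handle and output buffer are stashed in an
            * svp_lookup_t and an asynchronous remote lookup is kicked off;
            * the completion path elsewhere in the plugin uses the saved
            * svl_vah and svl_out to answer via libvarpd_plugin_arp_reply().
            */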
1243 1256  static void
1244 1257  varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1245 1258      const struct sockaddr *sock, uint16_t vlan __unused, uint8_t *out)
1246 1259  {
1247 1260          svp_t *svp = arg;
1248 1261          svp_lookup_t *svl;
1249 1262  
1250 1263          if (type != VARPD_QTYPE_ETHERNET) {
1251 1264                  libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1252 1265                  return;
1253 1266          }
1254 1267  
1255 1268          svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1256 1269          if (svl == NULL) {
1257 1270                  libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1258 1271                  return;
1259 1272          }
1260 1273  
1261 1274          svl->svl_type = SVP_L_VL3;
1262 1275          svl->svl_u.svl_vl3.svl_vah = vah;
1263 1276          svl->svl_u.svl_vl3.svl_out = out;
1264 1277          svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1265 1278  }
1266 1279  
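           /*
            * Reviewer note: the plugin's entry-point vector. The leading 0
            * is the callback-flags word; the NULL slots appear to be the
            * optional default-destination and DHCP entry points, which svp
            * does not implement.
            */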
1267 1280  static const varpd_plugin_ops_t varpd_svp_ops = {
1268 1281          0,
1269 1282          varpd_svp_create,
1270 1283          varpd_svp_start,
1271 1284          varpd_svp_stop,
1272 1285          varpd_svp_destroy,
1273 1286          NULL,
1274 1287          varpd_svp_lookup,
1275 1288          varpd_svp_nprops,
1276 1289          varpd_svp_propinfo,
1277 1290          varpd_svp_getprop,
1278 1291          varpd_svp_setprop,
1279 1292          varpd_svp_save,
1280 1293          varpd_svp_restore,
1281 1294          varpd_svp_arp,
1282 1295          NULL
1283 1296  };
1284 1297  
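           /*
            * Reviewer note: set up the plugin-wide bunyan logger by
            * creating the "svp" logger and attaching a stderr stream at the
            * INFO level. If the stream cannot be added, the logger is torn
            * back down so we never run with half-initialized logging state.
            */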
1285 1298  static int
1286 1299  svp_bunyan_init(void)
1287 1300  {
1288 1301          int ret;
1289 1302  
1290 1303          if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1291 1304                  return (ret);
1292 1305          ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1293 1306              bunyan_stream_fd, (void *)STDERR_FILENO);
1294 1307          if (ret != 0)
1295 1308                  bunyan_fini(svp_bunyan);
1296 1309          return (ret);
1297 1310  }
1298 1311  
1299 1312  static void
1300 1313  svp_bunyan_fini(void)
1301 1314  {
1302 1315          if (svp_bunyan != NULL)
1303 1316                  bunyan_fini(svp_bunyan);
1304 1317  }
1305 1318  
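           /*
            * Reviewer note: #pragma init arranges for varpd_svp_init() to
            * run when this shared object is loaded into varpd. Subsystems
            * are brought up in dependency order (logging, hosts, the lookup
            * cache, events, timers, remotes), and each failure path unwinds
            * only what was already initialized, leaving the plugin
            * unregistered rather than taking down varpd. Note that vpr is
            * freed on both the success and failure paths, which suggests
            * registration does not retain the registration structure
            * itself.
            */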
1306 1319  #pragma init(varpd_svp_init)
1307 1320  static void
1308 1321  varpd_svp_init(void)
1309 1322  {
1310 1323          int err;
1311 1324          varpd_plugin_register_t *vpr;
1312 1325  
1313 1326          if (svp_bunyan_init() != 0)
1314 1327                  return;
1315 1328  
1316 1329          if ((err = svp_host_init()) != 0) {
1317 1330                  (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1318 1331                      BUNYAN_T_INT32, "error", err,
1319 1332                      BUNYAN_T_END);
1320 1333                  svp_bunyan_fini();
1321 1334                  return;
1322 1335          }
1323 1336  
1324 1337          svp_lookup_cache = umem_cache_create("svp_lookup",
1325 1338              sizeof (svp_lookup_t),  0, NULL, NULL, NULL, NULL, NULL, 0);
1326 1339          if (svp_lookup_cache == NULL) {
1327 1340                  (void) bunyan_error(svp_bunyan,
1328 1341                      "failed to create svp_lookup cache",
1329 1342                      BUNYAN_T_INT32, "error", errno,
1330 1343                      BUNYAN_T_END);
1331 1344                  svp_bunyan_fini();
1332 1345                  return;
1333 1346          }
1334 1347  
1335 1348          if ((err = svp_event_init()) != 0) {
1336 1349                  (void) bunyan_error(svp_bunyan,
1337 1350                      "failed to init event subsystem",
1338 1351                      BUNYAN_T_INT32, "error", err,
1339 1352                      BUNYAN_T_END);
1340 1353                  svp_bunyan_fini();
1341 1354                  umem_cache_destroy(svp_lookup_cache);
1342 1355                  return;
1343 1356          }
1344 1357  
1345 1358          if ((err = svp_timer_init()) != 0) {
1346 1359                  (void) bunyan_error(svp_bunyan,
1347 1360                      "failed to init timer subsystem",
1348 1361                      BUNYAN_T_INT32, "error", err,
1349 1362                      BUNYAN_T_END);
1350 1363                  svp_event_fini();
1351 1364                  umem_cache_destroy(svp_lookup_cache);
1352 1365                  svp_bunyan_fini();
1353 1366                  return;
1354 1367          }
1355 1368  
1356 1369          if ((err = svp_remote_init()) != 0) {
1357 1370                  (void) bunyan_error(svp_bunyan,
1358 1371                      "failed to init remote subsystem",
1359 1372                      BUNYAN_T_INT32, "error", err,
1360 1373                      BUNYAN_T_END);
1361 1374                  svp_event_fini();
1362 1375                  umem_cache_destroy(svp_lookup_cache);
1363 1376                  svp_bunyan_fini();
1364 1377                  return;
1365 1378          }
1366 1379  
1367 1380          vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1368 1381          if (vpr == NULL) {
1369 1382                  (void) bunyan_error(svp_bunyan,
1370 1383                      "failed to alloc varpd plugin",
1371 1384                      BUNYAN_T_INT32, "error", err,
1372 1385                      BUNYAN_T_END);
1373 1386                  svp_remote_fini();
1374 1387                  svp_event_fini();
1375 1388                  umem_cache_destroy(svp_lookup_cache);
1376 1389                  svp_bunyan_fini();
1377 1390                  return;
1378 1391          }
1379 1392  
1380 1393          vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1381 1394          vpr->vpr_name = "svp";
1382 1395          vpr->vpr_ops = &varpd_svp_ops;
1383 1396  
1384 1397          if ((err = libvarpd_plugin_register(vpr)) != 0) {
1385 1398                  (void) bunyan_error(svp_bunyan,
1386 1399                      "failed to register varpd plugin",
1387 1400                      BUNYAN_T_INT32, "error", err,
1388 1401                      BUNYAN_T_END);
1389 1402                  svp_remote_fini();
1390 1403                  svp_event_fini();
1391 1404                  umem_cache_destroy(svp_lookup_cache);
1392 1405                  svp_bunyan_fini();
1393 1406  
1394 1407          }
1395 1408          libvarpd_plugin_free(vpr);
1396 1409  }
  