1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 #ifndef _LIBVARPD_SVP_H
  17 #define _LIBVARPD_SVP_H
  18 
  19 /*
  20  * Implementation details of the SVP plugin and the SVP protocol.
  21  */
  22 
  23 #include <netinet/in.h>
  24 #include <sys/ethernet.h>
  25 #include <thread.h>
  26 #include <synch.h>
  27 #include <libvarpd_provider.h>
  28 #include <sys/avl.h>
  29 #include <port.h>
  30 #include <sys/list.h>
  31 #include <bunyan.h>
  32 
  33 #include <libvarpd_svp_prot.h>
  34 
  35 #ifdef __cplusplus
  36 extern "C" {
  37 #endif
  38 
  39 typedef struct svp svp_t;               /* struct svp: core implementation structure */
  40 typedef struct svp_remote svp_remote_t; /* struct svp_remote: state kept per remote */
  41 typedef struct svp_conn svp_conn_t;     /* struct svp_conn: state kept per connection */
  42 typedef struct svp_query svp_query_t;   /* struct svp_query: a single query */
  43 
  44 typedef void (*svp_event_f)(port_event_t *, void *);        /* Event callback signature; type of se_func */
  45 
  46 typedef struct svp_event {      /* Event registration; the type taken by the svp_event_*() routines */
  47         svp_event_f     se_func;        /* Callback function */
  48         void            *se_arg;        /* presumably the void * handed to se_func -- confirm */
  49         int             se_events;      /* presumably the event mask of interest -- confirm */
  50 } svp_event_t;
  51 
  52 typedef void (*svp_timer_f)(void *);        /* Timer callback signature; type of st_func */
  53 
  54 typedef struct svp_timer {      /* Timer descriptor; the type taken by svp_timer_add()/svp_timer_remove() */
  55         svp_timer_f     st_func;        /* Timer callback function */
  56         void            *st_arg;        /* Timer callback arg */
  57         boolean_t       st_oneshot;     /* Is timer a one shot? */
  58         uint32_t        st_value;       /* periodic or one-shot time; units not stated here -- confirm */
  59         /* Fields below here are private to the svp_timer implementation */
  60         uint64_t        st_expire;      /* Next expiration */
  61         boolean_t       st_delivering;  /* Are we currently delivering this */
  62         avl_node_t      st_link;        /* AVL linkage; which tree is not visible here -- confirm */
  63 } svp_timer_t;
  64 
  65 /*
  66  * Note, neither the svp_log_ack_t nor the svp_lrm_req_t is part of this union,
  67  * as they are variable-sized data and we don't want to constrain their size.
  68  * Instead, the sq_rdata and sq_wdata members of the svp_query_t must be set
  69  * appropriately. */
  70 typedef union svp_query_data {  /* Fixed-size payloads; type of svp_query`sq_rdun and sq_wdun */
  71         svp_vl2_req_t   sqd_vl2r;       /* VL2 request */
  72         svp_vl2_ack_t   sqd_vl2a;       /* VL2 ack */
  73         svp_vl3_req_t   sdq_vl3r;       /* VL3 request; NOTE(review): "sdq_" prefix, unlike "sqd_" above */
  74         svp_vl3_ack_t   sdq_vl3a;       /* VL3 ack ("sdq_" prefix) */
  75         svp_log_req_t   sdq_logr;       /* log request ("sdq_" prefix) */
  76         svp_lrm_ack_t   sdq_lrma;       /* lrm ack ("sdq_" prefix) */
  77         svp_route_req_t sqd_rr;         /* route request */
  78         svp_route_ack_t sqd_ra;         /* route ack */
  79 } svp_query_data_t;
  80 
  81 typedef void (*svp_query_f)(svp_query_t *, void *);  /* Query callback signature; type of svp_query`sq_func */
  82 
  83 typedef enum svp_query_state {  /* Values for svp_query`sq_state (sequential values, not bit flags) */
  84         SVP_QUERY_INIT          = 0x00, /* presumably: no I/O started yet -- confirm */
  85         SVP_QUERY_WRITING       = 0x01, /* presumably: request being written -- confirm */
  86         SVP_QUERY_READING       = 0x02, /* presumably: response being read -- confirm */
  87         SVP_QUERY_FINISHED      = 0x03  /* presumably: query complete -- confirm */
  88 } svp_query_state_t;
  89 
  90 /*
  91  * The query structure is usable for all forms of svp queries that end up
  92  * getting passed across. Right now it's optimized for the fixed size data
  93  * requests as opposed to requests whose responses will always be streaming in
  94  * nature. Though, the streaming requests are the less common ones we have. We
  95  * may need to make additional changes for those.
  96  */
  97 struct svp_query {
  98         list_node_t             sq_lnode;       /* List entry; which list is not visible here -- confirm */
  99         svp_query_f             sq_func;        /* Callback function */
 100         svp_query_state_t       sq_state;       /* Query state (an svp_query_state_t value) */
 101         void                    *sq_arg;        /* Callback function arg */
 102         svp_t                   *sq_svp;        /* Pointer back to svp_t */
 103         svp_req_t               sq_header;      /* Header for the query */
 104         svp_query_data_t        sq_rdun;        /* Union for fixed-size read data */
 105         svp_query_data_t        sq_wdun;        /* Union for fixed-size write data */
 106         svp_status_t            sq_status;      /* Query response status */
 107         size_t                  sq_size;        /* Query response size */
 108         void                    *sq_rdata;      /* Read data pointer; must be set explicitly */
 109         size_t                  sq_rsize;       /* Read data size */
 110         void                    *sq_wdata;      /* Write data pointer; must be set explicitly */
 111         size_t                  sq_wsize;       /* Write data size */
 112         hrtime_t                sq_acttime;     /* Last I/O activity time */
 113 };
 114 
 115 typedef enum svp_conn_state {   /* Values for svp_conn`sc_cstate (sequential values, not bit flags) */
 116         SVP_CS_ERROR            = 0x00, /* note: the zero value is the error state */
 117         SVP_CS_INITIAL          = 0x01,
 118         SVP_CS_CONNECTING       = 0x02,
 119         SVP_CS_BACKOFF          = 0x03, /* presumably tied to sc_btimer/sc_nbackoff -- confirm */
 120         SVP_CS_ACTIVE           = 0x04,
 121         SVP_CS_WINDDOWN         = 0x05
 122 } svp_conn_state_t;
 123 
 124 typedef enum svp_conn_error {   /* Values for svp_conn`sc_error */
 125         SVP_CE_NONE             = 0x00, /* no error recorded */
 126         SVP_CE_ASSOCIATE        = 0x01, /* presumably: svp_event_associate() failed -- confirm */
 127         SVP_CE_NOPOLLOUT        = 0x02, /* presumably: expected POLLOUT not seen -- confirm */
 128         SVP_CE_SOCKET           = 0x03  /* presumably: socket-level failure; see sc_errno -- confirm */
 129 } svp_conn_error_t;
 130 
 131 typedef enum svp_conn_flags {   /* Bit flags for svp_conn`sc_flags */
 132         SVP_CF_ADDED            = 0x01,
 133         SVP_CF_DEGRADED         = 0x02,
 134         SVP_CF_REAP             = 0x04,
 135         SVP_CF_TEARDOWN         = 0x08,
 136         SVP_CF_UFLAG            = 0x0c, /* not a single bit: equals SVP_CF_REAP | SVP_CF_TEARDOWN */
 137         SVP_CF_USER             = 0x10
 138 } svp_conn_flags_t;
 139 
 140 typedef struct svp_conn_out {   /* Output-side state; type of svp_conn`sc_output */
 141         svp_query_t             *sco_query;     /* presumably the query currently being written -- confirm */
 142         size_t                  sco_offset;     /* presumably the progress offset into that write -- confirm */
 143 } svp_conn_out_t;
 144 
 145 typedef struct svp_conn_in {    /* Input-side state; type of svp_conn`sc_input */
 146         svp_query_t             *sci_query;     /* presumably the query whose response is being read -- confirm */
 147         svp_req_t               sci_req;        /* an svp_req_t; presumably the incoming header -- confirm */
 148         size_t                  sci_offset;     /* presumably the progress offset into that read -- confirm */
 149 } svp_conn_in_t;
 150 
 151 struct svp_conn {
 152         svp_remote_t            *sc_remote;     /* RO; the associated svp_remote_t */
 153         struct in6_addr         sc_addr;        /* RO; address for this connection */
 154         list_node_t             sc_rlist;       /* list linkage; protected by svp_remote_t`sr_lock */
 155         mutex_t                 sc_lock;        /* presumably protects the unannotated fields below -- confirm */
 156         svp_event_t             sc_event;       /* event registration for this connection */
 157         svp_timer_t             sc_btimer;      /* timer; name suggests backoff -- confirm */
 158         svp_timer_t             sc_qtimer;      /* timer; name suggests query handling -- confirm */
 159         int                     sc_socket;      /* socket descriptor */
 160         uint_t                  sc_gen;         /* generation number; semantics not visible here */
 161         uint_t                  sc_nbackoff;    /* presumably a count of backoffs -- confirm */
 162         svp_conn_flags_t        sc_flags;       /* SVP_CF_* bits */
 163         svp_conn_state_t        sc_cstate;      /* SVP_CS_* value */
 164         svp_conn_error_t        sc_error;       /* SVP_CE_* value */
 165         int                     sc_errno;       /* presumably the errno saved with sc_error -- confirm */
 166         list_t                  sc_queries;     /* list of queries; see svp_conn_queue() -- confirm */
 167         svp_conn_out_t          sc_output;      /* output-side state */
 168         svp_conn_in_t           sc_input;       /* input-side state */
 169 };
 170 
 171 typedef enum svp_remote_state { /* Bit flags for svp_remote`sr_state */
 172         SVP_RS_LOOKUP_SCHEDULED         = 0x01, /* On the DNS Queue */
 173         SVP_RS_LOOKUP_INPROGRESS        = 0x02, /* Doing a DNS lookup */
 174         SVP_RS_LOOKUP_VALID             = 0x04  /* addrinfo valid (see sr_addrinfo) */
 175 } svp_remote_state_t;
 176 
 177 /*
 178  * This series of bit-based flags should be ordered such that the most severe
 179  * is first. We can only set one message that userland can see, so if more than
 180  * one is set we want to make sure that the most severe one is the one reported.
 181  */
 182 typedef enum svp_degrade_state {        /* Bits for svp_remote`sr_degrade and svp_remote_degrade()/_restore() */
 183         SVP_RD_DNS_FAIL         = 0x01, /* DNS Resolution Failure */
 184         SVP_RD_REMOTE_FAIL      = 0x02, /* cannot reach any remote peers */
 185         SVP_RD_ALL              = 0x03  /* Both bits above OR'd together; only suitable for restore */
 186 } svp_degrade_state_t;
 187 
 188 typedef enum svp_shootdown_flags {      /* Bit flags for svp_sdlog`sdl_flags */
 189         SVP_SD_RUNNING          = 0x01, /* presumably: shootdown processing is active -- confirm */
 190         SVP_SD_QUIESCE          = 0x02, /* presumably: a quiesce has been requested -- confirm */
 191         SVP_SD_DORM             = 0x04  /* meaning not visible in this header -- confirm */
 192 } svp_shootdown_flags_t;
 193 
 194 /*
 195  * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and
 196  * condition variables. See the big theory statement for more information on how
 197  * it's used.
 198  */
 199 typedef struct svp_sdlog {      /* Shootdown log state; embedded as svp_remote`sr_shoot */
 200         mutex_t                 sdl_lock;       /* this structure's own lock */
 201         cond_t                  sdl_cond;       /* this structure's own condition variable */
 202         uint_t                  sdl_ref;        /* presumably a reference count -- confirm */
 203         svp_timer_t             sdl_timer;      /* timer descriptor */
 204         svp_shootdown_flags_t   sdl_flags;      /* SVP_SD_* bits */
 205         svp_query_t             sdl_query;      /* embedded query (not a pointer) */
 206         void                    *sdl_logack;    /* untyped; presumably a buffer for svp_log_ack_t data -- confirm */
 207         void                    *sdl_logrm;     /* untyped; presumably a buffer for svp_lrm_req_t data -- confirm */
 208         void                    *sdl_remote;    /* untyped; presumably the owning svp_remote_t -- confirm */
 209 } svp_sdlog_t;
 210 
 211 struct svp_remote {
 212         char                    *sr_hostname;   /* RO; remote host name */
 213         uint16_t                sr_rport;       /* RO; remote port */
 214         struct in6_addr         sr_uip;         /* RO; an IPv6 address (compare svp`svp_uip) */
 215         avl_node_t              sr_gnode;       /* AVL linkage; protected by svp_remote_lock */
 216         svp_remote_t            *sr_nexthost;   /* next-remote link; protected by svp_host_lock */
 217         mutex_t                 sr_lock;        /* also protects svp_conn`sc_rlist */
 218         cond_t                  sr_cond;        /* condition variable; waiters not visible here */
 219         svp_remote_state_t      sr_state;       /* SVP_RS_* bits */
 220         svp_degrade_state_t     sr_degrade;     /* SVP_RD_* bits */
 221         struct addrinfo         *sr_addrinfo;   /* see SVP_RS_LOOKUP_VALID for validity */
 222         avl_tree_t              sr_tree;        /* presumably holds svp_t via svp`svp_rlink -- confirm */
 223         uint_t                  sr_count;       /* active count */
 224         uint_t                  sr_gen;         /* generation number; semantics not visible here */
 225         uint_t                  sr_tconns;      /* total conns + dconns */
 226         uint_t                  sr_ndconns;     /* number of degraded conns */
 227         list_t                  sr_conns;       /* all conns */
 228         svp_sdlog_t             sr_shoot;       /* shootdown log state; one per remote */
 229 };
 230 
 231 /*
 232  * We have a bunch of different things that we get back from the API at the
 233  * plug-in layer. These include:
 234  *
 235  *   o OOB Shootdowns
 236  *   o VL3->VL2 Lookups
 237  *   o VL2->UL3 Lookups
 238  *   o VL2 Log invalidations
 239  *   o VL3 Log injections
 240  */
 241 typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
 242     const uint16_t, void *);    /* type of svp_cb`scb_vl2_lookup */
 243 typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
 244     const struct in6_addr *, const uint16_t, void *);   /* type of svp_cb`scb_vl3_lookup */
 245 typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);       /* type of svp_cb`scb_vl2_invalidate */
 246 typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
 247     const struct in6_addr *, const uint8_t *, const uint8_t *); /* type of svp_cb`scb_vl3_inject */
 248 typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
 249     const struct in6_addr *, const uint16_t uport);     /* type of svp_cb`scb_shootdown */
 250 typedef void (*svp_route_lookup_f)(svp_t *, svp_status_t, uint32_t, uint32_t,
 251     uint16_t, uint8_t *, uint8_t *, uint16_t, uint8_t *, uint8_t, uint8_t,
 252     void *);    /* type of svp_cb`scb_route_lookup; argument meanings not named here */
 253 
 254 typedef struct svp_cb {         /* Callback table; stored as svp`svp_cb */
 255         svp_vl2_lookup_f        scb_vl2_lookup;         /* presumably the VL2->UL3 lookup result -- confirm */
 256         svp_vl3_lookup_f        scb_vl3_lookup;         /* presumably the VL3->VL2 lookup result -- confirm */
 257         svp_vl2_invalidation_f  scb_vl2_invalidate;     /* presumably the VL2 log invalidation -- confirm */
 258         svp_vl3_inject_f        scb_vl3_inject;         /* presumably the VL3 log injection -- confirm */
 259         svp_shootdown_f         scb_shootdown;          /* presumably the OOB shootdown -- confirm */
 260         svp_route_lookup_f      scb_route_lookup;       /* route lookup result */
 261 } svp_cb_t;
 262 
 263 /*
 264  * Core implementation structure.
 265  */
 266 struct svp {
 267         overlay_plugin_dest_t   svp_dest;       /* RO; an overlay_plugin_dest_t value */
 268         varpd_provider_handle_t *svp_hdl;       /* RO; varpd provider handle */
 269         svp_cb_t                svp_cb;         /* RO; callback table */
 270         uint64_t                svp_vid;        /* RO; 64-bit id, presumably the virtual network id -- confirm */
 271         avl_node_t              svp_rlink;      /* AVL linkage; owned by svp_remote */
 272         svp_remote_t            *svp_remote;    /* RO iff started */
 273         mutex_t                 svp_lock;       /* protects the fields annotated svp_lock below */
 274         char                    *svp_host;      /* svp_lock */
 275         uint16_t                svp_port;       /* svp_lock */
 276         uint16_t                svp_uport;      /* svp_lock */
 277         uint32_t                svp_dcid;       /* svp_lock (but write-once?) */
 278         boolean_t               svp_huip;       /* svp_lock; presumably "svp_uip is set" -- confirm */
 279         struct in6_addr         svp_uip;        /* svp_lock */
 280         /* NOTE: lower-3 bytes are 0s. */
 281         uint8_t         svp_router_oui[6];      /* svp_lock (but write-once?) */
 282 };
 283 
 284 extern bunyan_logger_t *svp_bunyan;     /* bunyan logger handle; declared here, defined in another file */
 285 
 286 extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
 287     svp_remote_t **);   /* last arg presumably receives the remote; int presumably 0 or an errno -- confirm */
 288 extern int svp_remote_attach(svp_remote_t *, svp_t *);  /* associates an svp_t with a remote; returns int status */
 289 extern void svp_remote_detach(svp_t *);                 /* counterpart to svp_remote_attach() */
 290 extern void svp_remote_release(svp_remote_t *);         /* presumably drops a reference on the remote -- confirm */
 291 extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
 292     const struct sockaddr *, void *);   /* final void * presumably the callback argument -- confirm */
 293 extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
 294     void *);            /* final void * presumably the callback argument -- confirm */
 295 extern void svp_remote_route_lookup(svp_t *, svp_query_t *,
 296     const struct in6_addr *, const struct in6_addr *, uint32_t,
 297     uint16_t, void *);  /* final void * presumably the callback argument -- confirm */
 298 
 299 
 300 /*
 301  * Init functions
 302  */
 303 extern int svp_remote_init(void);       /* int status; presumably 0 on success -- confirm */
 304 extern void svp_remote_fini(void);      /* counterpart to svp_remote_init() */
 305 extern int svp_event_init(void);        /* int status; presumably 0 on success -- confirm */
 306 extern int svp_event_timer_init(svp_event_t *); /* takes the event registration to use for timers -- confirm */
 307 extern void svp_event_fini(void);       /* counterpart to svp_event_init() */
 308 extern int svp_host_init(void);         /* int status; no matching _fini is declared in this header */
 309 extern int svp_timer_init(void);        /* int status; no matching _fini is declared in this header */
 310 
 311 /*
 312  * Timers
 313  */
 314 extern int svp_tickrate;                        /* global tunable, defined elsewhere; units not stated here */
 315 extern void svp_timer_add(svp_timer_t *);       /* caller fills in the public svp_timer_t fields first -- confirm */
 316 extern void svp_timer_remove(svp_timer_t *);    /* counterpart to svp_timer_add() */
 317 
 318 /*
 319  * Event loop management
 320  */
 321 extern int svp_event_associate(svp_event_t *, int);     /* int arg presumably a file descriptor -- confirm */
 322 extern int svp_event_dissociate(svp_event_t *, int);    /* counterpart to svp_event_associate() */
 323 extern int svp_event_inject(svp_event_t *);             /* presumably queues a user event -- confirm */
 324 
 325 /*
 326  * Connection manager
 327  */
 328 extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);    /* int status; no svp_conn_t is returned directly */
 329 extern void svp_conn_destroy(svp_conn_t *);                     /* counterpart to svp_conn_create() */
 330 extern void svp_conn_fallout(svp_conn_t *);                     /* semantics not visible in this header */
 331 extern void svp_conn_queue(svp_conn_t *, svp_query_t *);        /* hands a query to a connection */
 332 
 333 /*
 334  * FMA related
 335  */
 336 extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);    /* takes SVP_RD_* bits; see sr_degrade */
 337 extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);    /* SVP_RD_ALL is only suitable here */
 338 
 339 /*
 340  * Misc.
 341  */
 342 extern int svp_comparator(const void *, const void *);          /* comparator shape; presumably for an AVL tree of svp_t -- confirm */
 343 extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);  /* semantics not visible in this header */
 344 extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);     /* presumably delivers a DNS result -- confirm */
 345 extern void svp_host_queue(svp_remote_t *);                     /* presumably queues a DNS lookup -- confirm */
 346 extern void svp_query_release(svp_query_t *);                   /* releases a query; exact effect not visible here */
 347 extern void svp_query_crc32(svp_req_t *, void *, size_t);       /* args: header, buffer, buffer size -- confirm */
 348 
 349 /*
 350  * Shootdown related
 351  */
 352 extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
 353     svp_sdlog_t *);     /* takes a VL3 log entry plus the shootdown log state */
 354 extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);  /* takes a VL2 log entry */
 355 extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
 356     size_t);            /* void * and size_t presumably a buffer and its length -- confirm */
 357 extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
 358     size_t);            /* void * and size_t presumably a buffer and its length -- confirm */
 359 extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);        /* presumably completes a log request -- confirm */
 360 extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);         /* presumably completes an lrm request -- confirm */
 361 extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *); /* note: takes no svp_remote_t */
 362 extern int svp_shootdown_init(svp_remote_t *);          /* int status; presumably 0 on success -- confirm */
 363 extern void svp_shootdown_fini(svp_remote_t *);         /* counterpart to svp_shootdown_init() */
 364 extern void svp_shootdown_start(svp_remote_t *);        /* starts shootdown processing for the remote -- confirm */
 365 
 366 #ifdef __cplusplus
 367 }
 368 #endif
 369 
 370 #endif /* _LIBVARPD_SVP_H */