1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 #ifndef _LIBVARPD_SVP_H
  17 #define _LIBVARPD_SVP_H
  18 
  19 /*
  20  * Implementation details of the SVP plugin and the SVP protocol.
  21  */
  22 
  23 #include <netinet/in.h>
  24 #include <sys/ethernet.h>
  25 #include <thread.h>
  26 #include <synch.h>
  27 #include <libvarpd_provider.h>
  28 #include <sys/avl.h>
  29 #include <port.h>
  30 #include <sys/list.h>
  31 #include <bunyan.h>
  32 
  33 #include <libvarpd_svp_prot.h>
  34 
  35 #ifdef __cplusplus
  36 extern "C" {
  37 #endif
  38 
  39 typedef struct svp svp_t;                   /* Forward typedefs; the structs are defined below */
  40 typedef struct svp_remote svp_remote_t;     /* struct svp_remote */
  41 typedef struct svp_conn svp_conn_t;         /* struct svp_conn */
  42 typedef struct svp_query svp_query_t;       /* struct svp_query */
  43 
  44 typedef void (*svp_event_f)(port_event_t *, void *);    /* Type of svp_event_t`se_func */
  45 
  46 typedef struct svp_event {      /* Handle taken by the svp_event_*() routines */
  47         svp_event_f     se_func;        /* Callback function */
  48         void            *se_arg;        /* Presumably se_func's void * argument; confirm */
  49         int             se_events;      /* Presumably an event mask; TODO confirm */
  50 } svp_event_t;
  51 
  52 typedef void (*svp_timer_f)(void *);    /* Type of svp_timer_t`st_func */
  53 
  54 typedef struct svp_timer {      /* Handle taken by svp_timer_add()/svp_timer_remove() */
  55         svp_timer_f     st_func;        /* Timer callback function */
  56         void            *st_arg;        /* Timer callback arg */
  57         boolean_t       st_oneshot;     /* Is timer a one shot? */
  58         uint32_t        st_value;       /* periodic or one-shot time; units not stated here */
  59         /* Fields below here are private to the svp_timer implementation */
  60         uint64_t        st_expire;      /* Next expiration */
  61         boolean_t       st_delivering;  /* Are we currently delivering this? */
  62         avl_node_t      st_link;        /* AVL tree linkage */
  63 } svp_timer_t;
  64 
  65 /*
  66  * Note, neither svp_log_ack_t nor svp_lrm_req_t is a member of this union: both
  67  * are variable-sized data and we don't want to constrain their size. Instead,
  68  * the rdata and wdata members (presumably svp_query_t`sq_rdata/sq_wdata) must be set appropriately.
  69  */
  70 typedef union svp_query_data {  /* Type of svp_query_t`sq_rdun and sq_wdun */
  71         svp_vl2_req_t   sqd_vl2r;
  72         svp_vl2_ack_t   sqd_vl2a;
  73         svp_vl3_req_t   sdq_vl3r;       /* NOTE(review): sdq_ (not sqd_) prefix on these four; */
  74         svp_vl3_ack_t   sdq_vl3a;       /* looks like a transposition, but renaming would */
  75         svp_log_req_t   sdq_logr;       /* break existing users of these member names */
  76         svp_lrm_ack_t   sdq_lrma;
  77         svp_route_req_t sqd_rr;
  78         svp_route_ack_t sqd_ra;
  79 } svp_query_data_t;
  80 
  81 typedef void (*svp_query_f)(svp_query_t *, void *);     /* Type of svp_query_t`sq_func */
  82 
  83 typedef enum svp_query_state {  /* Values of svp_query_t`sq_state */
  84         SVP_QUERY_INIT          = 0x00, /* Zero value, so a zeroed query reads as INIT */
  85         SVP_QUERY_WRITING       = 0x01,
  86         SVP_QUERY_READING       = 0x02,
  87         SVP_QUERY_FINISHED      = 0x03
  88 } svp_query_state_t;
  89 
  90 /*
  91  * The query structure is usable for all forms of svp queries that end up
  92  * getting passed across. Right now it's optimized for the fixed size data
  93  * requests as opposed to requests whose responses will always be streaming in
  94  * nature. Though, the streaming requests are the less common ones we have. We
  95  * may need to make additional changes for those.
  96  */
  97 struct svp_query {
  98         list_node_t             sq_lnode;       /* List entry (presumably on svp_conn_t`sc_queries) */
  99         svp_query_f             sq_func;        /* Callback function */
 100         svp_query_state_t       sq_state;       /* Query state */
 101         void                    *sq_arg;        /* Callback function arg */
 102         svp_t                   *sq_svp;        /* Pointer back to svp_t */
 103         svp_req_t               sq_header;      /* Header for the query */
 104         svp_query_data_t        sq_rdun;        /* Union for read data */
 105         svp_query_data_t        sq_wdun;        /* Union for write data */
 106         svp_status_t            sq_status;      /* Query response status */
 107         size_t                  sq_size;        /* Query response size */
 108         void                    *sq_rdata;      /* Read data pointer; may be outside sq_rdun */
 109         size_t                  sq_rsize;       /* Read data size */
 110         void                    *sq_wdata;      /* Write data pointer; may be outside sq_wdun */
 111         size_t                  sq_wsize;       /* Write data size */
 112         hrtime_t                sq_acttime;     /* Last I/O activity time */
 113 };
 114 
 115 typedef enum svp_conn_state {   /* Values of svp_conn_t`sc_cstate */
 116         SVP_CS_ERROR            = 0x00, /* Zero value, so a zeroed conn reads as ERROR */
 117         SVP_CS_INITIAL          = 0x01,
 118         SVP_CS_CONNECTING       = 0x02,
 119         SVP_CS_BACKOFF          = 0x03,
 120         SVP_CS_ACTIVE           = 0x04,
 121         SVP_CS_WINDDOWN         = 0x05
 122 } svp_conn_state_t;
 123 
 124 typedef enum svp_conn_error {   /* Values of svp_conn_t`sc_error */
 125         SVP_CE_NONE             = 0x00, /* Zero value */
 126         SVP_CE_ASSOCIATE        = 0x01,
 127         SVP_CE_NOPOLLOUT        = 0x02,
 128         SVP_CE_SOCKET           = 0x03,
 129         SVP_CE_VERSION_PONG     = 0x04
 130 } svp_conn_error_t;
 131 
 132 typedef enum svp_conn_flags {   /* Bits of svp_conn_t`sc_flags */
 133         SVP_CF_ADDED            = 0x01,
 134         SVP_CF_DEGRADED         = 0x02,
 135         SVP_CF_REAP             = 0x04,
 136         SVP_CF_TEARDOWN         = 0x08,
 137         SVP_CF_UFLAG            = 0x0c, /* Mask: SVP_CF_REAP | SVP_CF_TEARDOWN */
 138         SVP_CF_USER             = 0x10
 139 } svp_conn_flags_t;
 140 
 141 typedef struct svp_conn_out {   /* Type of svp_conn_t`sc_output */
 142         svp_query_t             *sco_query;     /* Presumably the query being written; confirm */
 143         size_t                  sco_offset;     /* Presumably bytes written so far; confirm */
 144 } svp_conn_out_t;
 145 
 146 typedef struct svp_conn_in {    /* Type of svp_conn_t`sc_input */
 147         svp_query_t             *sci_query;     /* Presumably the query being answered; confirm */
 148         svp_req_t               sci_req;        /* Presumably the header being read; confirm */
 149         size_t                  sci_offset;     /* Presumably bytes read so far; confirm */
 150 } svp_conn_in_t;
 151 
 152 struct svp_conn {       /* Field comments name the guarding lock; RO presumably = read-only */
 153         svp_remote_t            *sc_remote;     /* RO */
 154         struct in6_addr         sc_addr;        /* RO */
 155         list_node_t             sc_rlist;       /* svp_remote_t`sr_lock */
 156         mutex_t                 sc_lock;        /* Presumably guards the fields below; confirm */
 157         svp_event_t             sc_event;
 158         svp_timer_t             sc_btimer;      /* Presumably the backoff timer; confirm */
 159         svp_timer_t             sc_qtimer;      /* Presumably the query timer; confirm */
 160         int                     sc_socket;
 161         uint_t                  sc_gen;
 162         uint_t                  sc_nbackoff;
 163         svp_conn_flags_t        sc_flags;       /* Bits from svp_conn_flags_t */
 164         svp_conn_state_t        sc_cstate;
 165         svp_conn_error_t        sc_error;
 166         int                     sc_errno;       /* Presumably errno saved with sc_error; confirm */
 167         list_t                  sc_queries;     /* Presumably svp_query_t's via sq_lnode; confirm */
 168         svp_conn_out_t          sc_output;
 169         svp_conn_in_t           sc_input;
 170         uint_t                  sc_version;
 171 };
 172 
 173 typedef enum svp_remote_state { /* Distinct bits, stored in svp_remote_t`sr_state */
 174         SVP_RS_LOOKUP_SCHEDULED         = 0x01, /* On the DNS Queue */
 175         SVP_RS_LOOKUP_INPROGRESS        = 0x02, /* Doing a DNS lookup */
 176         SVP_RS_LOOKUP_VALID             = 0x04  /* addrinfo valid */
 177 } svp_remote_state_t;
 178 
 179 /*
 180  * This series of bit-based flags should be ordered such that the most severe
 181  * is first. We can only set one message that user land can see, so if more than
 182  * one flag is set we want to make sure that the most severe one is the one shown.
 183  */
 184 typedef enum svp_degrade_state {
 185         SVP_RD_DNS_FAIL         = 0x01, /* DNS Resolution Failure */
 186         SVP_RD_REMOTE_FAIL      = 0x02, /* cannot reach any remote peers */
 187         SVP_RD_ALL              = 0x03  /* Both bits above; only suitable for restore */
 188 } svp_degrade_state_t;
 189 
 190 typedef enum svp_shootdown_flags {      /* Distinct bits, stored in svp_sdlog_t`sdl_flags */
 191         SVP_SD_RUNNING          = 0x01,
 192         SVP_SD_QUIESCE          = 0x02,
 193         SVP_SD_DORM             = 0x04
 194 } svp_shootdown_flags_t;
 195 
 196 /*
 197  * There is a single svp_sdlog_t per svp_remote_t (embedded as sr_shoot). It maintains its own lock and
 198  * condition variables. See the big theory statement for more information on how
 199  * it's used.
 200  */
 201 typedef struct svp_sdlog {
 202         mutex_t                 sdl_lock;
 203         cond_t                  sdl_cond;
 204         uint_t                  sdl_ref;
 205         svp_timer_t             sdl_timer;
 206         svp_shootdown_flags_t   sdl_flags;      /* Bits from svp_shootdown_flags_t */
 207         svp_query_t             sdl_query;      /* Embedded query, not a pointer */
 208         void                    *sdl_logack;    /* Presumably buffer for log ack data; confirm */
 209         void                    *sdl_logrm;     /* Presumably buffer for log remove data; confirm */
 210         void                    *sdl_remote;    /* void *; presumably the owning svp_remote_t; confirm */
 211 } svp_sdlog_t;
 212 
 213 struct svp_remote {     /* Field comments name the guarding lock; RO presumably = read-only */
 214         char                    *sr_hostname;   /* RO */
 215         uint16_t                sr_rport;       /* RO */
 216         struct in6_addr         sr_uip;         /* RO */
 217         avl_node_t              sr_gnode;       /* svp_remote_lock */
 218         svp_remote_t            *sr_nexthost;   /* svp_host_lock */
 219         mutex_t                 sr_lock;        /* Also guards svp_conn_t`sc_rlist */
 220         cond_t                  sr_cond;
 221         svp_remote_state_t      sr_state;       /* Bits from svp_remote_state_t */
 222         svp_degrade_state_t     sr_degrade;     /* Bits from svp_degrade_state_t */
 223         struct addrinfo         *sr_addrinfo;   /* See SVP_RS_LOOKUP_VALID */
 224         avl_tree_t              sr_tree;        /* Presumably svp_t's via svp_rlink; confirm */
 225         uint_t                  sr_count;       /* active count */
 226         uint_t                  sr_gen;
 227         uint_t                  sr_tconns;      /* total conns + dconns */
 228         uint_t                  sr_ndconns;     /* number of degraded conns */
 229         list_t                  sr_conns;       /* all conns */
 230         svp_sdlog_t             sr_shoot;       /* The single svp_sdlog_t for this remote */
 231 };
 232 
 233 /*
 234  * We have a bunch of different things that we get back from the API at the
 235  * plug-in layer. These include:
 236  *
 237  *   o OOB Shootdowns
 238  *   o VL3->VL2 Lookups
 239  *   o VL2->UL3 Lookups
 240  *   o VL2 Log invalidations
 241  *   o VL3 Log injections
 242  */
 243 typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
 244     const uint16_t, void *);        /* Type of svp_cb_t`scb_vl2_lookup */
 245 typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
 246     const struct in6_addr *, const uint16_t, void *);       /* Type of svp_cb_t`scb_vl3_lookup */
 247 typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);  /* Type of svp_cb_t`scb_vl2_invalidate */
 248 typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
 249     const struct in6_addr *, const uint8_t *, const uint8_t *);     /* Type of svp_cb_t`scb_vl3_inject */
 250 typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
 251     const struct in6_addr *, const uint16_t uport); /* Type of svp_cb_t`scb_shootdown */
 252 typedef void (*svp_route_lookup_f)(svp_t *, svp_status_t, uint32_t, uint32_t,
 253     uint16_t, uint8_t *, uint8_t *, uint16_t, uint8_t *, uint8_t, uint8_t,
 254     void *);        /* Type of svp_cb_t`scb_route_lookup; route lookups are not in the list above */
 255 
 256 typedef struct svp_cb {         /* Callback table; type of svp_t`svp_cb */
 257         svp_vl2_lookup_f        scb_vl2_lookup;
 258         svp_vl3_lookup_f        scb_vl3_lookup;
 259         svp_vl2_invalidation_f  scb_vl2_invalidate;
 260         svp_vl3_inject_f        scb_vl3_inject;
 261         svp_shootdown_f         scb_shootdown;
 262         svp_route_lookup_f      scb_route_lookup;
 263 } svp_cb_t;
 264 
 265 /*
 266  * Core implementation structure. Field comments name the guarding lock; RO presumably means read-only.
 267  */
 268 struct svp {
 269         overlay_plugin_dest_t   svp_dest;       /* RO */
 270         varpd_provider_handle_t *svp_hdl;       /* RO */
 271         svp_cb_t                svp_cb;         /* RO */
 272         uint64_t                svp_vid;        /* RO */
 273         avl_node_t              svp_rlink;      /* Owned by svp_remote */
 274         svp_remote_t            *svp_remote;    /* RO iff started */
 275         mutex_t                 svp_lock;       /* Guards the fields marked svp_lock */
 276         char                    *svp_host;      /* svp_lock */
 277         uint16_t                svp_port;       /* svp_lock */
 278         uint16_t                svp_uport;      /* svp_lock */
 279         uint32_t                svp_dcid;       /* svp_lock (but write-once?) */
 280         boolean_t               svp_huip;       /* svp_lock */
 281         struct in6_addr         svp_uip;        /* svp_lock */
 282         /* NOTE: lower-3 bytes are 0s. */
 283         uint8_t         svp_router_oui[6];      /* svp_lock (but write-once?) */
 284 };
 285 
 286 extern bunyan_logger_t *svp_bunyan;     /* Bunyan logger handle; defined outside this header */
 287 
 288 extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
 289     svp_remote_t **);       /* Args presumably match sr_hostname/sr_rport/sr_uip; confirm */
 290 extern int svp_remote_attach(svp_remote_t *, svp_t *);
 291 extern void svp_remote_detach(svp_t *);
 292 extern void svp_remote_release(svp_remote_t *);
 293 extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
 294     const struct sockaddr *, void *);       /* Each lookup takes a caller-supplied svp_query_t */
 295 extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
 296     void *);        /* Trailing void * is presumably the callback arg; confirm */
 297 extern void svp_remote_route_lookup(svp_t *, svp_query_t *,
 298     const struct in6_addr *, const struct in6_addr *, uint32_t,
 299     uint16_t, void *);
 300 
 301 
 302 /*
 303  * Init functions. No fini counterpart is declared here for svp_host_init() or svp_timer_init().
 304  */
 305 extern int svp_remote_init(void);
 306 extern void svp_remote_fini(void);
 307 extern int svp_event_init(void);
 308 extern int svp_event_timer_init(svp_event_t *);
 309 extern void svp_event_fini(void);
 310 extern int svp_host_init(void);
 311 extern int svp_timer_init(void);        /* int returns presumably 0 or an errno; confirm */
 312 
 313 /*
 314  * Timers. svp_timer_t marks st_expire, st_delivering and st_link as private to the implementation.
 315  */
 316 extern int svp_tickrate;        /* Units not stated in this header */
 317 extern void svp_timer_add(svp_timer_t *);
 318 extern void svp_timer_remove(svp_timer_t *);
 319 
 320 /*
 321  * Event loop management. The int argument is presumably a file descriptor; confirm.
 322  */
 323 extern int svp_event_associate(svp_event_t *, int);
 324 extern int svp_event_dissociate(svp_event_t *, int);
 325 extern int svp_event_inject(svp_event_t *);
 326 
 327 /*
 328  * Connection manager. svp_conn_create() args presumably become sc_remote and sc_addr; confirm.
 329  */
 330 extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
 331 extern void svp_conn_destroy(svp_conn_t *);
 332 extern void svp_conn_fallout(svp_conn_t *);
 333 extern void svp_conn_queue(svp_conn_t *, svp_query_t *);        /* Presumably adds to sc_queries; confirm */
 334 
 335 /*
 336  * FMA related. SVP_RD_ALL is documented as only suitable for restore.
 337  */
 338 extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
 339 extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
 340 
 341 /*
 342  * Misc.
 343  */
 344 extern int svp_comparator(const void *, const void *);  /* Presumably an AVL comparator; confirm */
 345 extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
 346 extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
 347 extern void svp_host_queue(svp_remote_t *);
 348 extern void svp_query_release(svp_query_t *);
 349 extern void svp_query_crc32(svp_req_t *, void *, size_t);       /* Presumably header, data, data length; confirm */
 350 
 351 /*
 352  * Shootdown related. Per-remote state is the svp_sdlog_t embedded as svp_remote_t`sr_shoot.
 353  */
 354 extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
 355     svp_sdlog_t *);
 356 extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);
 357 extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
 358     size_t);        /* void * / size_t presumably a buffer and its length; confirm */
 359 extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
 360     size_t);        /* void * / size_t presumably a buffer and its length; confirm */
 361 extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);
 362 extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);
 363 extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *);
 364 extern int svp_shootdown_init(svp_remote_t *);
 365 extern void svp_shootdown_fini(svp_remote_t *);
 366 extern void svp_shootdown_start(svp_remote_t *);
 367 
 368 #ifdef __cplusplus
 369 }
 370 #endif
 371 
 372 #endif /* _LIBVARPD_SVP_H */