1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2018 Joyent, Inc.
  14  */
  15 
  16 #ifndef _LIBVARPD_SVP_H
  17 #define _LIBVARPD_SVP_H
  18 
  19 /*
  20  * Implementation details of the SVP plugin and the SVP protocol.
  21  */
  22 
  23 #include <netinet/in.h>
  24 #include <sys/ethernet.h>
  25 #include <thread.h>
  26 #include <synch.h>
  27 #include <libvarpd_provider.h>
  28 #include <sys/avl.h>
  29 #include <port.h>
  30 #include <sys/list.h>
  31 #include <bunyan.h>
  32 
  33 #include <libvarpd_svp_prot.h>
  34 
  35 #ifdef __cplusplus
  36 extern "C" {
  37 #endif
  38 
  39 typedef struct svp svp_t;               /* struct svp is defined later in this header */
  40 typedef struct svp_remote svp_remote_t; /* struct svp_remote is defined later in this header */
  41 typedef struct svp_conn svp_conn_t;     /* struct svp_conn is defined later in this header */
  42 typedef struct svp_query svp_query_t;   /* struct svp_query is defined later in this header */
  43 
  44 typedef void (*svp_event_f)(port_event_t *, void *);    /* type of svp_event_t's se_func */
  45 
  46 typedef struct svp_event {
  47         svp_event_f     se_func;        /* Event callback function */
  48         void            *se_arg;        /* presumably handed to se_func as its void * -- confirm */
  49         int             se_events;      /* NOTE(review): looks like an event mask -- confirm */
  50 } svp_event_t;
  51 
  52 typedef void (*svp_timer_f)(void *);    /* type of svp_timer_t's st_func */
  53 
  54 typedef struct svp_timer {
  55         svp_timer_f     st_func;        /* Timer callback function */
  56         void            *st_arg;        /* Timer callback arg */
  57         boolean_t       st_oneshot;     /* Is timer a one shot? */
  58         uint32_t        st_value;       /* periodic or one-shot time */
  59         /* Fields below here are private to the svp_timer implementation */
  60         uint64_t        st_expire;      /* Next expiration */
  61         boolean_t       st_delivering;  /* Are we currently delivering this */
  62         avl_node_t      st_link;        /* AVL tree linkage for this timer */
  63 } svp_timer_t;
  64 
  65 /*
  66  * Note, neither the svp_log_ack_t nor the svp_lrm_req_t is part of this union,
  67  * as they are variable sized data and we don't want to constrain their size.
  68  * Instead, the sq_rdata and sq_wdata members must be set appropriately.
  69  */
  70 typedef union svp_query_data {          /* type of svp_query_t's sq_rdun and sq_wdun */
  71         svp_vl2_req_t   sqd_vl2r;
  72         svp_vl2_ack_t   sqd_vl2a;
  73         svp_vl3_req_t   sdq_vl3r;       /* NOTE(review): "sdq_" prefix here vs. "sqd_" elsewhere */
  74         svp_vl3_ack_t   sdq_vl3a;       /* (renaming would require updating every user) */
  75         svp_log_req_t   sdq_logr;
  76         svp_lrm_ack_t   sdq_lrma;
  77         svp_route_req_t sqd_rr;
  78         svp_route_ack_t sqd_ra;
  79 } svp_query_data_t;
  80 
  81 typedef void (*svp_query_f)(svp_query_t *, void *);     /* type of svp_query_t's sq_func */
  82 
  83 typedef enum svp_query_state {          /* type of svp_query_t's sq_state */
  84         SVP_QUERY_INIT          = 0x00,
  85         SVP_QUERY_WRITING       = 0x01,
  86         SVP_QUERY_READING       = 0x02,
  87         SVP_QUERY_FINISHED      = 0x03
  88 } svp_query_state_t;
  89 
  90 /*
  91  * The query structure is usable for all forms of svp queries that end up
  92  * getting passed across. Right now it's optimized for the fixed size data
  93  * requests as opposed to requests whose responses will always be streaming in
  94  * nature. Though, the streaming requests are the less common ones we have. We
  95  * may need to make additional changes for those.
  96  */
  97 struct svp_query {                      /* typedef'd earlier in this header as svp_query_t */
  98         list_node_t             sq_lnode;       /* List entry */
  99         svp_query_f             sq_func;        /* Callback function */
 100         svp_query_state_t       sq_state;       /* Query state */
 101         void                    *sq_arg;        /* Callback function arg */
 102         svp_t                   *sq_svp;        /* Pointer back to svp_t */
 103         svp_req_t               sq_header;      /* Header for the query */
 104         svp_query_data_t        sq_rdun;        /* Union for read data */
 105         svp_query_data_t        sq_wdun;        /* Union for write data */
 106         svp_status_t            sq_status;      /* Query response status */
 107         size_t                  sq_size;        /* Query response size */
 108         void                    *sq_rdata;      /* Read data pointer */
 109         size_t                  sq_rsize;       /* Read data size */
 110         void                    *sq_wdata;      /* Write data pointer */
 111         size_t                  sq_wsize;       /* Write data size */
 112         hrtime_t                sq_acttime;     /* Last I/O activity time */
 113 };
 114 
 115 typedef enum svp_conn_state {           /* type of svp_conn_t's sc_cstate */
 116         SVP_CS_ERROR            = 0x00,
 117         SVP_CS_INITIAL          = 0x01,
 118         SVP_CS_CONNECTING       = 0x02,
 119         SVP_CS_BACKOFF          = 0x03,
 120         SVP_CS_ACTIVE           = 0x04,
 121         SVP_CS_WINDDOWN         = 0x05,
 122         SVP_CS_VERSIONING       = 0x06
 123 } svp_conn_state_t;
 124 
 125 typedef enum svp_conn_error {           /* type of svp_conn_t's sc_error */
 126         SVP_CE_NONE             = 0x00,
 127         SVP_CE_ASSOCIATE        = 0x01,
 128         SVP_CE_NOPOLLOUT        = 0x02,
 129         SVP_CE_SOCKET           = 0x03,
 130         SVP_CE_VERSION_PONG     = 0x04
 131 } svp_conn_error_t;
 132 
 133 typedef enum svp_conn_flags {           /* type of svp_conn_t's sc_flags */
 134         SVP_CF_ADDED            = 0x01,
 135         SVP_CF_DEGRADED         = 0x02,
 136         SVP_CF_REAP             = 0x04,
 137         SVP_CF_TEARDOWN         = 0x08,
 138         SVP_CF_UFLAG            = 0x0c, /* == SVP_CF_REAP | SVP_CF_TEARDOWN; presumably a mask -- confirm */
 139         SVP_CF_USER             = 0x10
 140 } svp_conn_flags_t;
 141 
 142 typedef struct svp_conn_out {           /* type of svp_conn_t's sc_output */
 143         svp_query_t             *sco_query;     /* presumably the query being written -- confirm */
 144         size_t                  sco_offset;     /* presumably progress within that write -- confirm */
 145 } svp_conn_out_t;
 146 
 147 typedef struct svp_conn_in {            /* type of svp_conn_t's sc_input */
 148         svp_query_t             *sci_query;     /* presumably the query being read into -- confirm */
 149         svp_req_t               sci_req;        /* same type as svp_query_t's sq_header */
 150         size_t                  sci_offset;     /* presumably progress within that read -- confirm */
 151 } svp_conn_in_t;
 152 
 153 struct svp_conn {                       /* typedef'd earlier in this header as svp_conn_t */
 154         svp_remote_t            *sc_remote;     /* RO */
 155         struct in6_addr         sc_addr;        /* RO */
 156         list_node_t             sc_rlist;       /* svp_remote_t`sr_lock */
 157         mutex_t                 sc_lock;        /* NOTE(review): presumably guards the fields below -- confirm */
 158         svp_event_t             sc_event;
 159         svp_timer_t             sc_btimer;      /* NOTE(review): name suggests backoff timer -- confirm */
 160         svp_timer_t             sc_qtimer;      /* NOTE(review): name suggests query timer -- confirm */
 161         int                     sc_socket;
 162         uint_t                  sc_gen;
 163         uint_t                  sc_nbackoff;
 164         svp_conn_flags_t        sc_flags;
 165         svp_conn_state_t        sc_cstate;
 166         svp_conn_error_t        sc_error;
 167         int                     sc_errno;
 168         list_t                  sc_queries;     /* presumably svp_query_t's linked via sq_lnode -- confirm */
 169         svp_conn_out_t          sc_output;
 170         svp_conn_in_t           sc_input;
 171         uint_t                  sc_version;
 172 };
 173 
 174 typedef enum svp_remote_state {         /* type of svp_remote_t's sr_state */
 175         SVP_RS_LOOKUP_SCHEDULED         = 0x01, /* On the DNS Queue */
 176         SVP_RS_LOOKUP_INPROGRESS        = 0x02, /* Doing a DNS lookup */
 177         SVP_RS_LOOKUP_VALID             = 0x04  /* addrinfo valid */
 178 } svp_remote_state_t;
 179 
 180 /*
 181  * This series of bit-based flags should be ordered such that the most severe
 182  * is first. We can only set one message that user land can see, so if more than
 183  * one is set we want to make sure that the most severe one is reported.
 184  */
 185 typedef enum svp_degrade_state {        /* type of svp_remote_t's sr_degrade */
 186         SVP_RD_DNS_FAIL         = 0x01, /* DNS Resolution Failure */
 187         SVP_RD_REMOTE_FAIL      = 0x02, /* cannot reach any remote peers */
 188         SVP_RD_ALL              = 0x03  /* Both bits above; only suitable for restore */
 189 } svp_degrade_state_t;
 190 
 191 typedef enum svp_shootdown_flags {      /* type of svp_sdlog_t's sdl_flags */
 192         SVP_SD_RUNNING          = 0x01,
 193         SVP_SD_QUIESCE          = 0x02,
 194         SVP_SD_DORM             = 0x04
 195 } svp_shootdown_flags_t;
 196 
 197 /*
 198  * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and
 199  * condition variables. See the big theory statement for more information on how
 200  * it's used.
 201  */
 202 typedef struct svp_sdlog {              /* type of svp_remote_t's sr_shoot */
 203         mutex_t                 sdl_lock;
 204         cond_t                  sdl_cond;
 205         uint_t                  sdl_ref;
 206         svp_timer_t             sdl_timer;
 207         svp_shootdown_flags_t   sdl_flags;
 208         svp_query_t             sdl_query;      /* embedded by value, not a pointer */
 209         void                    *sdl_logack;    /* NOTE(review): untyped; contents not visible here */
 210         void                    *sdl_logrm;     /* NOTE(review): untyped; contents not visible here */
 211         void                    *sdl_remote;    /* NOTE(review): untyped; presumably the owning svp_remote_t -- confirm */
 212 } svp_sdlog_t;
 213 
 214 struct svp_remote {                     /* typedef'd earlier in this header as svp_remote_t */
 215         char                    *sr_hostname;   /* RO */
 216         uint16_t                sr_rport;       /* RO */
 217         struct in6_addr         sr_uip;         /* RO */
 218         avl_node_t              sr_gnode;       /* svp_remote_lock */
 219         svp_remote_t            *sr_nexthost;   /* svp_host_lock */
 220         mutex_t                 sr_lock;
 221         cond_t                  sr_cond;
 222         svp_remote_state_t      sr_state;
 223         svp_degrade_state_t     sr_degrade;
 224         struct addrinfo         *sr_addrinfo;
 225         avl_tree_t              sr_tree;        /* presumably svp_t's linked via svp_rlink -- confirm */
 226         uint_t                  sr_count;       /* active count */
 227         uint_t                  sr_gen;
 228         uint_t                  sr_tconns;      /* total conns + dconns */
 229         uint_t                  sr_ndconns;     /* number of degraded conns */
 230         list_t                  sr_conns;       /* all conns */
 231         svp_sdlog_t             sr_shoot;       /* the single svp_sdlog_t for this remote */
 232 };
 233 
 234 /*
 235  * We have a bunch of different things that we get back from the API at the
 236  * plug-in layer. These include:
 237  *
 238  *   o OOB Shootdowns
 239  *   o VL3->VL2 Lookups
 240  *   o VL2->UL3 Lookups
 241  *   o VL2 Log invalidations
 242  *   o VL3 Log injections
 243  */
 244 typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
 245     const uint16_t, void *);        /* type of svp_cb_t's scb_vl2_lookup */
 246 typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
 247     const struct in6_addr *, const uint16_t, void *);       /* type of svp_cb_t's scb_vl3_lookup */
 248 typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);       /* type of svp_cb_t's scb_vl2_invalidate */
 249 typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
 250     const struct in6_addr *, const uint8_t *, const uint8_t *);     /* type of svp_cb_t's scb_vl3_inject */
 251 typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
 252     const struct in6_addr *, const uint16_t uport); /* type of svp_cb_t's scb_shootdown */
 253 typedef void (*svp_route_lookup_f)(svp_t *, svp_status_t, uint32_t, uint32_t,
 254     uint16_t, uint8_t *, uint8_t *, uint16_t, uint8_t *, uint8_t, uint8_t,
 255     void *);        /* type of svp_cb_t's scb_route_lookup; NOTE(review): unnamed params, see the implementation */
 256 
 257 typedef struct svp_cb {                 /* callback table; embedded by value as struct svp's svp_cb */
 258         svp_vl2_lookup_f        scb_vl2_lookup;
 259         svp_vl3_lookup_f        scb_vl3_lookup;
 260         svp_vl2_invalidation_f  scb_vl2_invalidate;
 261         svp_vl3_inject_f        scb_vl3_inject;
 262         svp_shootdown_f         scb_shootdown;
 263         svp_route_lookup_f      scb_route_lookup;
 264 } svp_cb_t;
 265 
 266 /*
 267  * Core implementation structure.
 268  */
 269 struct svp {                            /* typedef'd earlier in this header as svp_t */
 270         overlay_plugin_dest_t   svp_dest;       /* RO */
 271         varpd_provider_handle_t *svp_hdl;       /* RO */
 272         svp_cb_t                svp_cb;         /* RO */
 273         uint64_t                svp_vid;        /* RO */
 274         avl_node_t              svp_rlink;      /* Owned by svp_remote */
 275         svp_remote_t            *svp_remote;    /* RO iff started */
 276         mutex_t                 svp_lock;
 277         char                    *svp_host;      /* svp_lock */
 278         uint16_t                svp_port;       /* svp_lock */
 279         uint16_t                svp_uport;      /* svp_lock */
 280         uint32_t                svp_dcid;       /* svp_lock (but write-once?) */
 281         boolean_t               svp_huip;       /* svp_lock; presumably "has svp_uip" -- confirm */
 282         struct in6_addr         svp_uip;        /* svp_lock */
 283         /* NOTE: lower-3 bytes are 0s. */
 284         uint8_t         svp_router_oui[6];      /* svp_lock (but write-once?) */
 285 };
 286 
 287 extern bunyan_logger_t *svp_bunyan;     /* declared here only; the definition is not in this header */
 288 
 289 extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
 290     svp_remote_t **);       /* NOTE(review): svp_remote_t ** looks like an out parameter -- confirm */
 291 extern int svp_remote_attach(svp_remote_t *, svp_t *);
 292 extern void svp_remote_detach(svp_t *);
 293 extern void svp_remote_release(svp_remote_t *);
 294 extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
 295     const struct sockaddr *, void *);       /* void * is presumably the callback arg -- confirm */
 296 extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
 297     void *);        /* void * is presumably the callback arg -- confirm */
 298 extern void svp_remote_route_lookup(svp_t *, svp_query_t *,
 299     const struct in6_addr *, const struct in6_addr *, uint32_t,
 300     uint16_t, void *);      /* void * is presumably the callback arg -- confirm */
 301 
 302 
 303 /*
 304  * Init functions
 305  */
 306 extern int svp_remote_init(void);       /* NOTE(review): int return convention not visible here -- confirm */
 307 extern void svp_remote_fini(void);
 308 extern int svp_event_init(void);
 309 extern int svp_event_timer_init(svp_event_t *);
 310 extern void svp_event_fini(void);
 311 extern int svp_host_init(void);         /* no matching fini is declared in this header */
 312 extern int svp_timer_init(void);        /* no matching fini is declared in this header */
 313 
 314 /*
 315  * Timers
 316  */
 317 extern int svp_tickrate;                /* declared here only; units are not visible in this header */
 318 extern void svp_timer_add(svp_timer_t *);
 319 extern void svp_timer_remove(svp_timer_t *);
 320 
 321 /*
 322  * Event loop management
 323  */
 324 extern int svp_event_associate(svp_event_t *, int);     /* int is presumably a file descriptor -- confirm */
 325 extern int svp_event_dissociate(svp_event_t *, int);    /* int is presumably a file descriptor -- confirm */
 326 extern int svp_event_inject(svp_event_t *);
 327 
 328 /*
 329  * Connection manager
 330  */
 331 extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);    /* returns int, not the new svp_conn_t */
 332 extern void svp_conn_destroy(svp_conn_t *);
 333 extern void svp_conn_fallout(svp_conn_t *);
 334 extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
 335 
 336 /*
 337  * FMA related
 338  */
 339 extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
 340 extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);    /* SVP_RD_ALL is documented as suitable only here */
 341 
 342 /*
 343  * Misc.
 344  */
 345 extern int svp_comparator(const void *, const void *);  /* NOTE(review): presumably an AVL comparator -- confirm */
 346 extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
 347 extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
 348 extern void svp_host_queue(svp_remote_t *);
 349 extern void svp_query_release(svp_query_t *);
 350 extern void svp_query_crc32(svp_req_t *, void *, size_t);       /* presumably header, data buffer, buffer length -- confirm */
 351 extern id_t svp_id_alloc(void);
 352 
 353 /*
 354  * Shootdown related
 355  */
 356 extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
 357     svp_sdlog_t *);
 358 extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);
 359 extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
 360     size_t);        /* void * / size_t are presumably a buffer and its length -- confirm */
 361 extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
 362     size_t);        /* void * / size_t are presumably a buffer and its length -- confirm */
 363 extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);
 364 extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);
 365 extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *);
 366 extern int svp_shootdown_init(svp_remote_t *);
 367 extern void svp_shootdown_fini(svp_remote_t *);
 368 extern void svp_shootdown_start(svp_remote_t *);
 369 
 370 #ifdef __cplusplus
 371 }
 372 #endif
 373 
 374 #endif /* _LIBVARPD_SVP_H */