1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 #ifndef _LIBVARPD_SVP_H
17 #define _LIBVARPD_SVP_H
18
19 /*
20 * Implementation details of the SVP plugin and the SVP protocol.
21 */
22
23 #include <netinet/in.h>
24 #include <sys/ethernet.h>
25 #include <thread.h>
26 #include <synch.h>
27 #include <libvarpd_provider.h>
28 #include <sys/avl.h>
29 #include <port.h>
30 #include <sys/list.h>
31 #include <bunyan.h>
32
33 #include <libvarpd_svp_prot.h>
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38
/*
 * Forward declarations for the principal plugin structures. Their full
 * definitions appear later in this header; declaring the typedef names up
 * front lets earlier declarations refer to them through pointers.
 */
typedef struct svp		svp_t;
typedef struct svp_remote	svp_remote_t;
typedef struct svp_conn		svp_conn_t;
typedef struct svp_query	svp_query_t;
43
44 typedef void (*svp_event_f)(port_event_t *, void *);
45
46 typedef struct svp_event {
47 svp_event_f se_func;
48 void *se_arg;
49 int se_events;
50 } svp_event_t;
51
52 typedef void (*svp_timer_f)(void *);
53
54 typedef struct svp_timer {
55 svp_timer_f st_func; /* Timer callback function */
56 void *st_arg; /* Timer callback arg */
57 boolean_t st_oneshot; /* Is timer a one shot? */
58 uint32_t st_value; /* periodic or one-shot time */
59 /* Fields below here are private to the svp_timer implementaiton */
60 uint64_t st_expire; /* Next expiration */
61 boolean_t st_delivering; /* Are we currently delivering this */
62 avl_node_t st_link;
63 } svp_timer_t;
64
65 /*
66 * Note, both the svp_log_ack_t and svp_lrm_req_t are not part of this structure
67 * as they are rather variable sized data and we don't want to constrain their
68 * size. Instead, the rdata and wdata members must be set appropriately.
69 */
70 typedef union svp_query_data {
71 svp_vl2_req_t sqd_vl2r;
72 svp_vl2_ack_t sqd_vl2a;
73 svp_vl3_req_t sdq_vl3r;
74 svp_vl3_ack_t sdq_vl3a;
75 svp_log_req_t sdq_logr;
76 svp_lrm_ack_t sdq_lrma;
77 svp_route_req_t sqd_rr;
78 svp_route_ack_t sqd_ra;
79 } svp_query_data_t;
80
81 typedef void (*svp_query_f)(svp_query_t *, void *);
82
/*
 * State of a query, stored in sq_state of struct svp_query.
 * NOTE(review): the names suggest the order init -> writing -> reading ->
 * finished; confirm against the connection code.
 */
typedef enum svp_query_state {
	SVP_QUERY_INIT		= 0,
	SVP_QUERY_WRITING	= 1,
	SVP_QUERY_READING	= 2,
	SVP_QUERY_FINISHED	= 3
} svp_query_state_t;
89
90 /*
91 * The query structure is usable for all forms of svp queries that end up
92 * getting passed across. Right now it's optimized for the fixed size data
93 * requests as opposed to requests whose responses will always be streaming in
94 * nature. Though, the streaming requests are the less common ones we have. We
95 * may need to make additional changes for those.
96 */
97 struct svp_query {
98 list_node_t sq_lnode; /* List entry */
99 svp_query_f sq_func; /* Callback function */
100 svp_query_state_t sq_state; /* Query state */
101 void *sq_arg; /* Callback function arg */
102 svp_t *sq_svp; /* Pointer back to svp_t */
103 svp_req_t sq_header; /* Header for the query */
104 svp_query_data_t sq_rdun; /* Union for read data */
105 svp_query_data_t sq_wdun; /* Union for write data */
106 svp_status_t sq_status; /* Query response status */
107 size_t sq_size; /* Query response size */
108 void *sq_rdata; /* Read data pointer */
109 size_t sq_rsize; /* Read data size */
110 void *sq_wdata; /* Write data pointer */
111 size_t sq_wsize; /* Write data size */
112 hrtime_t sq_acttime; /* Last I/O activity time */
113 };
114
/* Connection state, stored in sc_cstate of struct svp_conn. */
typedef enum svp_conn_state {
	SVP_CS_ERROR		= 0,
	SVP_CS_INITIAL		= 1,
	SVP_CS_CONNECTING	= 2,
	SVP_CS_BACKOFF		= 3,
	SVP_CS_ACTIVE		= 4,
	SVP_CS_WINDDOWN		= 5
} svp_conn_state_t;
123
/*
 * Connection error category, stored in sc_error of struct svp_conn.
 * SVP_CE_NONE is zero, so a zeroed connection reports no error.
 */
typedef enum svp_conn_error {
	SVP_CE_NONE		= 0,
	SVP_CE_ASSOCIATE	= 1,
	SVP_CE_NOPOLLOUT	= 2,
	SVP_CE_SOCKET		= 3,
	SVP_CE_VERSION_PONG	= 4
} svp_conn_error_t;
131
/*
 * Bit flags kept in sc_flags of struct svp_conn. Every value other than
 * SVP_CF_UFLAG is a single bit.
 */
typedef enum svp_conn_flags {
	SVP_CF_ADDED	= 1 << 0,
	SVP_CF_DEGRADED	= 1 << 1,
	SVP_CF_REAP	= 1 << 2,
	SVP_CF_TEARDOWN	= 1 << 3,
	/*
	 * 0x0c, i.e. exactly the REAP and TEARDOWN bits.
	 * NOTE(review): presumably a mask for those two flags; confirm.
	 */
	SVP_CF_UFLAG	= SVP_CF_REAP | SVP_CF_TEARDOWN,
	SVP_CF_USER	= 1 << 4
} svp_conn_flags_t;
140
141 typedef struct svp_conn_out {
142 svp_query_t *sco_query;
143 size_t sco_offset;
144 } svp_conn_out_t;
145
146 typedef struct svp_conn_in {
147 svp_query_t *sci_query;
148 svp_req_t sci_req;
149 size_t sci_offset;
150 } svp_conn_in_t;
151
152 struct svp_conn {
153 svp_remote_t *sc_remote; /* RO */
154 struct in6_addr sc_addr; /* RO */
155 list_node_t sc_rlist; /* svp_remote_t`sr_lock */
156 mutex_t sc_lock;
157 svp_event_t sc_event;
158 svp_timer_t sc_btimer;
159 svp_timer_t sc_qtimer;
160 int sc_socket;
161 uint_t sc_gen;
162 uint_t sc_nbackoff;
163 svp_conn_flags_t sc_flags;
164 svp_conn_state_t sc_cstate;
165 svp_conn_error_t sc_error;
166 int sc_errno;
167 list_t sc_queries;
168 svp_conn_out_t sc_output;
169 svp_conn_in_t sc_input;
170 uint_t sc_version;
171 };
172
/* Bit flags kept in sr_state of struct svp_remote. */
typedef enum svp_remote_state {
	SVP_RS_LOOKUP_SCHEDULED		= 1 << 0,	/* On the DNS Queue */
	SVP_RS_LOOKUP_INPROGRESS	= 1 << 1,	/* Doing a DNS lookup */
	SVP_RS_LOOKUP_VALID		= 1 << 2	/* addrinfo valid */
} svp_remote_state_t;
178
/*
 * This series of bit-based flags should be ordered so that the most severe
 * comes first. Only one message can be made visible to user land, so when
 * more than one flag is set we want to make sure that is the one shown.
 */
typedef enum svp_degrade_state {
	SVP_RD_DNS_FAIL		= 1 << 0,	/* DNS Resolution Failure */
	SVP_RD_REMOTE_FAIL	= 1 << 1,	/* cannot reach any remote peers */
	/* 0x03, both bits above. Only suitable for restore. */
	SVP_RD_ALL		= SVP_RD_DNS_FAIL | SVP_RD_REMOTE_FAIL
} svp_degrade_state_t;
189
/* Bit flags kept in sdl_flags of svp_sdlog_t. */
typedef enum svp_shootdown_flags {
	SVP_SD_RUNNING	= 1 << 0,
	SVP_SD_QUIESCE	= 1 << 1,
	SVP_SD_DORM	= 1 << 2
} svp_shootdown_flags_t;
195
196 /*
197 * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and
198 * condition variables. See the big theory statement for more information on how
199 * it's used.
200 */
201 typedef struct svp_sdlog {
202 mutex_t sdl_lock;
203 cond_t sdl_cond;
204 uint_t sdl_ref;
205 svp_timer_t sdl_timer;
206 svp_shootdown_flags_t sdl_flags;
207 svp_query_t sdl_query;
208 void *sdl_logack;
209 void *sdl_logrm;
210 void *sdl_remote;
211 } svp_sdlog_t;
212
213 struct svp_remote {
214 char *sr_hostname; /* RO */
215 uint16_t sr_rport; /* RO */
216 struct in6_addr sr_uip; /* RO */
217 avl_node_t sr_gnode; /* svp_remote_lock */
218 svp_remote_t *sr_nexthost; /* svp_host_lock */
219 mutex_t sr_lock;
220 cond_t sr_cond;
221 svp_remote_state_t sr_state;
222 svp_degrade_state_t sr_degrade;
223 struct addrinfo *sr_addrinfo;
224 avl_tree_t sr_tree;
225 uint_t sr_count; /* active count */
226 uint_t sr_gen;
227 uint_t sr_tconns; /* total conns + dconns */
228 uint_t sr_ndconns; /* number of degraded conns */
229 list_t sr_conns; /* all conns */
230 svp_sdlog_t sr_shoot;
231 };
232
233 /*
234 * We have a bunch of different things that we get back from the API at the
235 * plug-in layer. These include:
236 *
237 * o OOB Shootdowns
238 * o VL3->VL2 Lookups
239 * o VL2->UL3 Lookups
240 * o VL2 Log invalidations
241 * o VL3 Log injections
242 */
243 typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
244 const uint16_t, void *);
245 typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
246 const struct in6_addr *, const uint16_t, void *);
247 typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);
248 typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
249 const struct in6_addr *, const uint8_t *, const uint8_t *);
250 typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
251 const struct in6_addr *, const uint16_t uport);
252 typedef void (*svp_route_lookup_f)(svp_t *, svp_status_t, uint32_t, uint32_t,
253 uint16_t, uint8_t *, uint8_t *, uint16_t, uint8_t *, uint8_t, uint8_t,
254 void *);
255
256 typedef struct svp_cb {
257 svp_vl2_lookup_f scb_vl2_lookup;
258 svp_vl3_lookup_f scb_vl3_lookup;
259 svp_vl2_invalidation_f scb_vl2_invalidate;
260 svp_vl3_inject_f scb_vl3_inject;
261 svp_shootdown_f scb_shootdown;
262 svp_route_lookup_f scb_route_lookup;
263 } svp_cb_t;
264
265 /*
266 * Core implementation structure.
267 */
268 struct svp {
269 overlay_plugin_dest_t svp_dest; /* RO */
270 varpd_provider_handle_t *svp_hdl; /* RO */
271 svp_cb_t svp_cb; /* RO */
272 uint64_t svp_vid; /* RO */
273 avl_node_t svp_rlink; /* Owned by svp_remote */
274 svp_remote_t *svp_remote; /* RO iff started */
275 mutex_t svp_lock;
276 char *svp_host; /* svp_lock */
277 uint16_t svp_port; /* svp_lock */
278 uint16_t svp_uport; /* svp_lock */
279 uint32_t svp_dcid; /* svp_lock (but write-once?) */
280 boolean_t svp_huip; /* svp_lock */
281 struct in6_addr svp_uip; /* svp_lock */
282 /* NOTE: lower-3 bytes are 0s. */
283 uint8_t svp_router_oui[6]; /* svp_lock (but write-once?) */
284 };
285
286 extern bunyan_logger_t *svp_bunyan;
287
288 extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
289 svp_remote_t **);
290 extern int svp_remote_attach(svp_remote_t *, svp_t *);
291 extern void svp_remote_detach(svp_t *);
292 extern void svp_remote_release(svp_remote_t *);
293 extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
294 const struct sockaddr *, void *);
295 extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
296 void *);
297 extern void svp_remote_route_lookup(svp_t *, svp_query_t *,
298 const struct in6_addr *, const struct in6_addr *, uint32_t,
299 uint16_t, void *);
300
301
302 /*
303 * Init functions
304 */
305 extern int svp_remote_init(void);
306 extern void svp_remote_fini(void);
307 extern int svp_event_init(void);
308 extern int svp_event_timer_init(svp_event_t *);
309 extern void svp_event_fini(void);
310 extern int svp_host_init(void);
311 extern int svp_timer_init(void);
312
313 /*
314 * Timers
315 */
316 extern int svp_tickrate;
317 extern void svp_timer_add(svp_timer_t *);
318 extern void svp_timer_remove(svp_timer_t *);
319
320 /*
321 * Event loop management
322 */
323 extern int svp_event_associate(svp_event_t *, int);
324 extern int svp_event_dissociate(svp_event_t *, int);
325 extern int svp_event_inject(svp_event_t *);
326
327 /*
328 * Connection manager
329 */
330 extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
331 extern void svp_conn_destroy(svp_conn_t *);
332 extern void svp_conn_fallout(svp_conn_t *);
333 extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
334
335 /*
336 * FMA related
337 */
338 extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
339 extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
340
341 /*
342 * Misc.
343 */
344 extern int svp_comparator(const void *, const void *);
345 extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
346 extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
347 extern void svp_host_queue(svp_remote_t *);
348 extern void svp_query_release(svp_query_t *);
349 extern void svp_query_crc32(svp_req_t *, void *, size_t);
350
351 /*
352 * Shootdown related
353 */
354 extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
355 svp_sdlog_t *);
356 extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);
357 extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
358 size_t);
359 extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
360 size_t);
361 extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);
362 extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);
363 extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *);
364 extern int svp_shootdown_init(svp_remote_t *);
365 extern void svp_shootdown_fini(svp_remote_t *);
366 extern void svp_shootdown_start(svp_remote_t *);
367
368 #ifdef __cplusplus
369 }
370 #endif
371
372 #endif /* _LIBVARPD_SVP_H */