1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 #ifndef _LIBVARPD_SVP_H
17 #define _LIBVARPD_SVP_H
18
19 /*
20 * Implementation details of the SVP plugin and the SVP protocol.
21 */
22
23 #include <netinet/in.h>
24 #include <sys/ethernet.h>
25 #include <thread.h>
26 #include <synch.h>
27 #include <libvarpd_provider.h>
28 #include <sys/avl.h>
29 #include <port.h>
30 #include <sys/list.h>
31 #include <bunyan.h>
32
33 #include <libvarpd_svp_prot.h>
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38
/*
 * Forward declarations of the core SVP types. The structures are defined
 * further down in this header; declaring the typedef names first lets the
 * callback typedefs and structures refer to one another by pointer.
 */
typedef struct svp svp_t;
typedef struct svp_remote svp_remote_t;
typedef struct svp_conn svp_conn_t;
typedef struct svp_query svp_query_t;
43
44 typedef void (*svp_event_f)(port_event_t *, void *);
45
46 typedef struct svp_event {
47 svp_event_f se_func;
48 void *se_arg;
49 int se_events;
50 } svp_event_t;
51
52 typedef void (*svp_timer_f)(void *);
53
54 typedef struct svp_timer {
55 svp_timer_f st_func; /* Timer callback function */
56 void *st_arg; /* Timer callback arg */
57 boolean_t st_oneshot; /* Is timer a one shot? */
58 uint32_t st_value; /* periodic or one-shot time */
59 /* Fields below here are private to the svp_timer implementaiton */
60 uint64_t st_expire; /* Next expiration */
61 boolean_t st_delivering; /* Are we currently delivering this */
62 avl_node_t st_link;
63 } svp_timer_t;
64
65 /*
66 * Note, both the svp_log_ack_t and svp_lrm_req_t are not part of this structure
67 * as they are rather variable sized data and we don't want to constrain their
68 * size. Instead, the rdata and wdata members must be set appropriately.
69 */
70 typedef union svp_query_data {
71 svp_vl2_req_t sqd_vl2r;
72 svp_vl2_ack_t sqd_vl2a;
73 svp_vl3_req_t sdq_vl3r;
74 svp_vl3_ack_t sdq_vl3a;
75 svp_log_req_t sdq_logr;
76 svp_lrm_ack_t sdq_lrma;
77 svp_route_req_t sqd_rr;
78 svp_route_ack_t sqd_ra;
79 } svp_query_data_t;
80
81 typedef void (*svp_query_f)(svp_query_t *, void *);
82
/*
 * State of an individual query; stored in svp_query_t`sq_state. The numeric
 * values are explicit and must not be changed.
 */
typedef enum svp_query_state {
	SVP_QUERY_INIT		= 0x00,
	SVP_QUERY_WRITING	= 0x01,
	SVP_QUERY_READING	= 0x02,
	SVP_QUERY_FINISHED	= 0x03
} svp_query_state_t;
89
90 /*
91 * The query structure is usable for all forms of svp queries that end up
92 * getting passed across. Right now it's optimized for the fixed size data
93 * requests as opposed to requests whose responses will always be streaming in
94 * nature. Though, the streaming requests are the less common ones we have. We
95 * may need to make additional changes for those.
96 */
97 struct svp_query {
98 list_node_t sq_lnode; /* List entry */
99 svp_query_f sq_func; /* Callback function */
100 svp_query_state_t sq_state; /* Query state */
101 void *sq_arg; /* Callback function arg */
102 svp_t *sq_svp; /* Pointer back to svp_t */
103 svp_req_t sq_header; /* Header for the query */
104 svp_query_data_t sq_rdun; /* Union for read data */
105 svp_query_data_t sq_wdun; /* Union for write data */
106 svp_status_t sq_status; /* Query response status */
107 size_t sq_size; /* Query response size */
108 void *sq_rdata; /* Read data pointer */
109 size_t sq_rsize; /* Read data size */
110 void *sq_wdata; /* Write data pointer */
111 size_t sq_wsize; /* Write data size */
112 hrtime_t sq_acttime; /* Last I/O activity time */
113 };
114
/*
 * State of a connection; stored in svp_conn_t`sc_cstate. The numeric values
 * are explicit and must not be renumbered (note that SVP_CS_VERSIONING takes
 * the highest value rather than sitting in any particular logical position).
 */
typedef enum svp_conn_state {
	SVP_CS_ERROR		= 0x00,
	SVP_CS_INITIAL		= 0x01,
	SVP_CS_CONNECTING	= 0x02,
	SVP_CS_BACKOFF		= 0x03,
	SVP_CS_ACTIVE		= 0x04,
	SVP_CS_WINDDOWN		= 0x05,
	SVP_CS_VERSIONING	= 0x06
} svp_conn_state_t;
124
/*
 * Reason a connection failed; stored in svp_conn_t`sc_error, alongside the
 * errno value kept in sc_errno. SVP_CE_NONE means no error is recorded.
 */
typedef enum svp_conn_error {
	SVP_CE_NONE		= 0x00,
	SVP_CE_ASSOCIATE	= 0x01,
	SVP_CE_NOPOLLOUT	= 0x02,
	SVP_CE_SOCKET		= 0x03,
	SVP_CE_VERSION_PONG	= 0x04
} svp_conn_error_t;
132
/*
 * Bit flags describing a connection; stored in svp_conn_t`sc_flags.
 *
 * SVP_CF_UFLAG is not an independent bit: its value (0x0c) is exactly
 * SVP_CF_REAP | SVP_CF_TEARDOWN, so it reads as a mask covering those two
 * flags (NOTE(review): confirm intended use against the connection code).
 */
typedef enum svp_conn_flags {
	SVP_CF_ADDED	= 0x01,
	SVP_CF_DEGRADED	= 0x02,
	SVP_CF_REAP	= 0x04,
	SVP_CF_TEARDOWN	= 0x08,
	SVP_CF_UFLAG	= 0x0c,
	SVP_CF_USER	= 0x10
} svp_conn_flags_t;
141
142 typedef struct svp_conn_out {
143 svp_query_t *sco_query;
144 size_t sco_offset;
145 } svp_conn_out_t;
146
147 typedef struct svp_conn_in {
148 svp_query_t *sci_query;
149 svp_req_t sci_req;
150 size_t sci_offset;
151 } svp_conn_in_t;
152
153 struct svp_conn {
154 svp_remote_t *sc_remote; /* RO */
155 struct in6_addr sc_addr; /* RO */
156 list_node_t sc_rlist; /* svp_remote_t`sr_lock */
157 mutex_t sc_lock;
158 svp_event_t sc_event;
159 svp_timer_t sc_btimer;
160 svp_timer_t sc_qtimer;
161 int sc_socket;
162 uint_t sc_gen;
163 uint_t sc_nbackoff;
164 svp_conn_flags_t sc_flags;
165 svp_conn_state_t sc_cstate;
166 svp_conn_error_t sc_error;
167 int sc_errno;
168 list_t sc_queries;
169 svp_conn_out_t sc_output;
170 svp_conn_in_t sc_input;
171 uint_t sc_version;
172 };
173
/*
 * Name-resolution state of a remote; stored in svp_remote_t`sr_state. The
 * values are distinct single bits.
 */
typedef enum svp_remote_state {
	SVP_RS_LOOKUP_SCHEDULED		= 0x01,	/* On the DNS Queue */
	SVP_RS_LOOKUP_INPROGRESS	= 0x02,	/* Doing a DNS lookup */
	SVP_RS_LOOKUP_VALID		= 0x04	/* addrinfo valid */
} svp_remote_state_t;
179
/*
 * This series of bit-based flags should be ordered such that the most severe
 * is first. We only can set one message that user land can see, so if more
 * than one is set we want to make sure that one is there.
 *
 * SVP_RD_ALL is the union of the individual flags (0x01 | 0x02).
 */
typedef enum svp_degrade_state {
	SVP_RD_DNS_FAIL		= 0x01,	/* DNS Resolution Failure */
	SVP_RD_REMOTE_FAIL	= 0x02,	/* cannot reach any remote peers */
	SVP_RD_ALL		= 0x03	/* Only suitable for restore */
} svp_degrade_state_t;
190
/*
 * Bit flags for the shootdown log state; stored in svp_sdlog_t`sdl_flags.
 * Each value is a distinct single bit.
 */
typedef enum svp_shootdown_flags {
	SVP_SD_RUNNING	= 0x01,
	SVP_SD_QUIESCE	= 0x02,
	SVP_SD_DORM	= 0x04
} svp_shootdown_flags_t;
196
197 /*
198 * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and
199 * condition variables. See the big theory statement for more information on how
200 * it's used.
201 */
202 typedef struct svp_sdlog {
203 mutex_t sdl_lock;
204 cond_t sdl_cond;
205 uint_t sdl_ref;
206 svp_timer_t sdl_timer;
207 svp_shootdown_flags_t sdl_flags;
208 svp_query_t sdl_query;
209 void *sdl_logack;
210 void *sdl_logrm;
211 void *sdl_remote;
212 } svp_sdlog_t;
213
214 struct svp_remote {
215 char *sr_hostname; /* RO */
216 uint16_t sr_rport; /* RO */
217 struct in6_addr sr_uip; /* RO */
218 avl_node_t sr_gnode; /* svp_remote_lock */
219 svp_remote_t *sr_nexthost; /* svp_host_lock */
220 mutex_t sr_lock;
221 cond_t sr_cond;
222 svp_remote_state_t sr_state;
223 svp_degrade_state_t sr_degrade;
224 struct addrinfo *sr_addrinfo;
225 avl_tree_t sr_tree;
226 uint_t sr_count; /* active count */
227 uint_t sr_gen;
228 uint_t sr_tconns; /* total conns + dconns */
229 uint_t sr_ndconns; /* number of degraded conns */
230 list_t sr_conns; /* all conns */
231 svp_sdlog_t sr_shoot;
232 };
233
234 /*
235 * We have a bunch of different things that we get back from the API at the
236 * plug-in layer. These include:
237 *
238 * o OOB Shootdowns
239 * o VL3->VL2 Lookups
240 * o VL2->UL3 Lookups
241 * o VL2 Log invalidations
242 * o VL3 Log injections
243 */
244 typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
245 const uint16_t, void *);
246 typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
247 const struct in6_addr *, const uint16_t, void *);
248 typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);
249 typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
250 const struct in6_addr *, const uint8_t *, const uint8_t *);
251 typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
252 const struct in6_addr *, const uint16_t uport);
253 typedef void (*svp_route_lookup_f)(svp_t *, svp_status_t, uint32_t, uint32_t,
254 uint16_t, uint8_t *, uint8_t *, uint16_t, uint8_t *, uint8_t, uint8_t,
255 void *);
256
257 typedef struct svp_cb {
258 svp_vl2_lookup_f scb_vl2_lookup;
259 svp_vl3_lookup_f scb_vl3_lookup;
260 svp_vl2_invalidation_f scb_vl2_invalidate;
261 svp_vl3_inject_f scb_vl3_inject;
262 svp_shootdown_f scb_shootdown;
263 svp_route_lookup_f scb_route_lookup;
264 } svp_cb_t;
265
266 /*
267 * Core implementation structure.
268 */
269 struct svp {
270 overlay_plugin_dest_t svp_dest; /* RO */
271 varpd_provider_handle_t *svp_hdl; /* RO */
272 svp_cb_t svp_cb; /* RO */
273 uint64_t svp_vid; /* RO */
274 avl_node_t svp_rlink; /* Owned by svp_remote */
275 svp_remote_t *svp_remote; /* RO iff started */
276 mutex_t svp_lock;
277 char *svp_host; /* svp_lock */
278 uint16_t svp_port; /* svp_lock */
279 uint16_t svp_uport; /* svp_lock */
280 uint32_t svp_dcid; /* svp_lock (but write-once?) */
281 boolean_t svp_huip; /* svp_lock */
282 struct in6_addr svp_uip; /* svp_lock */
283 /* NOTE: lower-3 bytes are 0s. */
284 uint8_t svp_router_oui[6]; /* svp_lock (but write-once?) */
285 };
286
287 extern bunyan_logger_t *svp_bunyan;
288
289 extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
290 svp_remote_t **);
291 extern int svp_remote_attach(svp_remote_t *, svp_t *);
292 extern void svp_remote_detach(svp_t *);
293 extern void svp_remote_release(svp_remote_t *);
294 extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
295 const struct sockaddr *, void *);
296 extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
297 void *);
298 extern void svp_remote_route_lookup(svp_t *, svp_query_t *,
299 const struct in6_addr *, const struct in6_addr *, uint32_t,
300 uint16_t, void *);
301
302
303 /*
304 * Init functions
305 */
306 extern int svp_remote_init(void);
307 extern void svp_remote_fini(void);
308 extern int svp_event_init(void);
309 extern int svp_event_timer_init(svp_event_t *);
310 extern void svp_event_fini(void);
311 extern int svp_host_init(void);
312 extern int svp_timer_init(void);
313
314 /*
315 * Timers
316 */
317 extern int svp_tickrate;
318 extern void svp_timer_add(svp_timer_t *);
319 extern void svp_timer_remove(svp_timer_t *);
320
321 /*
322 * Event loop management
323 */
324 extern int svp_event_associate(svp_event_t *, int);
325 extern int svp_event_dissociate(svp_event_t *, int);
326 extern int svp_event_inject(svp_event_t *);
327
328 /*
329 * Connection manager
330 */
331 extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
332 extern void svp_conn_destroy(svp_conn_t *);
333 extern void svp_conn_fallout(svp_conn_t *);
334 extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
335
336 /*
337 * FMA related
338 */
339 extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
340 extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
341
342 /*
343 * Misc.
344 */
345 extern int svp_comparator(const void *, const void *);
346 extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
347 extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
348 extern void svp_host_queue(svp_remote_t *);
349 extern void svp_query_release(svp_query_t *);
350 extern void svp_query_crc32(svp_req_t *, void *, size_t);
351 extern id_t svp_id_alloc(void);
352
353 /*
354 * Shootdown related
355 */
356 extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
357 svp_sdlog_t *);
358 extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);
359 extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
360 size_t);
361 extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
362 size_t);
363 extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);
364 extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);
365 extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *);
366 extern int svp_shootdown_init(svp_remote_t *);
367 extern void svp_shootdown_fini(svp_remote_t *);
368 extern void svp_shootdown_start(svp_remote_t *);
369
370 #ifdef __cplusplus
371 }
372 #endif
373
374 #endif /* _LIBVARPD_SVP_H */