Try versioning as a new state
--- old/usr/src/lib/varpd/svp/common/libvarpd_svp.c
+++ new/usr/src/lib/varpd/svp/common/libvarpd_svp.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2018, Joyent, Inc.
14 14 */
15 15
16 16 /*
17 17 * This plugin implements the SDC VXLAN Protocol (SVP).
18 18 *
19 19 * This plugin is designed to work with a broader distributed system that
20 20 * maintains a database of mappings and provides a means of looking up data and
21 21 * provides a stream of updates. While it is named after VXLAN, there isn't
22 22 * anything specific to VXLAN baked into the protocol at this time, other than
23 23 * that it requires both an IP address and a port; however, if there's a good
24 24 * reason to support others here, we can modify that.
25 25 *
26 26 * -----------
27 27 * Terminology
28 28 * -----------
29 29 *
30 30 * Throughout this module we refer to a few different kinds of addresses:
31 31 *
32 32 * VL3
33 33 *
34 34 * A VL3 address, or virtual layer 3, refers to the layer three addresses
35 35 * that are used by entities on an overlay network. As far as we're
36 36 * concerned that means that this is the IP address of an interface on an
37 37 * overlay network.
38 38 *
39 39 * VL2
40 40 *
41 41 * A VL2 address, or virtual layer 2, refers to the link-layer addresses
42 42 * that are used by entities on an overlay network. As far as we're
43 43 * concerned that means that this is the MAC address of an interface on
44 44 * an overlay network.
45 45 *
46 46 * UL3
47 47 *
48 48 * A UL3, or underlay layer 3, refers to the layer three (IP) address on
49 49 * the underlay network.
50 50 *
51 51 * The svp plugin provides lookups from VL3->VL2, e.g. the equivalent of an ARP
52 52 * or NDP query, and then also provides VL2->UL3 lookups.
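
To make the two mappings concrete, the sketch below shows roughly what each lookup answers. The struct and field names are illustrative stand-ins for this commentary only, not the plugin's actual types:

	#include <stdint.h>
	#include <netinet/in.h>

	#define	EX_ETHERADDRL	6

	/* VL3 -> VL2: the ARP/NDP-equivalent answer for an overlay IP. */
	typedef struct example_vl3_to_vl2 {
		struct in6_addr	ev_vl3;			/* overlay (virtual) IP */
		uint8_t		ev_vl2[EX_ETHERADDRL];	/* overlay MAC address */
	} example_vl3_to_vl2_t;

	/* VL2 -> UL3: where on the underlay a given overlay MAC lives. */
	typedef struct example_vl2_to_ul3 {
		uint8_t		eu_vl2[EX_ETHERADDRL];	/* overlay MAC address */
		struct in6_addr	eu_ul3;			/* underlay IP address */
		uint16_t	eu_ul3_port;		/* underlay port */
	} example_vl2_to_ul3_t;
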
53 53 *
54 54 * -------------------
55 55 * Protocol Operations
56 56 * -------------------
57 57 *
58 58 * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
59 59 * defines the basic TCP protocol that we use to communicate to hosts. At this
60 60 * time, it is not quite 100% implemented in both this plug-in and our primary
61 61 * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
62 62 *
63 63 * At this time, we don't quite support everything that we need to, including
64 64 * SVP_R_BULK_REQ and SVP_R_SHOOTDOWN.
65 65 *
66 66 * ---------------------------------
67 67 * General Design and Considerations
68 68 * ---------------------------------
69 69 *
70 70 * Every instance of the svp plugin requires the hostname and port of a server
71 71 * to contact. We have co-opted port 1296 (the year of the oldest
72 72 * extant portolan) as our default port.
73 73 *
74 74 * Each of the different instances of the plugin has a corresponding remote
75 75 * backend. The remote backend represents the tuple of the [ host, port ].
76 76 * Different instances that share the same host and port tuple will use the same
77 77 * backend.
78 78 *
79 79 * The backend is actually in charge of performing lookups, resolving and
80 80 * updating the set of remote hosts based on the DNS resolution we've been
81 81 * provided, and taking care of things like shootdowns.
82 82 *
83 83 * The whole plugin itself maintains an event loop and a number of threads to
84 84 * service that event loop. On top of that event loop, we have a simple timer
85 85 * backend that ticks at one second intervals and performs various callbacks,
86 86 * such as idle query timers, DNS resolution, connection backoff, etc. Each of
87 87 * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
88 88 * the connection state, reconnecting, etc.
89 89 *
90 90 * All in all, the general way that this all fits together is:
91 91 *
92 92 * +----------------------------+
93 93 * | Plugin Instance |
94 94 * | svp_t |
95 95 * | |
96 96 * | varpd_provider_handle_t * -+-> varpd handle
97 97 * | uint64_t ----+-> varpd ID
98 98 * | char * ----+-> remote host
99 99 * | uint16_t ----+-> remote port
100 100 * | svp_remote_t * ---+------+-> remote backend
101 101 * +---------------------+------+
102 102 * |
103 103 * v
104 104 * +----------------------+ +----------------+
105 105 * | Remote backend |------------------>| Remote Backend |---> ...
106 106 * | svp_remote_t | | svp_remote_t |
107 107 * | | +----------------+
108 108 * | svp_remote_state_t --+-> state flags
109 109 * | svp_degrade_state_t -+-> degraded reason
110 110 * | struct addrinfo * --+-> resolved hosts
111 111 * | uint_t ---+-> active hosts
112 112 * | uint_t ---+-> DNS generation
113 113 * | uint_t ---+-> Reference count
114 114 * | uint_t ---+-> active conns
115 115 * | uint_t ---+-> degraded conns
116 116 * | list_t ---+---+-> connection list
117 117 * +------------------+---+
118 118 * |
119 119 * +------------------------------+-----------------+
120 120 * | | |
121 121 * v v v
122 122 * +-------------------+ +----------------
123 123 * | SVP Connection | | SVP connection | ...
124 124 * | svp_conn_t | | svp_conn_t |
125 125 * | | +----------------+
126 126 * | svp_event_t ----+-> event loop handle
127 127 * | svp_timer_t ----+-> backoff timer
128 128 * | svp_timer_t ----+-> query timer
129 129 * | int ----+-> socket fd
130 130 * | uint_t ----+-> generation
131 131 * | uint_t ----+-> current backoff
132 132 * | svp_conn_flags_t -+-> connection flags
133 133 * | svp_conn_state_t -+-> connection state
134 134 * | svp_conn_error_t -+-> connection error
135 135 * | int ---+-> last errno
136 136 * | hrtime_t ---+-> activity timestamp
137 137 * | svp_conn_out_t ---+-> outgoing data state
138 138 * | svp_conn_in_t ---+-> incoming data state
139 139 * | list_t ---+--+-> active queries
140 140 * +----------------+--+
141 141 * |
142 142 * +----------------------------------+-----------------+
143 143 * | | |
144 144 * v v v
145 145 * +--------------------+ +-------------+
146 146 * | SVP Query | | SVP Query | ...
147 147 * | svp_query_t | | svp_query_t |
148 148 * | | +-------------+
149 149 * | svp_query_f ---+-> callback function
150 150 * | void * ---+-> callback arg
151 151 * | svp_query_state_t -+-> state flags
152 152 * | svp_req_t ---+-> svp prot. header
153 153 * | svp_query_data_t --+-> read data
154 154 * | svp_query_data_t --+-> write data
155 155 * | svp_status_t ---+-> request status
156 156 * +--------------------+
157 157 *
158 158 * The svp_t is the instance that we associate with varpd. The instance itself
159 159 * maintains properties and then when it's started associates with an
160 160 * svp_remote_t, which is the remote backend. The remote backend itself
161 161 * maintains the DNS state and spins up and down connections based on the
162 162 * results from DNS. By default, we query DNS every 30 seconds. For more on the
163 163 * connection life cycle, see the next section.
164 164 *
165 165 * By default, each connection maintains its own backoff timer and list of
166 166 * queries it's servicing. Only one request is generally outstanding at a time
167 167 * and requests are round-robined across the various connections.
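
As a rough sketch of that round-robin dispatch (the structure below is a simplified stand-in for svp_remote_t and its connection list, not the real definition):

	#include <sys/types.h>

	typedef struct example_remote {
		uint_t	er_nconns;	/* number of usable connections (> 0) */
		uint_t	er_next;	/* cursor for the next connection to use */
	} example_remote_t;

	/*
	 * Pick the connection index that should carry the next query,
	 * advancing the cursor so consecutive queries land on different
	 * connections.
	 */
	static uint_t
	example_next_conn(example_remote_t *er)
	{
		uint_t idx = er->er_next;

		er->er_next = (er->er_next + 1) % er->er_nconns;
		return (idx);
	}
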
168 168 *
169 169 * The query itself represents the svp request that's in flight. It keeps track
170 170 * of its state and holds the data that's read and written as part of the
171 171 * request.
172 172 *
173 173 * Connections maintain a query timer such that if we have not received data on
174 174 * a socket for a certain amount of time, we kill that socket and begin a
175 175 * reconnection cycle with backoff.
176 176 *
177 177 * ------------------------
178 178 * Connection State Machine
179 179 * ------------------------
180 180 *
181 181 * We have a connection pool that's built upon DNS records. DNS describes the
182 182 * membership of the set of remote peers that make up our pool and we maintain
183 183 * one connection to each of them. In addition, we maintain an exponential
184 184 * backoff for each peer and will attempt to reconnect immediately before backing
185 185 * off. The following are the valid states that a connection can be in:
186 186 *
187 187 * SVP_CS_ERROR An OS error has occurred on this connection,
188 188 * such as failure to create a socket or associate
189 189 * the socket with an event port. We also
190 190 * transition all connections to this state before
191 191 * we destroy them.
192 192 *
193 193 * SVP_CS_INITIAL This is the initial state of a connection, all
194 194 * that should exist is an unbound socket.
195 195 *
196 196 * SVP_CS_CONNECTING A call to connect has been made and we are
197 197 * polling for it to complete.
198 198 *
199 199 * SVP_CS_BACKOFF A connect attempt has failed and we are
200 200 * currently backing off, waiting to try again.
201 201 *
202 202 * SVP_CS_ACTIVE We have successfully connected to the remote
203 203 * system.
204 204 *
205 205 * SVP_CS_WINDDOWN This connection is going to valhalla. In other
206 206 * words, a previously active connection is no
207 207 * longer valid in DNS, so we should curb our use
208 208 * of it, and reap it as soon as we have other
209 209 * active connections.
210 210 *
211 211 * The following diagram attempts to describe our state transition scheme, and
212 212 * when we transition from one state to the next.
213 213 *
214 214 * |
215 215 * * New remote IP from DNS resolution,
216 216 * | not currently active in the system.
217 217 * |
218 218 * v Socket Error,
219 219 * +----------------+ still in DNS
220 - * +----------------<---| SVP_CS_INITIAL |<----------------------*-----+
221 - * | +----------------+ |
222 - * | System | |
223 - * | Connection . . . . . success * Successful |
224 - * | failed . | connect() |
225 - * | +----*---------+ | +-----------*--+ |
226 - * | | | | | | |
227 - * | V ^ v ^ V ^
228 - * | +----------------+ +-------------------+ +---------------+
229 - * +<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE |
230 - * | +----------------+ +-------------------+ +---------------+
231 - * | V ^ V V V
232 - * | Backoff wait * | | | * Removed
233 - * v interval +--------------+ +-----------------<-----+ | from DNS
220 + * +----------------<---| SVP_CS_INITIAL |<----------------------*--------+
221 + * | +----------------+ |
222 + * | System | |
223 + * | Connection . . . . . success * Successful |
224 + * | failed . | connect() |
225 + * | . | +-------------------+ |
226 + * | +----*---------+ | +-*>| SVP_CS_VERSIONING + |
227 + * | | | | | +-------------------+ |
228 + * | | | | | V V Set version |
229 + * | | | | | | * based on |
230 + * | | | | | | | SVP_R_PONG |
231 + * | V ^ v ^ | V ^
232 + * | +----------------+ +-------------------+ | +---------------+
233 + * +<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | | SVP_CS_ACTIVE |
234 + * | +----------------+ +-------------------+ | +---------------+
235 + * | V ^ V | V V
236 + * | Backoff wait * | | | | * Removed
237 + * v interval +--------------+ +-----------------<+----+ | from DNS
234 238 * | finished | |
235 239 * | V |
236 240 * | | V
237 241 * | | +-----------------+
238 242 * +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
239 243 * | +-----------------+
240 244 * * . . . Fatal system, not
241 245 * | socket error or
242 246 * V quiesced after
243 247 * +--------------+ removal from DNS
244 248 * | SVP_CS_ERROR |
245 249 * +--------------+
246 250 * |
247 251 * * . . . Removed from DNS
248 252 * v
249 253 * +------------+
250 254 * | Connection |
251 255 * | Destroyed |
252 256 * +------------+
253 257 *
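
The per-connection exponential backoff that drives SVP_CS_BACKOFF might look roughly like the following; this is a sketch with illustrative names and an assumed doubling-with-cap policy, not the plugin's actual fields:

	#include <sys/types.h>

	#define	EX_BACKOFF_MAX	32	/* assumed cap on the wait, in ticks */

	typedef struct example_conn {
		uint_t	ec_backoff;	/* current backoff, in one-second ticks */
	} example_conn_t;

	/*
	 * Called after a failed connect attempt: double the wait up to the
	 * cap and return how many ticks to sit in SVP_CS_BACKOFF before
	 * retrying.
	 */
	static uint_t
	example_backoff_next(example_conn_t *ec)
	{
		if (ec->ec_backoff == 0)
			ec->ec_backoff = 1;
		else if (ec->ec_backoff < EX_BACKOFF_MAX)
			ec->ec_backoff *= 2;
		return (ec->ec_backoff);
	}
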
254 258 * --------------------------
255 259 * Connection Event Injection
256 260 * --------------------------
257 261 *
258 262 * For each connection that exists in the system, we have a timer in place that
259 263 * is in charge of performing timeout activity. It fires once every thirty
260 264 * seconds or so for a given connection and checks to ensure that we have had
261 265 * activity for the most recent query on the connection. If not, it terminates
262 266 * the connection. This is important as if we have sent all our data and are
263 267 * waiting for the remote end to reply, without enabling something like TCP
264 268 * keep-alive, we will not be notified of anything that has happened to the
265 269 * remote connection, for example a panic. In addition, this also protects
266 270 * against a server that is up, but a portolan that is not making forward
267 271 * progress.
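
A minimal sketch of that periodic activity check (field names are illustrative; the real svp_conn_t differs):

	#include <sys/types.h>
	#include <sys/time.h>

	typedef struct example_conn {
		hrtime_t	ec_lastact;	/* hrtime of the last socket activity */
		boolean_t	ec_waiting;	/* a query is awaiting a reply */
	} example_conn_t;

	/*
	 * Run from the periodic timer: if a reply has been outstanding with
	 * no activity for longer than the timeout, the caller tears the
	 * connection down and lets the reconnect/backoff machinery take over.
	 */
	static boolean_t
	example_conn_timed_out(const example_conn_t *ec, hrtime_t timeout_ns)
	{
		if (!ec->ec_waiting)
			return (B_FALSE);
		return ((gethrtime() - ec->ec_lastact > timeout_ns) ? B_TRUE : B_FALSE);
	}
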
268 272 *
269 273 * When a timeout occurs, we first try to disassociate any active events, which
270 274 * by definition must exist. Once that's done, we inject a port source user
271 275 * event. Now, there is a small gotcha. Let's assume for a moment that we have a
272 276 * pathological portolan. That means that it knows to inject activity right at
273 277 * the timeout window. That means that the event may be disassociated before
274 278 * we could get to it. If that's the case, we must _not_ inject the user event
275 279 * and instead, we'll let the pending event take care of it. We know that the
276 280 * pending event hasn't hit the main part of the loop yet; otherwise, it would
277 281 * have released the lock protecting our state and associated the event.
278 282 *
279 283 * ------------
280 284 * Notes on DNS
281 285 * ------------
282 286 *
283 287 * Unfortunately, doing host name resolution in a way that allows us to leverage
284 288 * the system's resolvers and the system's caching requires us to make blocking
285 289 * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
286 290 * that will tie up a thread for quite some time. To work around that fact,
287 291 * we're going to create a fixed number of threads and we'll use them to service
288 292 * our DNS requests. While this isn't ideal, until we have a sane means of
289 293 * integrating DNS resolution into an event loop with, say, portfs, it's not
290 294 * going to be a fun day no matter what we do.
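
A minimal sketch of the fixed resolver-thread approach described above; the queue hooks (example_dns_dequeue/example_dns_complete) and the pool size are hypothetical, and error handling is elided:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <netdb.h>
	#include <pthread.h>
	#include <string.h>

	#define	EX_DNS_NTHREADS	4	/* assumed fixed pool size */

	/* Hypothetical queue hooks; the real plugin's queueing differs. */
	extern const char *example_dns_dequeue(void);
	extern void example_dns_complete(const char *, struct addrinfo *);

	/*
	 * Each worker blocks in getaddrinfo(3SOCKET); slow or unreachable
	 * name servers only ever tie up one of these dedicated threads.
	 */
	static void *
	example_dns_worker(void *arg)
	{
		for (;;) {
			const char *host = example_dns_dequeue();
			struct addrinfo hints, *res = NULL;

			(void) memset(&hints, 0, sizeof (hints));
			hints.ai_family = AF_UNSPEC;
			hints.ai_socktype = SOCK_STREAM;
			if (getaddrinfo(host, NULL, &hints, &res) == 0)
				example_dns_complete(host, res);
		}
		return (arg);
	}

	static void
	example_dns_pool_create(void)
	{
		pthread_t tid;
		int i;

		for (i = 0; i < EX_DNS_NTHREADS; i++)
			(void) pthread_create(&tid, NULL, example_dns_worker, NULL);
	}
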
291 295 *
292 296 * ------
293 297 * Timers
294 298 * ------
295 299 *
296 300 * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
297 301 * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves
298 302 * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
299 303 * access, we don't rely on it. The timer itself is just a
300 304 * simple avl tree sorted by expiration time, which is stored as a tick in the
301 305 * future; a tick is just one second.
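
As a simplified illustration of that tick-based expiry (the real code walks an AVL tree of these entries; the names here are illustrative):

	#include <stdint.h>

	typedef struct example_timer {
		uint64_t	et_expire;		/* absolute tick at which to fire */
		void		(*et_func)(void *);	/* callback to run on expiry */
		void		*et_arg;
	} example_timer_t;

	/*
	 * Called once per one-second tick for the earliest entry; because
	 * the tree is sorted by et_expire, the real code keeps popping the
	 * front until it finds an entry that has not yet expired.
	 */
	static void
	example_timer_tick(example_timer_t *et, uint64_t now_tick)
	{
		if (et->et_expire <= now_tick)
			et->et_func(et->et_arg);
	}
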
302 306 *
303 307 * ----------
304 308 * Shootdowns
305 309 * ----------
306 310 *
307 311 * As part of the protocol, we need to be able to handle shootdowns that inform
308 312 * us that some of the information in the system is out of date. This information
309 313 * needs to be processed promptly; however, the information is hopefully going
310 314 * to be relatively infrequent relative to the normal flow of information.
311 315 *
312 316 * Shootdown processing needs to be done on a per-backend basis. The
313 317 * general design is that we'll have a single query for this which can fire on a
314 318 * 5-10s period; we randomize the latter part to give us a bit more load
315 319 * spreading. If we complete because there's no work to do, then we wait the
316 320 * normal period. If we complete, but there's still work to do, we'll go again
317 321 * after a second.
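
The randomized polling interval could be computed along these lines; the constants and the use of rand(3C) are assumptions for illustration only:

	#include <stdlib.h>

	#define	EX_SHOOTDOWN_MIN	5	/* seconds */
	#define	EX_SHOOTDOWN_SPREAD	6	/* yields a 5-10 second window */

	/*
	 * If the last pass drained all pending log entries, wait a
	 * randomized 5-10s before polling again; if work remains, come back
	 * after one second.
	 */
	static unsigned int
	example_shootdown_delay(int more_work)
	{
		if (more_work)
			return (1);
		return (EX_SHOOTDOWN_MIN +
		    (unsigned int)(rand() % EX_SHOOTDOWN_SPREAD));
	}
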
318 322 *
319 323 * A shootdown has a few different parts. We first receive a list of items to
320 324 * shoot down. After performing all of those, we need to acknowledge them. When
321 325 * that's been done successfully, we can move onto the next part. From a
322 326 * protocol perspective, we make an SVP_R_LOG_REQ, we get a reply, and then after
323 327 * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
324 328 * continue.
325 329 *
326 330 * However, one of the challenges that we have is that these invalidations are
327 331 * just that, an invalidation. For a virtual layer two request, that's fine,
328 332 * because the kernel supports that. However, for virtual layer three
329 333 * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
330 334 * don't really support a notion of just an invalidation; instead, you have to
331 335 * inject the new data in a gratuitous fashion.
332 336 *
333 337 * To that end, what we instead do is when we receive a VL3 invalidation, we
334 338 * turn that into a VL3 request. We hold the general request as outstanding
335 339 * until we receive all of the callbacks for the VL3 invalidations, at which
336 340 * point we go through and do the log removal request.
337 341 */
338 342
339 343 #include <umem.h>
340 344 #include <errno.h>
341 345 #include <stdlib.h>
342 346 #include <sys/types.h>
343 347 #include <sys/socket.h>
344 348 #include <netinet/in.h>
345 349 #include <arpa/inet.h>
346 350 #include <libnvpair.h>
347 351 #include <strings.h>
348 352 #include <string.h>
349 353 #include <assert.h>
350 354 #include <unistd.h>
351 355
352 356 #include <libvarpd_provider.h>
353 357 #include "libvarpd_svp.h"
354 358
355 359 bunyan_logger_t *svp_bunyan;
356 360 static int svp_defport = 1296;
357 361 static int svp_defuport = 1339;
358 362 static umem_cache_t *svp_lookup_cache;
359 363
360 364 typedef enum svp_lookup_type {
361 365 SVP_L_UNKNOWN = 0x0,
362 366 SVP_L_VL2 = 0x1,
363 367 SVP_L_VL3 = 0x2,
364 368 SVP_L_ROUTE = 0x3
365 369 } svp_lookup_type_t;
366 370
367 371 typedef struct svp_lookup {
368 372 int svl_type;
369 373 union {
370 374 struct svl_lookup_vl2 {
371 375 varpd_query_handle_t *svl_handle;
372 376 overlay_target_point_t *svl_point;
373 377 } svl_vl2;
374 378 struct svl_lookup_vl3 {
375 379 varpd_arp_handle_t *svl_vah;
376 380 uint8_t *svl_out;
377 381 } svl_vl3;
378 382 struct svl_lookup_route {
379 383 varpd_query_handle_t *svl_handle;
380 384 overlay_target_point_t *svl_point;
381 385 overlay_target_route_t *svl_route;
382 386 overlay_target_mac_t *svl_mac;
383 387 } svl_route;
384 388 } svl_u;
385 389 svp_query_t svl_query;
386 390 } svp_lookup_t;
387 391
388 392 static const char *varpd_svp_props[] = {
389 393 "svp/host",
390 394 "svp/port",
391 395 "svp/underlay_ip",
392 396 "svp/underlay_port",
393 397 "svp/dcid",
394 398 "svp/router_oui"
395 399 };
396 400
397 401 static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
398 402
399 403 int
400 404 svp_comparator(const void *l, const void *r)
401 405 {
402 406 const svp_t *ls = l;
403 407 const svp_t *rs = r;
404 408
405 409 if (ls->svp_vid > rs->svp_vid)
406 410 return (1);
407 411 if (ls->svp_vid < rs->svp_vid)
408 412 return (-1);
409 413 return (0);
410 414 }
411 415
412 416 static void
413 417 svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
414 418 const uint16_t uport, void *arg)
415 419 {
416 420 svp_lookup_t *svl = arg;
417 421 overlay_target_point_t *otp;
418 422
419 423 assert(svp != NULL);
420 424 assert(arg != NULL);
421 425
422 426 if (status != SVP_S_OK) {
423 427 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
424 428 VARPD_LOOKUP_DROP);
425 429 umem_cache_free(svp_lookup_cache, svl);
426 430 return;
427 431 }
428 432
429 433 otp = svl->svl_u.svl_vl2.svl_point;
430 434 bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
431 435 otp->otp_port = uport;
432 436 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
433 437 VARPD_LOOKUP_OK);
434 438 umem_cache_free(svp_lookup_cache, svl);
435 439 }
436 440
437 441 static void
438 442 svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
439 443 const struct in6_addr *uip, const uint16_t uport, void *arg)
440 444 {
441 445 /* Initialize address-holders to 0 for comparisons-to-zeroes later. */
442 446 overlay_target_point_t point = { 0 };
443 447 svp_lookup_t *svl = arg;
444 448 uint8_t nexthop_mac[6] = { 0, 0, 0, 0, 0, 0 };
445 449
446 450 assert(svp != NULL);
447 451 assert(svl != NULL);
448 452
449 453 if (status != SVP_S_OK) {
450 454 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
451 455 VARPD_LOOKUP_DROP);
452 456 umem_cache_free(svp_lookup_cache, svl);
453 457 return;
454 458 }
455 459
456 460 /* Inject the L2 mapping before the L3 */
457 461 if (uport != 0 &&
458 462 bcmp(uip, &point.otp_ip, sizeof (struct in6_addr)) != 0) {
459 463 /* Normal L3 lookup result... */
460 464 bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
461 465 point.otp_port = uport;
462 466 libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
463 467 } else {
464 468 /*
465 469 * Oh my, we have a next-hop router IP.
466 470 * Set the MAC to the oui+vid concatenated
467 471 * special-router-MAC. Overlay down below will know
468 472 * that uport == 0 means the MAC is a special one.
469 473 */
470 474 if (bcmp(svp->svp_router_oui, nexthop_mac, ETHERADDRL) == 0) {
471 475 /*
472 476 * We don't have a router_oui, so we can't support
473 477 * special-router-MAC. Drop it.
474 478 */
475 479 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
476 480 VARPD_LOOKUP_DROP);
477 481 umem_cache_free(svp_lookup_cache, svl);
478 482 return;
479 483 }
480 484 bcopy(svp->svp_router_oui, nexthop_mac, 3);
481 485 nexthop_mac[3] = (svp->svp_vid >> 16) & 0xff;
482 486 nexthop_mac[4] = (svp->svp_vid >> 8) & 0xff;
483 487 nexthop_mac[5] = svp->svp_vid & 0xff;
484 488 vl2mac = nexthop_mac;
485 489 }
486 490
487 491 bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
488 492 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
489 493 VARPD_LOOKUP_OK);
490 494 umem_cache_free(svp_lookup_cache, svl);
491 495 }
492 496
493 497 static void
494 498 svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
495 499 {
496 500 libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
497 501 }
498 502
499 503 static void
500 504 svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
501 505 const uint8_t *vl2mac, const uint8_t *targmac)
502 506 {
503 507 struct in_addr v4;
504 508
505 509 /*
506 510 * At the moment we don't support any IPv6 related log entries; this
507 511 * will change soon as we develop a bit more of the IPv6 related
508 512 * infrastructure so we can properly test the injection.
509 513 */
510 514 if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
511 515 return;
512 516 } else {
513 517 IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
514 518 if (targmac == NULL)
515 519 targmac = svp_bcast;
516 520 libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
517 521 }
518 522 }
519 523
520 524 /* ARGSUSED */
521 525 static void
522 526 svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
523 527 const uint16_t uport)
524 528 {
525 529 /*
526 530 * We should probably do a conditional invalidation here.
527 531 */
528 532 libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
529 533 }
530 534
531 535 static void
532 536 svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
533 537 uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
534 538 uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
535 539 void *arg)
536 540 {
537 541 svp_lookup_t *svl = arg;
538 542 overlay_target_point_t *otp;
539 543 overlay_target_route_t *otr;
540 544 overlay_target_mac_t *otm;
541 545
542 546 if (status != SVP_S_OK) {
543 547 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
544 548 VARPD_LOOKUP_DROP);
545 549 umem_cache_free(svp_lookup_cache, svl);
546 550 return;
547 551 }
548 552
549 553 otp = svl->svl_u.svl_route.svl_point;
550 554 bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
551 555 otp->otp_port = ul3_port;
552 556
553 557 otr = svl->svl_u.svl_route.svl_route;
554 558 otr->otr_vnet = vnetid;
555 559 otr->otr_vlan = vlan;
556 560 bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
557 561
558 562 otm = svl->svl_u.svl_route.svl_mac;
559 563 otm->otm_dcid = dcid;
560 564 bcopy(dstmac, otm->otm_mac, ETHERADDRL);
561 565
562 566 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
563 567 VARPD_LOOKUP_OK);
564 568 umem_cache_free(svp_lookup_cache, svl);
565 569 }
566 570
567 571 static svp_cb_t svp_defops = {
568 572 svp_vl2_lookup_cb,
569 573 svp_vl3_lookup_cb,
570 574 svp_vl2_invalidate_cb,
571 575 svp_vl3_inject_cb,
572 576 svp_shootdown_cb,
573 577 svp_route_lookup_cb,
574 578 };
575 579
576 580 static boolean_t
577 581 varpd_svp_valid_dest(overlay_plugin_dest_t dest)
578 582 {
579 583 if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
580 584 return (B_FALSE);
581 585
582 586 return (B_TRUE);
583 587 }
584 588
585 589 static int
586 590 varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
587 591 overlay_plugin_dest_t dest)
588 592 {
589 593 int ret;
590 594 svp_t *svp;
591 595
592 596 if (varpd_svp_valid_dest(dest) == B_FALSE)
593 597 return (ENOTSUP);
594 598
595 599 svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
596 600 if (svp == NULL)
597 601 return (ENOMEM);
598 602
599 603 if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
600 604 NULL)) != 0) {
601 605 umem_free(svp, sizeof (svp_t));
602 606 return (ret);
603 607 }
604 608
605 609 svp->svp_port = svp_defport;
606 610 svp->svp_uport = svp_defuport;
607 611 svp->svp_cb = svp_defops;
608 612 svp->svp_hdl = hdl;
609 613 svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
610 614 *outp = svp;
611 615 return (0);
612 616 }
613 617
614 618 static int
615 619 varpd_svp_start(void *arg)
616 620 {
617 621 int ret;
618 622 svp_remote_t *srp;
619 623 svp_t *svp = arg;
620 624
621 625 mutex_enter(&svp->svp_lock);
622 626 if (svp->svp_host == NULL || svp->svp_port == 0 ||
623 627 svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
624 628 mutex_exit(&svp->svp_lock);
625 629 return (EAGAIN);
626 630 }
627 631 mutex_exit(&svp->svp_lock);
628 632
629 633 if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
630 634 &srp)) != 0)
631 635 return (ret);
632 636
633 637 if ((ret = svp_remote_attach(srp, svp)) != 0) {
634 638 svp_remote_release(srp);
635 639 return (ret);
636 640 }
637 641
638 642 return (0);
639 643 }
640 644
641 645 static void
642 646 varpd_svp_stop(void *arg)
643 647 {
644 648 svp_t *svp = arg;
645 649
646 650 svp_remote_detach(svp);
647 651 }
648 652
649 653 static void
650 654 varpd_svp_destroy(void *arg)
651 655 {
652 656 svp_t *svp = arg;
653 657
654 658 if (svp->svp_host != NULL)
655 659 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
656 660
657 661 if (mutex_destroy(&svp->svp_lock) != 0)
658 662 libvarpd_panic("failed to destroy svp_t`svp_lock");
659 663
660 664 umem_free(svp, sizeof (svp_t));
661 665 }
662 666
663 667 static void
664 668 varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
665 669 const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
666 670 overlay_target_route_t *otr, overlay_target_mac_t *otm)
667 671 {
668 672 svp_lookup_t *slp;
669 673 uint32_t type;
670 674 const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
671 675 *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
672 676
673 677 /*
674 678 * otl is an L3 request, so we have src/dst IPs for the inner packet.
675 679 * We also have the vlan.
676 680 *
677 681 * Assume the kernel's overlay module is caching well, so we query directly
678 682 * (i.e. no caching up here of actual destinations).
679 683 *
680 684 * We use our existing remote server (svp_remote), but with the new message
681 685 * SVP_R_ROUTE_REQ.
682 686 */
683 687
684 688 if (IN6_IS_ADDR_V4MAPPED(src)) {
685 689 if (!IN6_IS_ADDR_V4MAPPED(dst)) {
686 690 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
687 691 return;
688 692 }
689 693 type = SVP_VL3_IP;
690 694 } else {
691 695 if (IN6_IS_ADDR_V4MAPPED(dst)) {
692 696 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
693 697 return;
694 698 }
695 699 type = SVP_VL3_IPV6;
696 700 }
697 701
698 702 slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
699 703 if (slp == NULL) {
700 704 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
701 705 return;
702 706 }
703 707
704 708 slp->svl_type = SVP_L_ROUTE;
705 709 slp->svl_u.svl_route.svl_handle = vqh;
706 710 slp->svl_u.svl_route.svl_point = otp;
707 711 slp->svl_u.svl_route.svl_route = otr;
708 712 slp->svl_u.svl_route.svl_mac = otm;
709 713
710 714 svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
711 715 otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
712 716 }
713 717
714 718 static void
715 719 varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
716 720 const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
717 721 overlay_target_route_t *otr, overlay_target_mac_t *otm)
718 722 {
719 723 svp_lookup_t *slp;
720 724 svp_t *svp = arg;
721 725
722 726 /*
723 727 * Shuffle off L3 lookups to their own codepath.
724 728 */
725 729 if (otl->otl_l3req) {
726 730 varpd_svp_lookup_l3(svp, vqh, otl, otp, otr, otm);
727 731 return;
728 732 }
729 733 /*
730 734 * At this point, the traditional overlay_target_point_t is all that
731 735 * needs filling in. Zero-out the otr for safety.
732 736 */
733 737 bzero(otr, sizeof (*otr));
734 738
735 739
736 740 /*
737 741 * Check if this is something that we need to proxy, e.g. arp or ndp.
738 742 */
739 743 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
740 744 libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
741 745 return;
742 746 }
743 747
744 748 if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
745 749 otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
746 750 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
747 751 libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
748 752 } else {
749 753 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
750 754 }
751 755 return;
752 756 }
753 757
754 758 /*
755 759 * Watch out for various multicast and broadcast addresses. We've
756 760 * already taken care of the IPv6 range above. Now we just need to
757 761 * handle broadcast and, if the multicast bit is set (the lowest bit of
758 762 * the first octet of the MAC), drop it now.
759 763 */
760 764 if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
761 765 ETHERADDRL) == 0 ||
762 766 (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
763 767 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
764 768 return;
765 769 }
766 770
767 771 /*
768 772 * If we have a failure to allocate memory for this, that's not good.
769 773 * However, telling the kernel to just drop this packet is much better
770 774 * than the alternative at this moment. At least we'll try again and we
771 775 * may have something more available to us in a little bit.
772 776 */
773 777 slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
774 778 if (slp == NULL) {
775 779 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
776 780 return;
777 781 }
778 782
779 783 slp->svl_type = SVP_L_VL2;
780 784 slp->svl_u.svl_vl2.svl_handle = vqh;
781 785 slp->svl_u.svl_vl2.svl_point = otp;
782 786
783 787 svp_remote_vl2_lookup(svp, &slp->svl_query,
784 788 otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
785 789 }
786 790
787 791 /* ARGSUSED */
788 792 static int
789 793 varpd_svp_nprops(void *arg, uint_t *nprops)
790 794 {
791 795 *nprops = sizeof (varpd_svp_props) / sizeof (char *);
792 796 return (0);
793 797 }
794 798
795 799 /* ARGSUSED */
796 800 static int
797 801 varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
798 802 {
799 803 switch (propid) {
800 804 case 0:
801 805 /* svp/host */
802 806 libvarpd_prop_set_name(vph, varpd_svp_props[0]);
803 807 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
804 808 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
805 809 libvarpd_prop_set_nodefault(vph);
806 810 break;
807 811 case 1:
808 812 /* svp/port */
809 813 libvarpd_prop_set_name(vph, varpd_svp_props[1]);
810 814 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
811 815 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
812 816 (void) libvarpd_prop_set_default(vph, &svp_defport,
813 817 sizeof (svp_defport));
814 818 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
815 819 break;
816 820 case 2:
817 821 /* svp/underlay_ip */
818 822 libvarpd_prop_set_name(vph, varpd_svp_props[2]);
819 823 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
820 824 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
821 825 libvarpd_prop_set_nodefault(vph);
822 826 break;
823 827 case 3:
824 828 /* svp/underlay_port */
825 829 libvarpd_prop_set_name(vph, varpd_svp_props[3]);
826 830 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
827 831 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
828 832 (void) libvarpd_prop_set_default(vph, &svp_defuport,
829 833 sizeof (svp_defuport));
830 834 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
831 835 break;
832 836 case 4:
833 837 /* svp/dcid */
834 838 libvarpd_prop_set_name(vph, varpd_svp_props[4]);
835 839 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
836 840 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
837 841 libvarpd_prop_set_nodefault(vph);
838 842 libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
839 843 break;
840 844 case 5:
841 845 /* svp/router_oui */
842 846 libvarpd_prop_set_name(vph, varpd_svp_props[5]);
843 847 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
844 848 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
845 849 libvarpd_prop_set_nodefault(vph);
846 850 break;
847 851 default:
848 852 return (EINVAL);
849 853 }
850 854 return (0);
851 855 }
852 856
853 857 static int
854 858 varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
855 859 {
856 860 svp_t *svp = arg;
857 861
858 862 /* svp/host */
859 863 if (strcmp(pname, varpd_svp_props[0]) == 0) {
860 864 size_t len;
861 865
862 866 mutex_enter(&svp->svp_lock);
863 867 if (svp->svp_host == NULL) {
864 868 *sizep = 0;
865 869 } else {
866 870 len = strlen(svp->svp_host) + 1;
867 871 if (*sizep < len) {
868 872 mutex_exit(&svp->svp_lock);
869 873 return (EOVERFLOW);
870 874 }
871 875 *sizep = len;
872 876 (void) strlcpy(buf, svp->svp_host, *sizep);
873 877 }
874 878 mutex_exit(&svp->svp_lock);
875 879 return (0);
876 880 }
877 881
878 882 /* svp/port */
879 883 if (strcmp(pname, varpd_svp_props[1]) == 0) {
880 884 uint64_t val;
881 885
882 886 if (*sizep < sizeof (uint64_t))
883 887 return (EOVERFLOW);
884 888
885 889 mutex_enter(&svp->svp_lock);
886 890 if (svp->svp_port == 0) {
887 891 *sizep = 0;
888 892 } else {
889 893 val = svp->svp_port;
890 894 bcopy(&val, buf, sizeof (uint64_t));
891 895 *sizep = sizeof (uint64_t);
892 896 }
893 897 mutex_exit(&svp->svp_lock);
894 898 return (0);
895 899 }
896 900
897 901 /* svp/underlay_ip */
898 902 if (strcmp(pname, varpd_svp_props[2]) == 0) {
899 903 if (*sizep < sizeof (struct in6_addr))
900 904 return (EOVERFLOW);
901 905 mutex_enter(&svp->svp_lock);
902 906 if (svp->svp_huip == B_FALSE) {
903 907 *sizep = 0;
904 908 } else {
905 909 bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
906 910 *sizep = sizeof (struct in6_addr);
907 911 }
908 912 mutex_exit(&svp->svp_lock);
909 913 return (0);
910 914 }
911 915
912 916 /* svp/underlay_port */
913 917 if (strcmp(pname, varpd_svp_props[3]) == 0) {
914 918 uint64_t val;
915 919
916 920 if (*sizep < sizeof (uint64_t))
917 921 return (EOVERFLOW);
918 922
919 923 mutex_enter(&svp->svp_lock);
920 924 if (svp->svp_uport == 0) {
921 925 *sizep = 0;
922 926 } else {
923 927 val = svp->svp_uport;
924 928 bcopy(&val, buf, sizeof (uint64_t));
925 929 *sizep = sizeof (uint64_t);
926 930 }
927 931
928 932 mutex_exit(&svp->svp_lock);
929 933 return (0);
930 934 }
931 935
932 936 /* svp/dcid */
933 937 if (strcmp(pname, varpd_svp_props[4]) == 0) {
934 938 uint64_t val;
935 939
936 940 if (*sizep < sizeof (uint64_t))
937 941 return (EOVERFLOW);
938 942
939 943 mutex_enter(&svp->svp_lock);
940 944 if (svp->svp_dcid == 0) {
941 945 *sizep = 0;
942 946 } else {
943 947 val = svp->svp_dcid;
944 948 bcopy(&val, buf, sizeof (uint64_t));
945 949 *sizep = sizeof (uint64_t);
946 950 }
947 951
948 952 mutex_exit(&svp->svp_lock);
949 953 return (0);
950 954 }
951 955
952 956 /* svp/router_oui */
953 957 if (strcmp(pname, varpd_svp_props[5]) == 0) {
954 958 if (*sizep < ETHERADDRL)
955 959 return (EOVERFLOW);
956 960 mutex_enter(&svp->svp_lock);
957 961
958 962 if (ether_is_zero(&svp->svp_router_oui)) {
959 963 *sizep = 0;
960 964 } else {
961 965 bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
962 966 *sizep = ETHERADDRL;
963 967 }
964 968
965 969 mutex_exit(&svp->svp_lock);
966 970 return (0);
967 971 }
968 972 return (EINVAL);
969 973 }
970 974
971 975 static int
972 976 varpd_svp_setprop(void *arg, const char *pname, const void *buf,
973 977 const uint32_t size)
974 978 {
975 979 svp_t *svp = arg;
976 980
977 981 /* svp/host */
978 982 if (strcmp(pname, varpd_svp_props[0]) == 0) {
979 983 char *dup;
980 984 dup = umem_alloc(size, UMEM_DEFAULT);
981 985 if (dup == NULL)
982 986 return (ENOMEM);
983 987 (void) strlcpy(dup, buf, size);
984 988 mutex_enter(&svp->svp_lock);
985 989 if (svp->svp_host != NULL)
986 990 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
987 991 svp->svp_host = dup;
988 992 mutex_exit(&svp->svp_lock);
989 993 return (0);
990 994 }
991 995
992 996 /* svp/port */
993 997 if (strcmp(pname, varpd_svp_props[1]) == 0) {
994 998 const uint64_t *valp = buf;
995 999 if (size < sizeof (uint64_t))
996 1000 return (EOVERFLOW);
997 1001
998 1002 if (*valp == 0 || *valp > UINT16_MAX)
999 1003 return (EINVAL);
1000 1004
1001 1005 mutex_enter(&svp->svp_lock);
1002 1006 svp->svp_port = (uint16_t)*valp;
1003 1007 mutex_exit(&svp->svp_lock);
1004 1008 return (0);
1005 1009 }
1006 1010
1007 1011 /* svp/underlay_ip */
1008 1012 if (strcmp(pname, varpd_svp_props[2]) == 0) {
1009 1013 const struct in6_addr *ipv6 = buf;
1010 1014
1011 1015 if (size < sizeof (struct in6_addr))
1012 1016 return (EOVERFLOW);
1013 1017
1014 1018 if (IN6_IS_ADDR_V4COMPAT(ipv6))
1015 1019 return (EINVAL);
1016 1020
1017 1021 if (IN6_IS_ADDR_MULTICAST(ipv6))
1018 1022 return (EINVAL);
1019 1023
1020 1024 if (IN6_IS_ADDR_6TO4(ipv6))
1021 1025 return (EINVAL);
1022 1026
1023 1027 if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
1024 1028 ipaddr_t v4;
1025 1029 IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
1026 1030 if (IN_MULTICAST(v4))
1027 1031 return (EINVAL);
1028 1032 }
1029 1033
1030 1034 mutex_enter(&svp->svp_lock);
1031 1035 bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1032 1036 svp->svp_huip = B_TRUE;
1033 1037 mutex_exit(&svp->svp_lock);
1034 1038 return (0);
1035 1039 }
1036 1040
1037 1041 /* svp/underlay_port */
1038 1042 if (strcmp(pname, varpd_svp_props[3]) == 0) {
1039 1043 const uint64_t *valp = buf;
1040 1044 if (size < sizeof (uint64_t))
1041 1045 return (EOVERFLOW);
1042 1046
1043 1047 if (*valp == 0 || *valp > UINT16_MAX)
1044 1048 return (EINVAL);
1045 1049
1046 1050 mutex_enter(&svp->svp_lock);
1047 1051 svp->svp_uport = (uint16_t)*valp;
1048 1052 mutex_exit(&svp->svp_lock);
1049 1053
1050 1054 return (0);
1051 1055 }
1052 1056
1053 1057 /* svp/dcid */
1054 1058 if (strcmp(pname, varpd_svp_props[4]) == 0) {
1055 1059 const uint64_t *valp = buf;
1056 1060 if (size < sizeof (uint64_t))
1057 1061 return (EOVERFLOW);
1058 1062
1059 1063 if (*valp == 0 || *valp > UINT32_MAX - 1)
1060 1064 return (EINVAL);
1061 1065
1062 1066 mutex_enter(&svp->svp_lock);
1063 1067 svp->svp_dcid = (uint32_t)*valp;
1064 1068 mutex_exit(&svp->svp_lock);
1065 1069
1066 1070 return (0);
1067 1071 }
1068 1072
1069 1073 /* svp/router_oui */
1070 1074 if (strcmp(pname, varpd_svp_props[5]) == 0) {
1071 1075 if (size < ETHERADDRL)
1072 1076 return (EOVERFLOW);
1073 1077 mutex_enter(&svp->svp_lock);
1074 1078 bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
1075 1079 /* Zero-out the low three bytes. */
1076 1080 svp->svp_router_oui[3] = 0;
1077 1081 svp->svp_router_oui[4] = 0;
1078 1082 svp->svp_router_oui[5] = 0;
1079 1083 mutex_exit(&svp->svp_lock);
1080 1084 return (0);
1081 1085 }
1082 1086
1083 1087 return (EINVAL);
1084 1088 }
1085 1089
1086 1090 static int
1087 1091 varpd_svp_save(void *arg, nvlist_t *nvp)
1088 1092 {
1089 1093 int ret;
1090 1094 svp_t *svp = arg;
1091 1095
1092 1096 mutex_enter(&svp->svp_lock);
1093 1097 /* svp/host */
1094 1098 if (svp->svp_host != NULL) {
1095 1099 if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1096 1100 svp->svp_host)) != 0) {
1097 1101 mutex_exit(&svp->svp_lock);
1098 1102 return (ret);
1099 1103 }
1100 1104 }
1101 1105
1102 1106 /* svp/port */
1103 1107 if (svp->svp_port != 0) {
1104 1108 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1105 1109 svp->svp_port)) != 0) {
1106 1110 mutex_exit(&svp->svp_lock);
1107 1111 return (ret);
1108 1112 }
1109 1113 }
1110 1114
1111 1115 /* svp/underlay_ip */
1112 1116 if (svp->svp_huip == B_TRUE) {
1113 1117 char buf[INET6_ADDRSTRLEN];
1114 1118
1115 1119 if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1116 1120 NULL)
1117 1121 libvarpd_panic("unexpected inet_ntop failure: %d",
1118 1122 errno);
1119 1123
1120 1124 if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1121 1125 buf)) != 0) {
1122 1126 mutex_exit(&svp->svp_lock);
1123 1127 return (ret);
1124 1128 }
1125 1129 }
1126 1130
1127 1131 /* svp/underlay_port */
1128 1132 if (svp->svp_uport != 0) {
1129 1133 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1130 1134 svp->svp_uport)) != 0) {
1131 1135 mutex_exit(&svp->svp_lock);
1132 1136 return (ret);
1133 1137 }
1134 1138 }
1135 1139
1136 1140 /* svp/dcid */
1137 1141 if (svp->svp_dcid != 0) {
1138 1142 if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1139 1143 svp->svp_dcid)) != 0) {
1140 1144 mutex_exit(&svp->svp_lock);
1141 1145 return (ret);
1142 1146 }
1143 1147 }
1144 1148
1145 1149 /* svp/router_oui */
1146 1150 if (!ether_is_zero(&svp->svp_router_oui)) {
1147 1151 char buf[ETHERADDRSTRL];
1148 1152
1149 1153 if (ether_ntoa_r((struct ether_addr *)&svp->svp_router_oui,
1150 1154 buf) == NULL) {
1151 1155 libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1152 1156 errno);
1153 1157 }
1154 1158
1155 1159 if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1156 1160 buf)) != 0) {
1157 1161 mutex_exit(&svp->svp_lock);
1158 1162 return (ret);
1159 1163 }
1160 1164 }
1161 1165
1162 1166 mutex_exit(&svp->svp_lock);
1163 1167 return (0);
1164 1168 }
1165 1169
1166 1170 static int
1167 1171 varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1168 1172 overlay_plugin_dest_t dest, void **outp)
1169 1173 {
1170 1174 int ret;
1171 1175 svp_t *svp;
1172 1176 char *ipstr, *hstr, *etherstr;
1173 1177
1174 1178 if (varpd_svp_valid_dest(dest) == B_FALSE)
1175 1179 return (ENOTSUP);
1176 1180
1177 1181 if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1178 1182 return (ret);
1179 1183
1180 1184 /* svp/host */
1181 1185 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1182 1186 &hstr)) != 0) {
1183 1187 if (ret != ENOENT) {
1184 1188 varpd_svp_destroy(svp);
1185 1189 return (ret);
1186 1190 }
1187 1191 svp->svp_host = NULL;
1188 1192 } else {
1189 1193 size_t blen = strlen(hstr) + 1;
1190 1194 svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
1191 1195 (void) strlcpy(svp->svp_host, hstr, blen);
1192 1196 }
1193 1197
1194 1198 /* svp/port */
1195 1199 if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1196 1200 &svp->svp_port)) != 0) {
1197 1201 if (ret != ENOENT) {
1198 1202 varpd_svp_destroy(svp);
1199 1203 return (ret);
1200 1204 }
1201 1205 svp->svp_port = 0;
1202 1206 }
1203 1207
1204 1208 /* svp/underlay_ip */
1205 1209 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1206 1210 &ipstr)) != 0) {
1207 1211 if (ret != ENOENT) {
1208 1212 varpd_svp_destroy(svp);
1209 1213 return (ret);
1210 1214 }
1211 1215 svp->svp_huip = B_FALSE;
1212 1216 } else {
1213 1217 ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1214 1218 if (ret == -1) {
1215 1219 assert(errno == EAFNOSUPPORT);
1216 1220 libvarpd_panic("unexpected inet_pton failure: %d",
1217 1221 errno);
1218 1222 }
1219 1223
1220 1224 if (ret == 0) {
1221 1225 varpd_svp_destroy(svp);
1222 1226 return (EINVAL);
1223 1227 }
1224 1228 svp->svp_huip = B_TRUE;
1225 1229 }
1226 1230
1227 1231 /* svp/underlay_port */
1228 1232 if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1229 1233 &svp->svp_uport)) != 0) {
1230 1234 if (ret != ENOENT) {
1231 1235 varpd_svp_destroy(svp);
1232 1236 return (ret);
1233 1237 }
1234 1238 svp->svp_uport = 0;
1235 1239 }
1236 1240
1237 1241 /* svp/dcid */
1238 1242 if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1239 1243 &svp->svp_dcid)) != 0) {
1240 1244 if (ret != ENOENT) {
1241 1245 varpd_svp_destroy(svp);
1242 1246 return (ret);
1243 1247 }
1244 1248 svp->svp_dcid = 0;
1245 1249 }
1246 1250
1247 1251 /* svp/router_oui */
1248 1252 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1249 1253 &etherstr)) != 0) {
1250 1254 if (ret != ENOENT) {
1251 1255 varpd_svp_destroy(svp);
1252 1256 return (ret);
1253 1257 }
1254 1258 bzero(&svp->svp_router_oui, ETHERADDRL);
1255 1259 } else if (ether_aton_r(etherstr,
1256 1260 (struct ether_addr *)&svp->svp_router_oui) == NULL) {
1257 1261 libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1258 1262 }
1259 1263
1260 1264 svp->svp_hdl = hdl;
1261 1265 *outp = svp;
1262 1266 return (0);
1263 1267 }
1264 1268
1265 1269 static void
1266 1270 varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1267 1271 const struct sockaddr *sock, uint16_t vlan __unused, uint8_t *out)
1268 1272 {
1269 1273 svp_t *svp = arg;
1270 1274 svp_lookup_t *svl;
1271 1275
1272 1276 if (type != VARPD_QTYPE_ETHERNET) {
1273 1277 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1274 1278 return;
1275 1279 }
1276 1280
1277 1281 svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1278 1282 if (svl == NULL) {
1279 1283 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1280 1284 return;
1281 1285 }
1282 1286
1283 1287 svl->svl_type = SVP_L_VL3;
1284 1288 svl->svl_u.svl_vl3.svl_vah = vah;
1285 1289 svl->svl_u.svl_vl3.svl_out = out;
1286 1290 svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1287 1291 }
1288 1292
1289 1293 static const varpd_plugin_ops_t varpd_svp_ops = {
1290 1294 0,
1291 1295 varpd_svp_create,
1292 1296 varpd_svp_start,
1293 1297 varpd_svp_stop,
1294 1298 varpd_svp_destroy,
1295 1299 NULL,
1296 1300 varpd_svp_lookup,
1297 1301 varpd_svp_nprops,
1298 1302 varpd_svp_propinfo,
1299 1303 varpd_svp_getprop,
1300 1304 varpd_svp_setprop,
1301 1305 varpd_svp_save,
1302 1306 varpd_svp_restore,
1303 1307 varpd_svp_arp,
1304 1308 NULL
1305 1309 };
1306 1310
1307 1311 static int
1308 1312 svp_bunyan_init(void)
1309 1313 {
1310 1314 int ret;
1311 1315
1312 1316 if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1313 1317 return (ret);
1314 1318 ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1315 1319 bunyan_stream_fd, (void *)STDERR_FILENO);
1316 1320 if (ret != 0)
1317 1321 bunyan_fini(svp_bunyan);
1318 1322 return (ret);
1319 1323 }
1320 1324
1321 1325 static void
1322 1326 svp_bunyan_fini(void)
1323 1327 {
1324 1328 if (svp_bunyan != NULL)
1325 1329 bunyan_fini(svp_bunyan);
1326 1330 }
1327 1331
1328 1332 #pragma init(varpd_svp_init)
1329 1333 static void
1330 1334 varpd_svp_init(void)
1331 1335 {
1332 1336 int err;
1333 1337 varpd_plugin_register_t *vpr;
1334 1338
1335 1339 if (svp_bunyan_init() != 0)
1336 1340 return;
1337 1341
1338 1342 if ((err = svp_host_init()) != 0) {
1339 1343 (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1340 1344 BUNYAN_T_INT32, "error", err,
1341 1345 BUNYAN_T_END);
1342 1346 svp_bunyan_fini();
1343 1347 return;
1344 1348 }
1345 1349
1346 1350 svp_lookup_cache = umem_cache_create("svp_lookup",
1347 1351 sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1348 1352 if (svp_lookup_cache == NULL) {
1349 1353 (void) bunyan_error(svp_bunyan,
1350 1354 "failed to create svp_lookup cache",
1351 1355 BUNYAN_T_INT32, "error", errno,
1352 1356 BUNYAN_T_END);
1353 1357 svp_bunyan_fini();
1354 1358 return;
1355 1359 }
1356 1360
1357 1361 if ((err = svp_event_init()) != 0) {
1358 1362 (void) bunyan_error(svp_bunyan,
1359 1363 "failed to init event subsystem",
1360 1364 BUNYAN_T_INT32, "error", err,
1361 1365 BUNYAN_T_END);
1362 1366 svp_bunyan_fini();
1363 1367 umem_cache_destroy(svp_lookup_cache);
1364 1368 return;
1365 1369 }
1366 1370
1367 1371 if ((err = svp_timer_init()) != 0) {
1368 1372 (void) bunyan_error(svp_bunyan,
1369 1373 "failed to init timer subsystem",
1370 1374 BUNYAN_T_INT32, "error", err,
1371 1375 BUNYAN_T_END);
1372 1376 svp_event_fini();
1373 1377 umem_cache_destroy(svp_lookup_cache);
1374 1378 svp_bunyan_fini();
1375 1379 return;
1376 1380 }
1377 1381
1378 1382 if ((err = svp_remote_init()) != 0) {
1379 1383 (void) bunyan_error(svp_bunyan,
1380 1384 "failed to init remote subsystem",
1381 1385 BUNYAN_T_INT32, "error", err,
1382 1386 BUNYAN_T_END);
1383 1387 svp_event_fini();
1384 1388 umem_cache_destroy(svp_lookup_cache);
1385 1389 svp_bunyan_fini();
1386 1390 return;
1387 1391 }
1388 1392
1389 1393 vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1390 1394 if (vpr == NULL) {
1391 1395 (void) bunyan_error(svp_bunyan,
1392 1396 "failed to alloc varpd plugin",
1393 1397 BUNYAN_T_INT32, "error", err,
1394 1398 BUNYAN_T_END);
1395 1399 svp_remote_fini();
1396 1400 svp_event_fini();
1397 1401 umem_cache_destroy(svp_lookup_cache);
1398 1402 svp_bunyan_fini();
1399 1403 return;
1400 1404 }
1401 1405
1402 1406 vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1403 1407 vpr->vpr_name = "svp";
1404 1408 vpr->vpr_ops = &varpd_svp_ops;
1405 1409
1406 1410 if ((err = libvarpd_plugin_register(vpr)) != 0) {
1407 1411 (void) bunyan_error(svp_bunyan,
1408 1412 "failed to register varpd plugin",
1409 1413 BUNYAN_T_INT32, "error", err,
1410 1414 BUNYAN_T_END);
1411 1415 svp_remote_fini();
1412 1416 svp_event_fini();
1413 1417 umem_cache_destroy(svp_lookup_cache);
1414 1418 svp_bunyan_fini();
1415 1419
1416 1420 }
1417 1421 libvarpd_plugin_free(vpr);
1418 1422 }