Support route deletion entries in SVP_R_LOG_ACK.
--- old/usr/src/lib/varpd/svp/common/libvarpd_svp.c
+++ new/usr/src/lib/varpd/svp/common/libvarpd_svp.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2018, Joyent, Inc.
14 14 */
15 15
16 16 /*
17 17 * This plugin implements the SDC VXLAN Protocol (SVP).
18 18 *
19 19 * This plugin is designed to work with a broader distributed system that
20 20 * maintains a database of mappings and provides a means of looking up data and
21 21 * provides a stream of updates. While it is named after VXLAN, there isn't
22 22 * anything specific to VXLAN baked into the protocol at this time, other than
23 23 * that it requires both an IP address and a port; however, if there's a good
24 24 * reason to support others here, we can modify that.
25 25 *
26 26 * -----------
27 27 * Terminology
28 28 * -----------
29 29 *
30 30 * Throughout this module we refer to a few different kinds of addresses:
31 31 *
32 32 * VL3
33 33 *
34 34 * A VL3 address, or virtual layer 3, refers to the layer three addresses
35 35 * that are used by entities on an overlay network. As far as we're
36 36 * concerned that means that this is the IP address of an interface on an
37 37 * overlay network.
38 38 *
39 39 * VL2
40 40 *
41 41 * A VL2 address, or a virtual layer 2, refers to the link-layer addresses
42 42 * that are used by entities on an overlay network. As far as we're
43 43 * concerned that means that this is the MAC address of an interface on
44 44 * an overlay network.
45 45 *
46 46 * UL3
47 47 *
48 48 * A UL3, or underlay layer 3, refers to the layer three (IP) address on
49 49 * the underlay network.
50 50 *
51 51 * The svp plugin provides lookups from VL3->VL2, eg. the equivalent of an ARP
52 52 * or NDP query, and then also provides VL2->UL3 lookups.
53 53 *
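As a rough illustration, those two lookups boil down to mappings like the following (illustrative types only, not the plugin's actual structures):

#include <netinet/in.h>
#include <stdint.h>

/* Illustrative only: the two mappings the svp plugin resolves. */
struct vl3_to_vl2 {
	struct in6_addr	v2v_vl3_ip;	/* overlay (VL3) IP address */
	uint8_t		v2v_vl2_mac[6];	/* overlay (VL2) MAC address */
};

struct vl2_to_ul3 {
	uint8_t		v2u_vl2_mac[6];	/* overlay (VL2) MAC address */
	struct in6_addr	v2u_ul3_ip;	/* underlay (UL3) IP address */
	uint16_t	v2u_ul3_port;	/* underlay (UL3) port */
};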
54 54 * -------------------
55 55 * Protocol Operations
56 56 * -------------------
57 57 *
58 58 * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
59 59 * defines the basic TCP protocol that we use to communicate to hosts. At this
60 60 * time, it is not quite 100% implemented in both this plug-in and our primary
61 61 * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
62 62 *
63 63 * At this time, we don't quite support everything that we need to, including
64 64 * SVP_R_BULK_REQ and SVP_R_SHOOTDOWN.
65 65 *
66 66 * ---------------------------------
67 67 * General Design and Considerations
68 68 * ---------------------------------
69 69 *
70 70 * Every instance of the svp plugin requires the hostname and port of a server
71 71 * to contact. We have co-opted port 1296 (the year of the oldest
72 72 * extant portolan) as our default port.
73 73 *
74 74 * Each of the different instances of the plugin has a corresponding remote
75 75 * backend. The remote backend represents the tuple of the [ host, port ].
76 76 * Different instances that share the same host and port tuple will use the same
77 77 * backend.
78 78 *
79 79 * The backend is actually in charge of performing lookups, resolving and
80 80 * updating the set of remote hosts based on the DNS resolution we've been
81 81 * provided, and taking care of things like shootdowns.
82 82 *
83 83 * The whole plugin itself maintains an event loop and a number of threads to
84 84 * service that event loop. On top of that event loop, we have a simple timer
85 85 * backend that ticks at one second intervals and performs various callbacks,
86 86 * such as idle query timers, DNS resolution, connection backoff, etc. Each of
87 87 * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
88 88 * the connection state, reconnecting, etc.
89 89 *
90 90 * All in all, the general structure looks like this:
91 91 *
92 92 * +----------------------------+
93 93 * | Plugin Instance |
94 94 * | svp_t |
95 95 * | |
96 96 * | varpd_provider_handle_t * -+-> varpd handle
97 97 * | uint64_t ----+-> varpd ID
98 98 * | char * ----+-> remote host
99 99 * | uint16_t ----+-> remote port
100 100 * | svp_remote_t * ---+------+-> remote backend
101 101 * +---------------------+------+
102 102 * |
103 103 * v
104 104 * +----------------------+ +----------------+
105 105 * | Remote backend |------------------>| Remote backend |---> ...
106 106 * | svp_remote_t | | svp_remote_t |
107 107 * | | +----------------+
108 108 * | svp_remote_state_t --+-> state flags
109 109 * | svp_degrade_state_t -+-> degraded reason
110 110 * | struct addrinfo * --+-> resolved hosts
111 111 * | uint_t ---+-> active hosts
112 112 * | uint_t ---+-> DNS generation
113 113 * | uint_t ---+-> Reference count
114 114 * | uint_t ---+-> active conns
115 115 * | uint_t ---+-> degraded conns
116 116 * | list_t ---+---+-> connection list
117 117 * +------------------+---+
118 118 * |
119 119 * +------------------------------+-----------------+
120 120 * | | |
121 121 * v v v
122 122 * +-------------------+ +----------------+
123 123 * | SVP Connection | | SVP connection | ...
124 124 * | svp_conn_t | | svp_conn_t |
125 125 * | | +----------------+
126 126 * | svp_event_t ----+-> event loop handle
127 127 * | svp_timer_t ----+-> backoff timer
128 128 * | svp_timer_t ----+-> query timer
129 129 * | int ----+-> socket fd
130 130 * | uint_t ----+-> generation
131 131 * | uint_t ----+-> current backoff
132 132 * | svp_conn_flags_t -+-> connection flags
133 133 * | svp_conn_state_t -+-> connection state
134 134 * | svp_conn_error_t -+-> connection error
135 135 * | int ---+-> last errno
136 136 * | hrtime_t ---+-> activity timestamp
137 137 * | svp_conn_out_t ---+-> outgoing data state
138 138 * | svp_conn_in_t ---+-> incoming data state
139 139 * | list_t ---+--+-> active queries
140 140 * +----------------+--+
141 141 * |
142 142 * +----------------------------------+-----------------+
143 143 * | | |
144 144 * v v v
145 145 * +--------------------+ +-------------+
146 146 * | SVP Query | | SVP Query | ...
147 147 * | svp_query_t | | svp_query_t |
148 148 * | | +-------------+
149 149 * | svp_query_f ---+-> callback function
150 150 * | void * ---+-> callback arg
151 151 * | svp_query_state_t -+-> state flags
152 152 * | svp_req_t ---+-> svp prot. header
153 153 * | svp_query_data_t --+-> read data
154 154 * | svp_query_data_t --+-> write data
155 155 * | svp_status_t ---+-> request status
156 156 * +--------------------+
157 157 *
158 158 * The svp_t is the instance that we associate with varpd. The instance itself
159 159 * maintains properties and then when it's started associates with an
160 160 * svp_remote_t, which is the remote backend. The remote backend itself,
161 161 * maintains the DNS state and spins connections up and down based on the
162 162 * results from DNS. By default, we query DNS every 30 seconds. For more on the
163 163 * connection life cycle, see the next section.
164 164 *
165 165 * By default, each connection maintains its own backoff timer and list of
166 166 * queries it's servicing. Only one request is generally outstanding at a time
167 167 * and requests are round-robined across the various connections.
168 168 *
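A minimal sketch of that round-robin distribution, assuming for illustration that the active connections are held in a simple array (the plugin actually keeps them on the remote backend's list_t):

#include <stddef.h>
#include "libvarpd_svp.h"	/* for svp_conn_t */

/* Illustrative: hand the next outbound query to each connection in turn. */
static size_t svp_rr_next;

static svp_conn_t *
svp_conn_pick(svp_conn_t **conns, size_t nconns)
{
	if (nconns == 0)
		return (NULL);
	return (conns[svp_rr_next++ % nconns]);
}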
169 169 * The query itself represents the svp request that's in flight; it keeps track
170 170 * of its state and is the place where data is read and written as part of the
171 171 * request.
172 172 *
173 173 * Connections maintain a query timer such that if we have not received data on
174 174 * a socket for a certain amount of time, we kill that socket and begin a
175 175 * reconnection cycle with backoff.
176 176 *
177 177 * ------------------------
178 178 * Connection State Machine
179 179 * ------------------------
180 180 *
181 181 * We have a connection pool that's built upon DNS records. DNS describes the
182 182 * membership of the set of remote peers that make up our pool and we maintain
183 183 * one connection to each of them. In addition, we maintain an exponential
184 184 * backoff for each peer and will attempt to reconnect immediately before backing
185 185 * off. The following are the valid states that a connection can be in:
186 186 *
187 187 * SVP_CS_ERROR An OS error has occurred on this connection,
188 188 * such as failure to create a socket or associate
189 189 * the socket with an event port. We also
190 190 * transition all connections to this state before
191 191 * we destroy them.
192 192 *
193 193 * SVP_CS_INITIAL This is the initial state of a connection, all
194 194 * that should exist is an unbound socket.
195 195 *
196 196 * SVP_CS_CONNECTING A call to connect has been made and we are
197 197 * polling for it to complete.
198 198 *
199 199 * SVP_CS_BACKOFF A connect attempt has failed and we are
200 200 * currently backing off, waiting to try again.
201 201 *
202 202 * SVP_CS_ACTIVE We have successfully connected to the remote
203 203 * system.
204 204 *
205 205 * SVP_CS_WINDDOWN This connection is going to valhalla. In other
206 206 * words, a previously active connection is no
207 207 * longer valid in DNS, so we should curb our use
208 208 * of it, and reap it as soon as we have other
209 209 * active connections.
210 210 *
211 211 * The following diagram attempts to describe our state transition scheme, and
212 212 * when we transition from one state to the next.
213 213 *
214 214 * |
215 215 * * New remote IP from DNS resolution,
216 216 * | not currently active in the system.
217 217 * |
218 218 * v Socket Error,
219 219 * +----------------+ still in DNS
220 220 * +----------------<---| SVP_CS_INITIAL |<----------------------*--------+
221 221 * | +----------------+ |
222 222 * | System | |
223 223 * | Connection . . . . . success * Successful |
224 224 * | failed . | connect() |
225 225 * | . | +-------------------+ |
226 226 * | +----*---------+ | +-*>| SVP_CS_VERSIONING + |
227 227 * | | | | | +-------------------+ |
228 228 * | | | | | V V Set version |
229 229 * | | | | | | * based on |
230 230 * | | | | | | | SVP_R_PONG |
231 231 * | V ^ v ^ | V ^
232 232 * | +----------------+ +-------------------+ | +---------------+
233 233 * +<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | | SVP_CS_ACTIVE |
234 234 * | +----------------+ +-------------------+ | +---------------+
235 235 * | V ^ V | V V
236 236 * | Backoff wait * | | | | * Removed
237 237 * v interval +--------------+ +-----------------<+----+ | from DNS
238 238 * | finished | |
239 239 * | V |
240 240 * | | V
241 241 * | | +-----------------+
242 242 * +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
243 243 * | +-----------------+
244 244 * * . . . Fatal system, not
245 245 * | socket error or
246 246 * V quiesced after
247 247 * +--------------+ removal from DNS
248 248 * | SVP_CS_ERROR |
249 249 * +--------------+
250 250 * |
251 251 * * . . . Removed from DNS
252 252 * v
253 253 * +------------+
254 254 * | Connection |
255 255 * | Destroyed |
256 256 * +------------+
257 257 *
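Summarized as an enum, this is a sketch based on the state names above plus the SVP_CS_VERSIONING state that appears in the diagram; the authoritative definition lives in libvarpd_svp.h:

typedef enum {
	SVP_CS_ERROR,		/* fatal OS/socket error, about to be torn down */
	SVP_CS_INITIAL,		/* nothing but an unbound socket */
	SVP_CS_CONNECTING,	/* connect() issued, polling for completion */
	SVP_CS_BACKOFF,		/* connect failed, waiting out the backoff */
	SVP_CS_VERSIONING,	/* connected, picking a version via SVP_R_PONG */
	SVP_CS_ACTIVE,		/* connected and servicing queries */
	SVP_CS_WINDDOWN		/* dropped from DNS, reaped once others are active */
} svp_conn_state_sketch_t;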
258 258 * --------------------------
259 259 * Connection Event Injection
260 260 * --------------------------
261 261 *
262 262 * For each connection that exists in the system, we have a timer in place that
263 263 * is in charge of performing timeout activity. It fires once every thirty
264 264 * seconds or so for a given connection and checks to ensure that we have had
265 265 * activity for the most recent query on the connection. If not, it terminates
266 266 * the connection. This is important as if we have sent all our data and are
267 267 * waiting for the remote end to reply, without enabling something like TCP
268 268 * keep-alive, we will not be notified of anything that has happened to the
269 269 * remote connection, for example a panic. In addition, this protects
270 270 * against a server that is up, but a portolan that is not making forward
271 271 * progress.
272 272 *
273 273 * When a timeout occurs, we first try to disassociate any active events, which
274 274 * by definition must exist. Once that's done, we inject a port source user
275 275 * event. Now, there is a small gotcha. Let's assume for a moment that we have a
276 276 * pathological portolan. That means that it knows to inject activity right at
277 277 * the timeout window. That means that the event may be disassociated before
278 278 * we could get to it. If that's the case, we must _not_ inject the user event
279 279 * and instead, we'll let the pending event take care of it. We know that the
280 280 * pending event hasn't hit the main part of the loop yet, otherwise, it would
281 281 * have released the lock protecting our state and associated the event.
282 282 *
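A minimal sketch of that injection path using the illumos event-port calls (locking and the surrounding connection state are elided; the helper name and event value here are hypothetical):

#include <port.h>
#include <errno.h>
#include <assert.h>

#define	SKETCH_EV_TIMEOUT	0x1	/* arbitrary user-defined event value */

/*
 * Only inject the user event if we are the ones who dissociated the
 * pending fd event.  If it has already fired (ENOENT), the pending
 * event will observe the timeout itself, so we must not inject a
 * second one.
 */
static void
sketch_inject_timeout(int portfd, int connfd, void *conn)
{
	if (port_dissociate(portfd, PORT_SOURCE_FD,
	    (uintptr_t)connfd) != 0) {
		assert(errno == ENOENT);
		return;
	}
	(void) port_send(portfd, SKETCH_EV_TIMEOUT, conn);
}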
283 283 * ------------
284 284 * Notes on DNS
285 285 * ------------
286 286 *
287 287 * Unfortunately, doing host name resolution in a way that allows us to leverage
288 288 * the system's resolvers and the system's caching requires us to make blocking
289 289 * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
290 290 * that will tie up a thread for quite some time. To work around that fact,
291 291 * we're going to create a fixed number of threads and we'll use them to service
292 292 * our DNS requests. While this isn't ideal, until we have a sane means of
293 293 * integrating DNS resolution into an event loop with, say, portfs, it's not
294 294 * going to be a fun day no matter what we do.
295 295 *
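A minimal sketch of such a fixed pool of resolver threads, with a hypothetical work queue standing in for the plugin's real bookkeeping:

#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <pthread.h>

/* Hypothetical work item; the plugin keeps its real state elsewhere. */
typedef struct dns_req {
	const char	*dr_host;
	const char	*dr_port;
	void		(*dr_cb)(int err, struct addrinfo *res, void *arg);
	void		*dr_arg;
} dns_req_t;

extern dns_req_t *dns_queue_pop(void);		/* assumed: blocks for work */

static void *
dns_worker(void *arg)
{
	for (;;) {
		dns_req_t *req = dns_queue_pop();
		struct addrinfo hints = { 0 }, *res = NULL;
		int err;

		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		/* May block for a long time; that's why it runs on a worker. */
		err = getaddrinfo(req->dr_host, req->dr_port, &hints, &res);
		req->dr_cb(err, res, req->dr_arg);
	}
	/* NOTREACHED */
	return (arg);
}

/* At startup, create a fixed number of these workers. */
static int
dns_workers_init(uint_t nthreads)
{
	for (uint_t i = 0; i < nthreads; i++) {
		pthread_t tid;
		int err = pthread_create(&tid, NULL, dns_worker, NULL);

		if (err != 0)
			return (err);
	}
	return (0);
}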
296 296 * ------
297 297 * Timers
298 298 * ------
299 299 *
300 300 * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
301 301 * every second. We'd rather use CLOCK_HIGHRES just to alleviate ourselves
302 302 * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
303 303 * access, we don't want to depend on it. The timer itself is just a
304 304 * simple avl tree sorted by expiration time, which is stored as a tick in the
305 305 * future; a tick is just one second.
306 306 *
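As a sketch of that arrangement, using a POSIX CLOCK_REALTIME timer and an illustrative tick-sorted node (the plugin's own svp_timer_t and its wiring into the event loop differ in the details):

#include <signal.h>
#include <stdint.h>
#include <time.h>

/* Illustrative node: timers are sorted by the absolute tick at which they fire. */
typedef struct tick_timer {
	uint64_t	tt_expire;
} tick_timer_t;

/* AVL-style comparator over expiration ticks. */
static int
tick_timer_comparator(const void *l, const void *r)
{
	const tick_timer_t *lt = l, *rt = r;

	if (lt->tt_expire > rt->tt_expire)
		return (1);
	if (lt->tt_expire < rt->tt_expire)
		return (-1);
	return (0);
}

/* Arm a CLOCK_REALTIME timer that fires once per second. */
static int
tick_timer_start(timer_t *tidp, struct sigevent *sevp)
{
	struct itimerspec its;

	its.it_value.tv_sec = 1;
	its.it_value.tv_nsec = 0;
	its.it_interval.tv_sec = 1;
	its.it_interval.tv_nsec = 0;

	if (timer_create(CLOCK_REALTIME, sevp, tidp) != 0)
		return (-1);
	return (timer_settime(*tidp, 0, &its, NULL));
}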
307 307 * ----------
308 308 * Shootdowns
309 309 * ----------
310 310 *
311 311 * As part of the protocol, we need to be able to handle shootdowns that inform
312 312 * us that some of the information in the system is out of date. This information
313 313 * needs to be processed promptly; however, it should be relatively infrequent
314 314 * compared to the normal flow of information.
315 315 *
316 316 * Shootdown processing needs to be done on a per-backend basis. The
317 317 * general design is that we'll have a single query for this which can fire on a
318 - * 5-10s period, we randmoize the latter part to give us a bit more load
318 + * 5-10s period, we randomize the latter part to give us a bit more load
319 319 * spreading. If we complete because there's no work to do, then we wait the
320 320 * normal period. If we complete, but there's still work to do, we'll go again
321 321 * after a second.
322 322 *
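A minimal sketch of choosing that wait (illustrative helper; the real scheduling is handled by the plugin's timer code):

#include <sys/types.h>
#include <stdlib.h>

/*
 * Wait a base of five seconds plus up to five more, randomized to spread
 * SVP_R_LOG_REQ load.  If the previous pass still left work outstanding,
 * go again after a single second instead.
 */
static uint_t
shootdown_wait_secs(boolean_t more_work)
{
	if (more_work)
		return (1);
	return (5 + (uint_t)(random() % 6));
}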
323 323 * A shootdown has a few different parts. We first receive a list of items to
324 324 * shootdown. After performing all of those, we need to acknowledge them. When
325 325 * that's been done successfully, we can move onto the next part. From a
326 326 * protocol perspective, we make a SVP_R_LOG_REQ, we get a reply, and then after
327 327 * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
328 328 * continue.
329 329 *
330 330 * However, one of the challenges that we have is that these invalidations are
331 331 * just that, an invalidation. For a virtual layer two request, that's fine,
332 332 * because the kernel supports that. However, for virtual layer three
333 333 * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
334 334 * don't really support a notion of just an invalidation, instead you have to
335 335 * inject the new data in a gratuitous fashion.
336 336 *
337 337 * To that end, what we instead do is when we receive a VL3 invalidation, we
338 338 * turn that into a VL3 request. We hold the general request as outstanding
339 339 * until we receive all of the callbacks for the VL3 invalidations, at which
340 340 * point we go through and do the log removal request.
341 341 */
342 342
343 343 #include <umem.h>
344 344 #include <errno.h>
345 345 #include <stdlib.h>
346 346 #include <sys/types.h>
347 347 #include <sys/socket.h>
348 348 #include <netinet/in.h>
349 349 #include <arpa/inet.h>
350 350 #include <libnvpair.h>
351 351 #include <strings.h>
352 352 #include <string.h>
353 353 #include <assert.h>
354 354 #include <unistd.h>
355 355
356 356 #include <libvarpd_provider.h>
357 357 #include "libvarpd_svp.h"
358 358
359 359 bunyan_logger_t *svp_bunyan;
360 360 static int svp_defport = 1296;
361 361 static int svp_defuport = 1339;
362 362 static umem_cache_t *svp_lookup_cache;
363 363
364 364 typedef enum svp_lookup_type {
365 365 SVP_L_UNKNOWN = 0x0,
366 366 SVP_L_VL2 = 0x1,
367 367 SVP_L_VL3 = 0x2,
368 368 SVP_L_ROUTE = 0x3
369 369 } svp_lookup_type_t;
370 370
371 371 typedef struct svp_lookup {
372 372 int svl_type;
373 373 union {
374 374 struct svl_lookup_vl2 {
375 375 varpd_query_handle_t *svl_handle;
376 376 overlay_target_point_t *svl_point;
377 377 } svl_vl2;
378 378 struct svl_lookup_vl3 {
379 379 varpd_arp_handle_t *svl_vah;
380 380 uint8_t *svl_out;
381 381 } svl_vl3;
382 382 struct svl_lookup_route {
383 383 varpd_query_handle_t *svl_handle;
384 384 overlay_target_point_t *svl_point;
385 385 overlay_target_route_t *svl_route;
386 386 overlay_target_mac_t *svl_mac;
387 387 } svl_route;
388 388 } svl_u;
389 389 svp_query_t svl_query;
390 390 } svp_lookup_t;
391 391
392 392 static const char *varpd_svp_props[] = {
393 393 "svp/host",
394 394 "svp/port",
395 395 "svp/underlay_ip",
396 396 "svp/underlay_port",
397 397 "svp/dcid",
398 398 "svp/router_oui"
399 399 };
400 400
401 401 static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
402 402
403 403 int
404 404 svp_comparator(const void *l, const void *r)
405 405 {
406 406 const svp_t *ls = l;
407 407 const svp_t *rs = r;
408 408
409 409 if (ls->svp_vid > rs->svp_vid)
410 410 return (1);
411 411 if (ls->svp_vid < rs->svp_vid)
412 412 return (-1);
413 413 return (0);
414 414 }
415 415
416 416 static void
417 417 svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
418 418 const uint16_t uport, void *arg)
419 419 {
420 420 svp_lookup_t *svl = arg;
421 421 overlay_target_point_t *otp;
422 422
423 423 assert(svp != NULL);
424 424 assert(arg != NULL);
425 425
426 426 if (status != SVP_S_OK) {
427 427 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
428 428 VARPD_LOOKUP_DROP);
429 429 umem_cache_free(svp_lookup_cache, svl);
430 430 return;
431 431 }
432 432
433 433 otp = svl->svl_u.svl_vl2.svl_point;
434 434 bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
435 435 otp->otp_port = uport;
436 436 libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
437 437 VARPD_LOOKUP_OK);
438 438 umem_cache_free(svp_lookup_cache, svl);
439 439 }
440 440
441 441 static void
442 442 svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
443 443 const struct in6_addr *uip, const uint16_t uport, void *arg)
444 444 {
445 445 /* Initialize address-holders to 0 for comparisons-to-zeroes later. */
446 446 overlay_target_point_t point = { 0 };
447 447 svp_lookup_t *svl = arg;
448 448 uint8_t nexthop_mac[6] = { 0, 0, 0, 0, 0, 0 };
449 449
450 450 assert(svp != NULL);
451 451 assert(svl != NULL);
452 452
453 453 if (status != SVP_S_OK) {
454 454 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
455 455 VARPD_LOOKUP_DROP);
456 456 umem_cache_free(svp_lookup_cache, svl);
457 457 return;
458 458 }
459 459
460 460 /* Inject the L2 mapping before the L3 */
461 461 bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
462 462 point.otp_port = uport;
463 463 libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
464 464
465 465 bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
466 466 libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
467 467 VARPD_LOOKUP_OK);
468 468 umem_cache_free(svp_lookup_cache, svl);
469 469 }
470 470
471 471 static void
472 472 svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
473 473 {
474 474 libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
475 475 }
476 476
477 477 static void
478 478 svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
479 479 const uint8_t *vl2mac, const uint8_t *targmac)
480 480 {
481 481 struct in_addr v4;
482 482
483 483 /*
484 484 * At the moment we don't support any IPv6 related log entries, this
485 485 * will change soon as we develop a bit more of the IPv6 related
486 486 * infrastructure so we can properly test the injection.
487 487 */
488 488 if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
489 489 return;
490 490 } else {
491 491 IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
492 492 if (targmac == NULL)
493 493 targmac = svp_bcast;
494 494 libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
495 495 }
496 496 }
497 497
498 498 /* ARGSUSED */
499 499 static void
500 500 svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
501 501 const uint16_t uport)
502 502 {
503 503 /*
504 504 * We should probably do a conditional invalidation here.
505 505 */
506 506 libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
507 507 }
508 508
509 509 static void
510 510 svp_route_lookup_cb(svp_t *svp, svp_status_t status, uint32_t dcid,
511 511 uint32_t vnetid, uint16_t vlan, uint8_t *srcmac, uint8_t *dstmac,
512 512 uint16_t ul3_port, uint8_t *ul3_addr, uint8_t srcpfx, uint8_t dstpfx,
513 513 void *arg)
514 514 {
515 515 svp_lookup_t *svl = arg;
516 516 overlay_target_point_t *otp;
517 517 overlay_target_route_t *otr;
518 518 overlay_target_mac_t *otm;
519 519
520 520 if (status != SVP_S_OK) {
521 521 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
522 522 VARPD_LOOKUP_DROP);
523 523 umem_cache_free(svp_lookup_cache, svl);
524 524 return;
525 525 }
526 526
527 527 otp = svl->svl_u.svl_route.svl_point;
528 528 bcopy(ul3_addr, &otp->otp_ip, sizeof (struct in6_addr));
529 529 otp->otp_port = ul3_port;
530 530
531 531 otr = svl->svl_u.svl_route.svl_route;
532 532 otr->otr_vnet = vnetid;
533 533 otr->otr_vlan = vlan;
534 534 bcopy(srcmac, otr->otr_srcmac, ETHERADDRL);
535 535
536 536 otm = svl->svl_u.svl_route.svl_mac;
537 537 otm->otm_dcid = dcid;
538 538 bcopy(dstmac, otm->otm_mac, ETHERADDRL);
539 539
540 540 libvarpd_plugin_query_reply(svl->svl_u.svl_route.svl_handle,
541 541 VARPD_LOOKUP_OK);
542 542 umem_cache_free(svp_lookup_cache, svl);
543 543 }
544 544
545 +/*
546 + * Tell the overlay instance to flush out entries matching this route.
547 + * See libvarpd_route_flush() for more.
548 + */
549 +static void
550 +svp_route_shootdown_cb(svp_t *svp, uint8_t *srcip, uint8_t *dstip,
551 + uint8_t src_prefixlen, uint8_t dst_prefixlen, uint16_t vlan_id)
552 +{
553 + libvarpd_route_flush(svp->svp_hdl, srcip, dstip, src_prefixlen,
554 + dst_prefixlen, vlan_id);
555 +}
556 +
545 557 static svp_cb_t svp_defops = {
546 558 svp_vl2_lookup_cb,
547 559 svp_vl3_lookup_cb,
548 560 svp_vl2_invalidate_cb,
549 561 svp_vl3_inject_cb,
550 562 svp_shootdown_cb,
551 563 svp_route_lookup_cb,
564 + svp_route_shootdown_cb
552 565 };
553 566
554 567 static boolean_t
555 568 varpd_svp_valid_dest(overlay_plugin_dest_t dest)
556 569 {
557 570 if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
558 571 return (B_FALSE);
559 572
560 573 return (B_TRUE);
561 574 }
562 575
563 576 static int
564 577 varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
565 578 overlay_plugin_dest_t dest)
566 579 {
567 580 int ret;
568 581 svp_t *svp;
569 582
570 583 if (varpd_svp_valid_dest(dest) == B_FALSE)
571 584 return (ENOTSUP);
572 585
573 586 svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
574 587 if (svp == NULL)
575 588 return (ENOMEM);
576 589
577 590 if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
578 591 NULL)) != 0) {
579 592 umem_free(svp, sizeof (svp_t));
580 593 return (ret);
581 594 }
582 595
583 596 svp->svp_port = svp_defport;
584 597 svp->svp_uport = svp_defuport;
585 598 svp->svp_cb = svp_defops;
586 599 svp->svp_hdl = hdl;
587 600 svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
588 601 *outp = svp;
589 602 return (0);
590 603 }
591 604
592 605 static int
593 606 varpd_svp_start(void *arg)
594 607 {
595 608 int ret;
596 609 svp_remote_t *srp;
597 610 svp_t *svp = arg;
598 611
599 612 mutex_enter(&svp->svp_lock);
600 613 if (svp->svp_host == NULL || svp->svp_port == 0 ||
601 614 svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
602 615 mutex_exit(&svp->svp_lock);
603 616 return (EAGAIN);
604 617 }
605 618 mutex_exit(&svp->svp_lock);
606 619
607 620 if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
608 621 &srp)) != 0)
609 622 return (ret);
610 623
611 624 if ((ret = svp_remote_attach(srp, svp)) != 0) {
612 625 svp_remote_release(srp);
613 626 return (ret);
614 627 }
615 628
616 629 return (0);
617 630 }
618 631
619 632 static void
620 633 varpd_svp_stop(void *arg)
621 634 {
622 635 svp_t *svp = arg;
623 636
624 637 svp_remote_detach(svp);
625 638 }
626 639
627 640 static void
628 641 varpd_svp_destroy(void *arg)
629 642 {
630 643 svp_t *svp = arg;
631 644
632 645 if (svp->svp_host != NULL)
633 646 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
634 647
635 648 if (mutex_destroy(&svp->svp_lock) != 0)
636 649 libvarpd_panic("failed to destroy svp_t`svp_lock");
637 650
638 651 umem_free(svp, sizeof (svp_t));
639 652 }
640 653
641 654 static void
642 655 varpd_svp_lookup_l3(svp_t *svp, varpd_query_handle_t *vqh,
643 656 const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
644 657 overlay_target_route_t *otr, overlay_target_mac_t *otm)
645 658 {
646 659 svp_lookup_t *slp;
647 660 uint32_t type;
648 661 const struct in6_addr *src = &otl->otl_addru.otlu_l3.otl3_srcip,
649 662 *dst = &otl->otl_addru.otlu_l3.otl3_dstip;
650 663
651 664 /*
652 665 * otl is an L3 request, so we have src/dst IPs for the inner packet.
653 666 * We also have the vlan.
654 667 *
655 668 * Assume the kernel's overlay module is caching well, so we query the
656 669 * backend directly (i.e. no caching up here of actual destinations).
657 670 *
658 671 * We use our existing remote server (svp_remote), but with the new message
659 672 * SVP_R_ROUTE_REQ.
660 673 */
661 674
662 675 if (IN6_IS_ADDR_V4MAPPED(src)) {
663 676 if (!IN6_IS_ADDR_V4MAPPED(dst)) {
664 677 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
665 678 return;
666 679 }
667 680 type = SVP_VL3_IP;
668 681 } else {
669 682 if (IN6_IS_ADDR_V4MAPPED(dst)) {
670 683 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
671 684 return;
672 685 }
673 686 type = SVP_VL3_IPV6;
674 687 }
675 688
676 689 slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
677 690 if (slp == NULL) {
678 691 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
679 692 return;
680 693 }
681 694
682 695 slp->svl_type = SVP_L_ROUTE;
683 696 slp->svl_u.svl_route.svl_handle = vqh;
684 697 slp->svl_u.svl_route.svl_point = otp;
685 698 slp->svl_u.svl_route.svl_route = otr;
686 699 slp->svl_u.svl_route.svl_mac = otm;
687 700
688 701 svp_remote_route_lookup(svp, &slp->svl_query, src, dst,
689 702 otl->otl_vnetid, (uint16_t)otl->otl_vlan, slp);
690 703 }
691 704
692 705 static void
693 706 varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
694 707 const overlay_targ_lookup_t *otl, overlay_target_point_t *otp,
695 708 overlay_target_route_t *otr, overlay_target_mac_t *otm)
696 709 {
697 710 svp_lookup_t *slp;
698 711 svp_t *svp = arg;
699 712
700 713 /*
701 714 * Shuffle off L3 lookups to their own codepath.
702 715 */
703 716 if (otl->otl_l3req) {
704 717 varpd_svp_lookup_l3(svp, vqh, otl, otp, otr, otm);
705 718 return;
706 719 }
707 720 /*
708 721 * At this point, the traditional overlay_target_point_t is all that
709 722 * needs filling in. Zero-out the otr for safety.
710 723 */
711 724 bzero(otr, sizeof (*otr));
712 725
713 726
714 727 /*
715 728 * Check if this is something that we need to proxy, eg. arp or ndp.
716 729 */
717 730 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_ARP) {
718 731 libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
719 732 return;
720 733 }
721 734
722 735 if (otl->otl_addru.otlu_l2.otl2_dstaddr[0] == 0x33 &&
723 736 otl->otl_addru.otlu_l2.otl2_dstaddr[1] == 0x33) {
724 737 if (otl->otl_addru.otlu_l2.otl2_sap == ETHERTYPE_IPV6) {
725 738 libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
726 739 } else {
727 740 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
728 741 }
729 742 return;
730 743 }
731 744
732 745 /*
733 746 * Watch out for various multicast and broadcast addresses. We've
734 747 * already taken care of the IPv6 range above. Now we just need to
735 748 * handle broadcast and, if the multicast bit (the lowest bit of the
736 749 * first octet of the MAC) is set, drop it now.
737 750 */
738 751 if (bcmp(otl->otl_addru.otlu_l2.otl2_dstaddr, svp_bcast,
739 752 ETHERADDRL) == 0 ||
740 753 (otl->otl_addru.otlu_l2.otl2_dstaddr[0] & 0x01) == 0x01) {
741 754 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
742 755 return;
743 756 }
744 757
745 758 /*
746 759 * If we have a failure to allocate memory for this, that's not good.
747 760 * However, telling the kernel to just drop this packet is much better
748 761 * than the alternative at this moment. At least we'll try again and we
749 762 * may have something more available to us in a little bit.
750 763 */
751 764 slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
752 765 if (slp == NULL) {
753 766 libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
754 767 return;
755 768 }
756 769
757 770 slp->svl_type = SVP_L_VL2;
758 771 slp->svl_u.svl_vl2.svl_handle = vqh;
759 772 slp->svl_u.svl_vl2.svl_point = otp;
760 773
761 774 svp_remote_vl2_lookup(svp, &slp->svl_query,
762 775 otl->otl_addru.otlu_l2.otl2_dstaddr, slp);
763 776 }
764 777
765 778 /* ARGSUSED */
766 779 static int
767 780 varpd_svp_nprops(void *arg, uint_t *nprops)
768 781 {
769 782 *nprops = sizeof (varpd_svp_props) / sizeof (char *);
770 783 return (0);
771 784 }
772 785
773 786 /* ARGSUSED */
774 787 static int
775 788 varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
776 789 {
777 790 switch (propid) {
778 791 case 0:
779 792 /* svp/host */
780 793 libvarpd_prop_set_name(vph, varpd_svp_props[0]);
781 794 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
782 795 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
783 796 libvarpd_prop_set_nodefault(vph);
784 797 break;
785 798 case 1:
786 799 /* svp/port */
787 800 libvarpd_prop_set_name(vph, varpd_svp_props[1]);
788 801 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
789 802 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
790 803 (void) libvarpd_prop_set_default(vph, &svp_defport,
791 804 sizeof (svp_defport));
792 805 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
793 806 break;
794 807 case 2:
795 808 /* svp/underlay_ip */
796 809 libvarpd_prop_set_name(vph, varpd_svp_props[2]);
797 810 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
798 811 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
799 812 libvarpd_prop_set_nodefault(vph);
800 813 break;
801 814 case 3:
802 815 /* svp/underlay_port */
803 816 libvarpd_prop_set_name(vph, varpd_svp_props[3]);
804 817 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
805 818 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
806 819 (void) libvarpd_prop_set_default(vph, &svp_defuport,
807 820 sizeof (svp_defuport));
808 821 libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
809 822 break;
810 823 case 4:
811 824 /* svp/dcid */
812 825 libvarpd_prop_set_name(vph, varpd_svp_props[4]);
813 826 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
814 827 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
815 828 libvarpd_prop_set_nodefault(vph);
816 829 libvarpd_prop_set_range_uint32(vph, 1, UINT32_MAX - 1);
817 830 break;
818 831 case 5:
819 832 /* svp/router_oui */
820 833 libvarpd_prop_set_name(vph, varpd_svp_props[5]);
821 834 libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
822 835 libvarpd_prop_set_type(vph, OVERLAY_PROP_T_ETHER);
823 836 libvarpd_prop_set_nodefault(vph);
824 837 break;
825 838 default:
826 839 return (EINVAL);
827 840 }
828 841 return (0);
829 842 }
830 843
831 844 static int
832 845 varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
833 846 {
834 847 svp_t *svp = arg;
835 848
836 849 /* svp/host */
837 850 if (strcmp(pname, varpd_svp_props[0]) == 0) {
838 851 size_t len;
839 852
840 853 mutex_enter(&svp->svp_lock);
841 854 if (svp->svp_host == NULL) {
842 855 *sizep = 0;
843 856 } else {
844 857 len = strlen(svp->svp_host) + 1;
845 858 if (*sizep < len) {
846 859 mutex_exit(&svp->svp_lock);
847 860 return (EOVERFLOW);
848 861 }
849 862 *sizep = len;
850 863 (void) strlcpy(buf, svp->svp_host, *sizep);
851 864 }
852 865 mutex_exit(&svp->svp_lock);
853 866 return (0);
854 867 }
855 868
856 869 /* svp/port */
857 870 if (strcmp(pname, varpd_svp_props[1]) == 0) {
858 871 uint64_t val;
859 872
860 873 if (*sizep < sizeof (uint64_t))
861 874 return (EOVERFLOW);
862 875
863 876 mutex_enter(&svp->svp_lock);
864 877 if (svp->svp_port == 0) {
865 878 *sizep = 0;
866 879 } else {
867 880 val = svp->svp_port;
868 881 bcopy(&val, buf, sizeof (uint64_t));
869 882 *sizep = sizeof (uint64_t);
870 883 }
871 884 mutex_exit(&svp->svp_lock);
872 885 return (0);
873 886 }
874 887
875 888 /* svp/underlay_ip */
876 889 if (strcmp(pname, varpd_svp_props[2]) == 0) {
877 890 if (*sizep < sizeof (struct in6_addr))
878 891 return (EOVERFLOW);
879 892 mutex_enter(&svp->svp_lock);
880 893 if (svp->svp_huip == B_FALSE) {
881 894 *sizep = 0;
882 895 } else {
883 896 bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
884 897 *sizep = sizeof (struct in6_addr);
885 898 }
886 899 mutex_exit(&svp->svp_lock);
887 900 return (0);
888 901 }
889 902
890 903 /* svp/underlay_port */
891 904 if (strcmp(pname, varpd_svp_props[3]) == 0) {
892 905 uint64_t val;
893 906
894 907 if (*sizep < sizeof (uint64_t))
895 908 return (EOVERFLOW);
896 909
897 910 mutex_enter(&svp->svp_lock);
898 911 if (svp->svp_uport == 0) {
899 912 *sizep = 0;
900 913 } else {
901 914 val = svp->svp_uport;
902 915 bcopy(&val, buf, sizeof (uint64_t));
903 916 *sizep = sizeof (uint64_t);
904 917 }
905 918
906 919 mutex_exit(&svp->svp_lock);
907 920 return (0);
908 921 }
909 922
910 923 /* svp/dcid */
911 924 if (strcmp(pname, varpd_svp_props[4]) == 0) {
912 925 uint64_t val;
913 926
914 927 if (*sizep < sizeof (uint64_t))
915 928 return (EOVERFLOW);
916 929
917 930 mutex_enter(&svp->svp_lock);
918 931 if (svp->svp_dcid == 0) {
919 932 *sizep = 0;
920 933 } else {
921 934 val = svp->svp_dcid;
922 935 bcopy(&val, buf, sizeof (uint64_t));
923 936 *sizep = sizeof (uint64_t);
924 937 }
925 938
926 939 mutex_exit(&svp->svp_lock);
927 940 return (0);
928 941 }
929 942
930 943 /* svp/router_oui */
931 944 if (strcmp(pname, varpd_svp_props[5]) == 0) {
932 945 if (*sizep < ETHERADDRL)
933 946 return (EOVERFLOW);
934 947 mutex_enter(&svp->svp_lock);
935 948
936 949 if (ether_is_zero(&svp->svp_router_oui)) {
937 950 *sizep = 0;
938 951 } else {
939 952 bcopy(&svp->svp_router_oui, buf, ETHERADDRL);
940 953 *sizep = ETHERADDRL;
941 954 }
942 955
943 956 mutex_exit(&svp->svp_lock);
944 957 return (0);
945 958 }
946 959 return (EINVAL);
947 960 }
948 961
949 962 static int
950 963 varpd_svp_setprop(void *arg, const char *pname, const void *buf,
951 964 const uint32_t size)
952 965 {
953 966 svp_t *svp = arg;
954 967
955 968 /* svp/host */
956 969 if (strcmp(pname, varpd_svp_props[0]) == 0) {
957 970 char *dup;
958 971 dup = umem_alloc(size, UMEM_DEFAULT);
959 972 if (dup == NULL)
960 973 return (ENOMEM);
961 974 (void) strlcpy(dup, buf, size);
962 975 mutex_enter(&svp->svp_lock);
963 976 if (svp->svp_host != NULL)
964 977 umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
965 978 svp->svp_host = dup;
966 979 mutex_exit(&svp->svp_lock);
967 980 return (0);
968 981 }
969 982
970 983 /* svp/port */
971 984 if (strcmp(pname, varpd_svp_props[1]) == 0) {
972 985 const uint64_t *valp = buf;
973 986 if (size < sizeof (uint64_t))
974 987 return (EOVERFLOW);
975 988
976 989 if (*valp == 0 || *valp > UINT16_MAX)
977 990 return (EINVAL);
978 991
979 992 mutex_enter(&svp->svp_lock);
980 993 svp->svp_port = (uint16_t)*valp;
981 994 mutex_exit(&svp->svp_lock);
982 995 return (0);
983 996 }
984 997
985 998 /* svp/underlay_ip */
986 999 if (strcmp(pname, varpd_svp_props[2]) == 0) {
987 1000 const struct in6_addr *ipv6 = buf;
988 1001
989 1002 if (size < sizeof (struct in6_addr))
990 1003 return (EOVERFLOW);
991 1004
992 1005 if (IN6_IS_ADDR_V4COMPAT(ipv6))
993 1006 return (EINVAL);
994 1007
995 1008 if (IN6_IS_ADDR_MULTICAST(ipv6))
996 1009 return (EINVAL);
997 1010
998 1011 if (IN6_IS_ADDR_6TO4(ipv6))
999 1012 return (EINVAL);
1000 1013
1001 1014 if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
1002 1015 ipaddr_t v4;
1003 1016 IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
1004 1017 if (IN_MULTICAST(v4))
1005 1018 return (EINVAL);
1006 1019 }
1007 1020
1008 1021 mutex_enter(&svp->svp_lock);
1009 1022 bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
1010 1023 svp->svp_huip = B_TRUE;
1011 1024 mutex_exit(&svp->svp_lock);
1012 1025 return (0);
1013 1026 }
1014 1027
1015 1028 /* svp/underlay_port */
1016 1029 if (strcmp(pname, varpd_svp_props[3]) == 0) {
1017 1030 const uint64_t *valp = buf;
1018 1031 if (size < sizeof (uint64_t))
1019 1032 return (EOVERFLOW);
1020 1033
1021 1034 if (*valp == 0 || *valp > UINT16_MAX)
1022 1035 return (EINVAL);
1023 1036
1024 1037 mutex_enter(&svp->svp_lock);
1025 1038 svp->svp_uport = (uint16_t)*valp;
1026 1039 mutex_exit(&svp->svp_lock);
1027 1040
1028 1041 return (0);
1029 1042 }
1030 1043
1031 1044 /* svp/dcid */
1032 1045 if (strcmp(pname, varpd_svp_props[4]) == 0) {
1033 1046 const uint64_t *valp = buf;
1034 1047 if (size < sizeof (uint64_t))
1035 1048 return (EOVERFLOW);
1036 1049
1037 1050 if (*valp == 0 || *valp > UINT32_MAX - 1)
1038 1051 return (EINVAL);
1039 1052
1040 1053 mutex_enter(&svp->svp_lock);
1041 1054 svp->svp_dcid = (uint32_t)*valp;
1042 1055 mutex_exit(&svp->svp_lock);
1043 1056
1044 1057 return (0);
1045 1058 }
1046 1059
1047 1060 /* svp/router_oui */
1048 1061 if (strcmp(pname, varpd_svp_props[5]) == 0) {
1049 1062 if (size < ETHERADDRL)
1050 1063 return (EOVERFLOW);
1051 1064 mutex_enter(&svp->svp_lock);
1052 1065 bcopy(buf, &svp->svp_router_oui, ETHERADDRL);
1053 1066 /* Zero-out the low three bytes. */
1054 1067 svp->svp_router_oui[3] = 0;
1055 1068 svp->svp_router_oui[4] = 0;
1056 1069 svp->svp_router_oui[5] = 0;
1057 1070 mutex_exit(&svp->svp_lock);
1058 1071 return (0);
1059 1072 }
1060 1073
1061 1074 return (EINVAL);
1062 1075 }
1063 1076
1064 1077 static int
1065 1078 varpd_svp_save(void *arg, nvlist_t *nvp)
1066 1079 {
1067 1080 int ret;
1068 1081 svp_t *svp = arg;
1069 1082
1070 1083 mutex_enter(&svp->svp_lock);
1071 1084 /* svp/host */
1072 1085 if (svp->svp_host != NULL) {
1073 1086 if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
1074 1087 svp->svp_host)) != 0) {
1075 1088 mutex_exit(&svp->svp_lock);
1076 1089 return (ret);
1077 1090 }
1078 1091 }
1079 1092
1080 1093 /* svp/port */
1081 1094 if (svp->svp_port != 0) {
1082 1095 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
1083 1096 svp->svp_port)) != 0) {
1084 1097 mutex_exit(&svp->svp_lock);
1085 1098 return (ret);
1086 1099 }
1087 1100 }
1088 1101
1089 1102 /* svp/underlay_ip */
1090 1103 if (svp->svp_huip == B_TRUE) {
1091 1104 char buf[INET6_ADDRSTRLEN];
1092 1105
1093 1106 if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
1094 1107 NULL)
1095 1108 libvarpd_panic("unexpected inet_ntop failure: %d",
1096 1109 errno);
1097 1110
1098 1111 if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
1099 1112 buf)) != 0) {
1100 1113 mutex_exit(&svp->svp_lock);
1101 1114 return (ret);
1102 1115 }
1103 1116 }
1104 1117
1105 1118 /* svp/underlay_port */
1106 1119 if (svp->svp_uport != 0) {
1107 1120 if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
1108 1121 svp->svp_uport)) != 0) {
1109 1122 mutex_exit(&svp->svp_lock);
1110 1123 return (ret);
1111 1124 }
1112 1125 }
1113 1126
1114 1127 /* svp/dcid */
1115 1128 if (svp->svp_dcid != 0) {
1116 1129 if ((ret = nvlist_add_uint32(nvp, varpd_svp_props[4],
1117 1130 svp->svp_dcid)) != 0) {
1118 1131 mutex_exit(&svp->svp_lock);
1119 1132 return (ret);
1120 1133 }
1121 1134 }
1122 1135
1123 1136 /* svp/router_oui */
1124 1137 if (!ether_is_zero(&svp->svp_router_oui)) {
1125 1138 char buf[ETHERADDRSTRL];
1126 1139
1127 1140 if (ether_ntoa_r((struct ether_addr *)&svp->svp_router_oui,
1128 1141 buf) == NULL) {
1129 1142 libvarpd_panic("unexpected ether_ntoa_r failure: %d",
1130 1143 errno);
1131 1144 }
1132 1145
1133 1146 if ((ret = nvlist_add_string(nvp, varpd_svp_props[5],
1134 1147 buf)) != 0) {
1135 1148 mutex_exit(&svp->svp_lock);
1136 1149 return (ret);
1137 1150 }
1138 1151 }
1139 1152
1140 1153 mutex_exit(&svp->svp_lock);
1141 1154 return (0);
1142 1155 }
1143 1156
1144 1157 static int
1145 1158 varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
1146 1159 overlay_plugin_dest_t dest, void **outp)
1147 1160 {
1148 1161 int ret;
1149 1162 svp_t *svp;
1150 1163 char *ipstr, *hstr, *etherstr;
1151 1164
1152 1165 if (varpd_svp_valid_dest(dest) == B_FALSE)
1153 1166 return (ENOTSUP);
1154 1167
1155 1168 if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
1156 1169 return (ret);
1157 1170
1158 1171 /* svp/host */
1159 1172 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
1160 1173 &hstr)) != 0) {
1161 1174 if (ret != ENOENT) {
1162 1175 varpd_svp_destroy(svp);
1163 1176 return (ret);
1164 1177 }
1165 1178 svp->svp_host = NULL;
1166 1179 } else {
1167 1180 size_t blen = strlen(hstr) + 1;
1168 1181 svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
1169 1182 (void) strlcpy(svp->svp_host, hstr, blen);
1170 1183 }
1171 1184
1172 1185 /* svp/port */
1173 1186 if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
1174 1187 &svp->svp_port)) != 0) {
1175 1188 if (ret != ENOENT) {
1176 1189 varpd_svp_destroy(svp);
1177 1190 return (ret);
1178 1191 }
1179 1192 svp->svp_port = 0;
1180 1193 }
1181 1194
1182 1195 /* svp/underlay_ip */
1183 1196 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
1184 1197 &ipstr)) != 0) {
1185 1198 if (ret != ENOENT) {
1186 1199 varpd_svp_destroy(svp);
1187 1200 return (ret);
1188 1201 }
1189 1202 svp->svp_huip = B_FALSE;
1190 1203 } else {
1191 1204 ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
1192 1205 if (ret == -1) {
1193 1206 assert(errno == EAFNOSUPPORT);
1194 1207 libvarpd_panic("unexpected inet_pton failure: %d",
1195 1208 errno);
1196 1209 }
1197 1210
1198 1211 if (ret == 0) {
1199 1212 varpd_svp_destroy(svp);
1200 1213 return (EINVAL);
1201 1214 }
1202 1215 svp->svp_huip = B_TRUE;
1203 1216 }
1204 1217
1205 1218 /* svp/underlay_port */
1206 1219 if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
1207 1220 &svp->svp_uport)) != 0) {
1208 1221 if (ret != ENOENT) {
1209 1222 varpd_svp_destroy(svp);
1210 1223 return (ret);
1211 1224 }
1212 1225 svp->svp_uport = 0;
1213 1226 }
1214 1227
1215 1228 /* svp/dcid */
1216 1229 if ((ret = nvlist_lookup_uint32(nvp, varpd_svp_props[4],
1217 1230 &svp->svp_dcid)) != 0) {
1218 1231 if (ret != ENOENT) {
1219 1232 varpd_svp_destroy(svp);
1220 1233 return (ret);
1221 1234 }
1222 1235 svp->svp_dcid = 0;
1223 1236 }
1224 1237
1225 1238 /* svp/router_oui */
1226 1239 if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[5],
1227 1240 &etherstr)) != 0) {
1228 1241 if (ret != ENOENT) {
1229 1242 varpd_svp_destroy(svp);
1230 1243 return (ret);
1231 1244 }
1232 1245 bzero(&svp->svp_router_oui, ETHERADDRL);
1233 1246 } else if (ether_aton_r(etherstr,
1234 1247 (struct ether_addr *)&svp->svp_router_oui) == NULL) {
1235 1248 libvarpd_panic("unexpected ether_aton_r failure: %d", errno);
1236 1249 }
1237 1250
1238 1251 svp->svp_hdl = hdl;
1239 1252 *outp = svp;
1240 1253 return (0);
1241 1254 }
1242 1255
1243 1256 static void
1244 1257 varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
1245 1258 const struct sockaddr *sock, uint16_t vlan __unused, uint8_t *out)
1246 1259 {
1247 1260 svp_t *svp = arg;
1248 1261 svp_lookup_t *svl;
1249 1262
1250 1263 if (type != VARPD_QTYPE_ETHERNET) {
1251 1264 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1252 1265 return;
1253 1266 }
1254 1267
1255 1268 svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
1256 1269 if (svl == NULL) {
1257 1270 libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
1258 1271 return;
1259 1272 }
1260 1273
1261 1274 svl->svl_type = SVP_L_VL3;
1262 1275 svl->svl_u.svl_vl3.svl_vah = vah;
1263 1276 svl->svl_u.svl_vl3.svl_out = out;
1264 1277 svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
1265 1278 }
1266 1279
1267 1280 static const varpd_plugin_ops_t varpd_svp_ops = {
1268 1281 0,
1269 1282 varpd_svp_create,
1270 1283 varpd_svp_start,
1271 1284 varpd_svp_stop,
1272 1285 varpd_svp_destroy,
1273 1286 NULL,
1274 1287 varpd_svp_lookup,
1275 1288 varpd_svp_nprops,
1276 1289 varpd_svp_propinfo,
1277 1290 varpd_svp_getprop,
1278 1291 varpd_svp_setprop,
1279 1292 varpd_svp_save,
1280 1293 varpd_svp_restore,
1281 1294 varpd_svp_arp,
1282 1295 NULL
1283 1296 };
1284 1297
1285 1298 static int
1286 1299 svp_bunyan_init(void)
1287 1300 {
1288 1301 int ret;
1289 1302
1290 1303 if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
1291 1304 return (ret);
1292 1305 ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
1293 1306 bunyan_stream_fd, (void *)STDERR_FILENO);
1294 1307 if (ret != 0)
1295 1308 bunyan_fini(svp_bunyan);
1296 1309 return (ret);
1297 1310 }
1298 1311
1299 1312 static void
1300 1313 svp_bunyan_fini(void)
1301 1314 {
1302 1315 if (svp_bunyan != NULL)
1303 1316 bunyan_fini(svp_bunyan);
1304 1317 }
1305 1318
1306 1319 #pragma init(varpd_svp_init)
1307 1320 static void
1308 1321 varpd_svp_init(void)
1309 1322 {
1310 1323 int err;
1311 1324 varpd_plugin_register_t *vpr;
1312 1325
1313 1326 if (svp_bunyan_init() != 0)
1314 1327 return;
1315 1328
1316 1329 if ((err = svp_host_init()) != 0) {
1317 1330 (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
1318 1331 BUNYAN_T_INT32, "error", err,
1319 1332 BUNYAN_T_END);
1320 1333 svp_bunyan_fini();
1321 1334 return;
1322 1335 }
1323 1336
1324 1337 svp_lookup_cache = umem_cache_create("svp_lookup",
1325 1338 sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
1326 1339 if (svp_lookup_cache == NULL) {
1327 1340 (void) bunyan_error(svp_bunyan,
1328 1341 "failed to create svp_lookup cache",
1329 1342 BUNYAN_T_INT32, "error", errno,
1330 1343 BUNYAN_T_END);
1331 1344 svp_bunyan_fini();
1332 1345 return;
1333 1346 }
1334 1347
1335 1348 if ((err = svp_event_init()) != 0) {
1336 1349 (void) bunyan_error(svp_bunyan,
1337 1350 "failed to init event subsystem",
1338 1351 BUNYAN_T_INT32, "error", err,
1339 1352 BUNYAN_T_END);
1340 1353 svp_bunyan_fini();
1341 1354 umem_cache_destroy(svp_lookup_cache);
1342 1355 return;
1343 1356 }
1344 1357
1345 1358 if ((err = svp_timer_init()) != 0) {
1346 1359 (void) bunyan_error(svp_bunyan,
1347 1360 "failed to init timer subsystem",
1348 1361 BUNYAN_T_INT32, "error", err,
1349 1362 BUNYAN_T_END);
1350 1363 svp_event_fini();
1351 1364 umem_cache_destroy(svp_lookup_cache);
1352 1365 svp_bunyan_fini();
1353 1366 return;
1354 1367 }
1355 1368
1356 1369 if ((err = svp_remote_init()) != 0) {
1357 1370 (void) bunyan_error(svp_bunyan,
1358 1371 "failed to init remote subsystem",
1359 1372 BUNYAN_T_INT32, "error", err,
1360 1373 BUNYAN_T_END);
1361 1374 svp_event_fini();
1362 1375 umem_cache_destroy(svp_lookup_cache);
1363 1376 svp_bunyan_fini();
1364 1377 return;
1365 1378 }
1366 1379
1367 1380 vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
1368 1381 if (vpr == NULL) {
1369 1382 (void) bunyan_error(svp_bunyan,
1370 1383 "failed to alloc varpd plugin",
1371 1384 BUNYAN_T_INT32, "error", err,
1372 1385 BUNYAN_T_END);
1373 1386 svp_remote_fini();
1374 1387 svp_event_fini();
1375 1388 umem_cache_destroy(svp_lookup_cache);
1376 1389 svp_bunyan_fini();
1377 1390 return;
1378 1391 }
1379 1392
1380 1393 vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
1381 1394 vpr->vpr_name = "svp";
1382 1395 vpr->vpr_ops = &varpd_svp_ops;
1383 1396
1384 1397 if ((err = libvarpd_plugin_register(vpr)) != 0) {
1385 1398 (void) bunyan_error(svp_bunyan,
1386 1399 "failed to register varpd plugin",
1387 1400 BUNYAN_T_INT32, "error", err,
1388 1401 BUNYAN_T_END);
1389 1402 svp_remote_fini();
1390 1403 svp_event_fini();
1391 1404 umem_cache_destroy(svp_lookup_cache);
1392 1405 svp_bunyan_fini();
1393 1406
1394 1407 }
1395 1408 libvarpd_plugin_free(vpr);
1396 1409 }