1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 /*
17 * Writes (new rules) and reads (rule dump) go here. So do the
18 * ins/outs of reading & writing.
19 */
20
21 #include <sys/ddi.h>
22 #include <sys/dtrace.h>
23 #include <sys/debug.h>
24 #include <inet/vxlnat_impl.h>
25 #include <inet/ip_if.h> /* XXX KEBE SAYS CHEESY HACK */
26
27 /*
28 * These are all initialized to NULL or 0.
29 *
30 * If a VXNM_DUMP is requested, these get allocated/set. vxlnat_read()
31 * calls will consume them, and once delivered the last bytes read will
32 * cause these to be freed and reset to NULL/0. Cheesy, but this is a
33 * one-at-a-time thing. Protected by vxlnat_mutex.
34 */
35 static vxn_msg_t *vxlnat_dumpbuf;
36 static size_t vxlnat_initial; /* non-zero if no read yet. */
37 static size_t vxlnat_dumpcount;
38 static size_t vxlnat_dumpcurrent;
39
40 /*
41 * Store per-vnet-state in AVL tree. We could be handling 1000s or more...
42 * Could split this into a hash table of AVL trees if need be.
43 */
44 static krwlock_t vxlnat_vnet_lock; /* Could be mutex if we use refhold. */
45 static avl_tree_t vxlnat_vnets;
46
47 static void vxlnat_rule_unlink(vxlnat_rule_t *);
48 static void vxlnat_fixed_unlink(vxlnat_fixed_t *);
49 /* In vxlnat_nat.c */
50 extern void vxlnat_remote_unlink(vxlnat_remote_t *);
51
52 /*
53 * Comparison function for vnet AVL tree.
54 */
55 static int
56 vxlnat_vnetid_cmp(const void *first, const void *second)
57 {
58 uint32_t first_vnetid, second_vnetid;
59
60 first_vnetid = ((vxlnat_vnet_t *)first)->vxnv_vnetid;
61 second_vnetid = ((vxlnat_vnet_t *)second)->vxnv_vnetid;
62
63 if (first_vnetid < second_vnetid)
64 return (-1);
65 if (first_vnetid > second_vnetid)
66 return (1);
67 return (0);
68 }
69
70 /*
71 *
72 * NOTE: Many structures start with the form:
73 *
74 * struct foo {
75 * avl_node_t node;
76 * in6_addr_t address_which_is_search_key;
77 * ....
78 *
79 * We will use this same AVL comparison function for many of these structures.
80 */
81 int
82 vxlnat_tree_plus_in6_cmp(const void *first, const void *second)
83 {
84 in6_addr_t *firstaddr, *secondaddr;
85 int ret;
86
87 firstaddr = (in6_addr_t *)(((avl_node_t *)first) + 1);
88 secondaddr = (in6_addr_t *)(((avl_node_t *)second) + 1);
89
90 ret = memcmp(firstaddr, secondaddr, sizeof (in6_addr_t));
91 if (ret > 0)
92 return (1);
93 if (ret < 0)
94 return (-1);
95 return (0);
96 }
97
98 /*
99 * Find-and-reference-hold a vnet. If none present, create one.
100 * "vnetid" MUST be in wire-order and its one byte cleared.
101 */
102 vxlnat_vnet_t *
103 vxlnat_get_vnet(uint32_t vnetid, boolean_t create_on_miss)
104 {
105 vxlnat_vnet_t *vnet, searcher;
106 avl_index_t where;
107
108 /* Cheesy, but we KNOW vxnv_vnetid is the only thing checked. */
109 searcher.vxnv_vnetid = vnetid;
110
111 rw_enter(&vxlnat_vnet_lock, create_on_miss ? RW_WRITER : RW_READER);
112 vnet = (vxlnat_vnet_t *)avl_find(&vxlnat_vnets, &searcher, &where);
113 if (vnet == NULL && create_on_miss) {
114 vnet = kmem_zalloc(sizeof (*vnet), KM_SLEEP);
115 /* KM_SLEEP means non-NULL guaranteed. */
116 vnet->vxnv_refcount = 1; /* Internment reference. */
117 vnet->vxnv_vnetid = vnetid;
118 /* Initialize 1-1 mappings... */
119 rw_init(&vnet->vxnv_fixed_lock, NULL, RW_DRIVER, NULL);
120 avl_create(&vnet->vxnv_fixed_ips, vxlnat_tree_plus_in6_cmp,
121 sizeof (vxlnat_fixed_t), 0);
122 /* Initialize NAT rules. (NAT mutex is zeroed-out.) */
123 list_create(&vnet->vxnv_rules, sizeof (vxlnat_rule_t), 0);
124 #ifdef notyet
125 /* XXX KEBE SAYS INITIALIZE NAT flows... */
126 #endif /* notyet */
127 /*
128 * Initialize remote VXLAN destination cache.
129 * (remotes mutex is zeroed-out.)
130 */
131 avl_create(&vnet->vxnv_remotes, vxlnat_tree_plus_in6_cmp,
132 sizeof (vxlnat_remote_t), 0);
133
134 avl_insert(&vxlnat_vnets, vnet, where);
135 }
136 if (vnet != NULL)
137 VXNV_REFHOLD(vnet); /* Caller's reference. */
138 rw_exit(&vxlnat_vnet_lock);
139
140 return (vnet);
141 }
142
143 void
144 vxlnat_vnet_free(vxlnat_vnet_t *vnet)
145 {
146 /* XXX KEBE SAYS FILL ME IN */
147 ASSERT0(vnet->vxnv_refcount);
148 /* XXX KEBE ASKS -- assert detachment? */
149
150 kmem_free(vnet, sizeof (*vnet));
151 }
152
153 static void
154 vxlnat_vnet_unlink_locked(vxlnat_vnet_t *vnet)
155 {
156 ASSERT3U(vnet->vxnv_refcount, >=, 1);
157
158 ASSERT(RW_WRITE_HELD(&vxlnat_vnet_lock));
159 avl_remove(&vxlnat_vnets, vnet);
160 /* XXX KEBE ASKS --> Mark as condemned? */
161
162 /* Unlink all NAT rules */
163 mutex_enter(&vnet->vxnv_rule_lock);
164 while (!list_is_empty(&vnet->vxnv_rules)) {
165 /* Will decrement vnet's refcount too. */
166 vxlnat_rule_unlink(
167 (vxlnat_rule_t *)list_head(&vnet->vxnv_rules));
168 }
169 mutex_exit(&vnet->vxnv_rule_lock);
170 /* XXX KEBE SAYS unlink all 1-1 mappings */
171 rw_enter(&vnet->vxnv_fixed_lock, RW_WRITER);
172 while (!avl_is_empty(&vnet->vxnv_fixed_ips)) {
173 /* Will decrement vnet's refcount too. */
174 vxlnat_fixed_unlink(
175 (vxlnat_fixed_t *)avl_first(&vnet->vxnv_fixed_ips));
176 }
177 rw_exit(&vnet->vxnv_fixed_lock);
178
179 /* Unlink all remotes */
180 mutex_enter(&vnet->vxnv_remote_lock);
181 while (!avl_is_empty(&vnet->vxnv_remotes)) {
182 /* Will decrement vnet's refcount too. */
183 vxlnat_remote_unlink(
184 (vxlnat_remote_t *)avl_first(&vnet->vxnv_remotes));
185 }
186 mutex_exit(&vnet->vxnv_remote_lock);
187
188 /* XXX KEBE SAYS unlink all NAT flows */
189
190 VXNV_REFRELE(vnet); /* Internment reference. */
191 }
192
193 /*
194 * Assume it's refheld by the caller, so we will drop two references
195 * explicitly (caller's and internment), plus free any rules.
196 */
197 void
198 vxlnat_vnet_unlink(vxlnat_vnet_t *vnet)
199 {
200 ASSERT3U(vnet->vxnv_refcount, >=, 2);
201 rw_enter(&vxlnat_vnet_lock, RW_WRITER);
202 vxlnat_vnet_unlink_locked(vnet);
203 rw_exit(&vxlnat_vnet_lock);
204 /*
205 * At this point, we've decremented the refcount by one with the
206 * unlink. Drop the caller's now.
207 */
208 VXNV_REFRELE(vnet);
209 }
210
211 /*
212 * Add a (vnetid+prefix => external) rule.
213 */
214 static int
215 vxlnat_nat_rule(vxn_msg_t *vxnm)
216 {
217 vxlnat_vnet_t *vnet;
218 vxlnat_rule_t *rule;
219 uint32_t vnetid;
220
221 ASSERT(MUTEX_HELD(&vxlnat_mutex));
222
223 /* Reserve the requested public IP for shared use. */
224 if (!vxlnat_public_hold(&vxnm->vxnm_public, B_FALSE))
225 return (EADDRNOTAVAIL);
226
227 vnetid = VXLAN_ID_HTON(vxnm->vxnm_vnetid);
228 vnet = vxlnat_get_vnet(vnetid, B_TRUE);
229 if (vnet == NULL) {
230 /* RARE case of failed allocation or other disaster. */
231 vxlnat_public_rele(&vxnm->vxnm_public);
232 return (ENOMEM);
233 }
234
235 /* Now we have a reference-held vnet, create a rule for it. */
236 rule = kmem_alloc(sizeof (*rule), KM_SLEEP);
237 /* KM_SLEEP means non-NULL guaranteed. */
238 rule->vxnr_vnet = vnet; /* vnet already refheld, remember?. */
239 /* XXX KEBE ASKS, check the vxnm more carefully? */
240 rule->vxnr_myaddr = vxnm->vxnm_private;
241 rule->vxnr_pubaddr = vxnm->vxnm_public;
242 rule->vxnr_prefix = vxnm->vxnm_prefix;
243 /* For easier packet matching, keep vlanid in network order. */
244 rule->vxnr_vlanid = htons(vxnm->vxnm_vlanid);
245 bcopy(vxnm->vxnm_ether_addr, rule->vxnr_myether, ETHERADDRL);
246 rule->vxnr_refcount = 1; /* Internment reference. */
247 list_link_init(&rule->vxnr_link);
248
249 /* Put rule into vnet. */
250 mutex_enter(&vnet->vxnv_rule_lock);
251 /* XXX KEBE ASKS --> Check for collisions?!? */
252 list_insert_tail(&vnet->vxnv_rules, rule);
253 mutex_exit(&vnet->vxnv_rule_lock);
254
255 return (0);
256 }
257
258 void
259 vxlnat_rule_free(vxlnat_rule_t *rule)
260 {
261 ASSERT3P(rule->vxnr_vnet, ==, NULL);
262 ASSERT3P(rule->vxnr_link.list_next, ==, NULL);
263 ASSERT3P(rule->vxnr_link.list_prev, ==, NULL);
264 ASSERT0(rule->vxnr_refcount);
265 vxlnat_public_rele(&rule->vxnr_pubaddr);
266 kmem_free(rule, sizeof (*rule));
267 }
268
269 static void
270 vxlnat_rule_unlink(vxlnat_rule_t *rule)
271 {
272 vxlnat_vnet_t *vnet = rule->vxnr_vnet;
273
274 ASSERT3P(vnet, !=, NULL);
275 ASSERT(MUTEX_HELD(&vnet->vxnv_rule_lock));
276
277 list_remove(&vnet->vxnv_rules, rule);
278 VXNV_REFRELE(vnet);
279 rule->vxnr_vnet = NULL; /* This condemns this rule. */
280 VXNR_REFRELE(rule);
281 }
282
283 static int
284 vxlnat_flush(void)
285 {
286 vxlnat_closesock();
287 /* XXX KEBE SAYS DO OTHER STATE FLUSHING TOO. */
288
289 /* Flush out vnets. */
290 rw_enter(&vxlnat_vnet_lock, RW_WRITER);
291 while (!avl_is_empty(&vxlnat_vnets))
292 vxlnat_vnet_unlink_locked(avl_first(&vxlnat_vnets));
293 rw_exit(&vxlnat_vnet_lock);
294 if (vxlnat_dumpbuf != NULL) {
295 kmem_free(vxlnat_dumpbuf,
296 vxlnat_dumpcount * sizeof (vxn_msg_t));
297 vxlnat_dumpbuf = NULL;
298 vxlnat_initial = vxlnat_dumpcount = vxlnat_dumpcurrent = 0;
299 }
300 return (0);
301 }
302
303 void
304 vxlnat_fixed_free(vxlnat_fixed_t *fixed)
305 {
306 ASSERT0(fixed->vxnf_refcount);
307
308 vxlnat_public_rele(&fixed->vxnf_pubaddr);
309 kmem_free(fixed, sizeof (*fixed));
310 }
311
312 static void
313 vxlnat_fixed_unlink(vxlnat_fixed_t *fixed)
314 {
315 vxlnat_vnet_t *vnet = fixed->vxnf_vnet;
316 ire_t *ire = fixed->vxnf_ire;
317
318 ASSERT3P(vnet, !=, NULL);
319 ASSERT(RW_WRITE_HELD(&vnet->vxnv_fixed_lock));
320
321 /* Rid ourselves of the IRE now. */
322 if (ire != NULL) {
323 ASSERT(ire->ire_type == IRE_LOCAL);
324 ASSERT3P((void *)ire->ire_dep_sib_next, ==, (void *)fixed);
325
326 /* XXX KEBE SAYS CHEESY HACK. */
327 if (fixed->vxnf_clear_router)
328 ire->ire_ill->ill_flags &= ~ILLF_ROUTER;
329
330 ire->ire_dep_sib_next = NULL;
331 VXNF_REFRELE(fixed); /* ire's hold on us. */
332 /* Rewire IRE back to normal. */
333 ire->ire_recvfn = (ire->ire_ipversion == IPV4_VERSION) ?
334 ire_recv_local_v4 : ire_recv_local_v6;
335 ire_refrele(ire);
336 }
337
338 /* And the remote, if it's there. */
339 if (fixed->vxnf_remote != NULL) {
340 VXNREM_REFRELE(fixed->vxnf_remote);
341 fixed->vxnf_remote = NULL;
342 }
343
344 avl_remove(&vnet->vxnv_fixed_ips, fixed);
345 fixed->vxnf_vnet = NULL; /* This condemns this 1-1 mapping. */
346 VXNV_REFRELE(vnet);
347 VXNF_REFRELE(fixed);
348 }
349
350 /*
351 * Add a 1-1 (vnetid+IP <==> external) rule.
352 */
353 static int
354 vxlnat_fixed_ip(vxn_msg_t *vxnm)
355 {
356 vxlnat_vnet_t *vnet;
357 vxlnat_fixed_t *fixed;
358 uint32_t vnetid;
359 avl_index_t where;
360 int rc;
361 ire_t *ire;
362 ip_stack_t *ipst;
363
364 /* XXX KEBE SAYS FILL ME IN. */
365 ASSERT(MUTEX_HELD(&vxlnat_mutex));
366
367 /* Reserve the requested public IP for exclusive use. */
368 if (!vxlnat_public_hold(&vxnm->vxnm_public, B_TRUE))
369 return (EADDRNOTAVAIL);
370
371 vnetid = VXLAN_ID_HTON(vxnm->vxnm_vnetid);
372 vnet = vxlnat_get_vnet(vnetid, B_TRUE);
373 if (vnet == NULL) {
374 /* RARE case of failed allocation or other disaster. */
375 rc = ENOMEM;
376 goto fail;
377 }
378
379 fixed = kmem_zalloc(sizeof (*fixed), KM_SLEEP);
380 /* KM_SLEEP means non-NULL guaranteed. */
381 fixed->vxnf_vnet = vnet; /* vnet already refheld, remember? */
382 /* XXX KEBE ASKS, check the vxnm more carefully? */
383 fixed->vxnf_addr = vxnm->vxnm_private;
384 fixed->vxnf_pubaddr = vxnm->vxnm_public;
385 fixed->vxnf_refcount = 1; /* Internment reference. */
386 bcopy(&vxnm->vxnm_ether_addr, &fixed->vxnf_myether, ETHERADDRL);
387 fixed->vxnf_vlanid = htons(vxnm->vxnm_vlanid);
388
389 /*
390 * Find a local-address IRE for the public address.
391 */
392 ipst = vxlnat_netstack->netstack_ip;
393 ire = IN6_IS_ADDR_V4MAPPED(&fixed->vxnf_pubaddr) ?
394 ire_ftable_lookup_simple_v4(fixed->vxnf_pubaddr._S6_un._S6_u32[3],
395 0, ipst, NULL) :
396 ire_ftable_lookup_simple_v6(&fixed->vxnf_pubaddr, 0, ipst, NULL);
397
398 if (ire == NULL) {
399 /*
400 * Can't find a local IRE. For now, return.
401 * XXX KEBE ASKS --> Do we instead put a new entry in
402 * there? Or do we count on zone/netstack configuration
403 * to make sure the requested external address is there?!
404 */
405 kmem_free(fixed, sizeof (*fixed));
406 rc = EADDRNOTAVAIL;
407 goto fail;
408 }
409
410 /*
411 * Check the IRE for appropriate properties.
412 *
413 * This may change as we implement, but for now, we MUST have an ipif
414 * (local address) for the public IP. This can/should be on the
415 * public NIC OR on a my-netstack-only etherstub to enable
416 * instantiating redundant versions of vxlnat on other netstacks on
417 * other {zones,machines} without triggering DAD.
418 */
419 if (ire->ire_type != IRE_LOCAL) {
420 ire_refrele(ire);
421 kmem_free(fixed, sizeof (*fixed));
422 rc = EADDRNOTAVAIL; /* XXX KEBE ASKS different errno? */
423 goto fail;
424 }
425
426 /* Put the 1-1 mapping in place. */
427 rw_enter(&vnet->vxnv_fixed_lock, RW_WRITER);
428 if (avl_find(&vnet->vxnv_fixed_ips, fixed, &where) != NULL) {
429 /* Oh crap, we have an internal IP mapped already. */
430 ire_refrele(ire);
431 kmem_free(fixed, sizeof (*fixed));
432 rc = EEXIST;
433 } else {
434 avl_insert(&vnet->vxnv_fixed_ips, fixed, where);
435 rc = 0;
436 /*
437 * CHEESY USE OF POINTERS WARNING: I'm going to use
438 * ire_dep_children for this IRE_LOCAL as a backpointer to
439 * this 'fixed'. This'll allow rapid packet processing.
440 * Inspection seems to indicate that IRE_LOCAL ires NEVER use
441 * the ire_dep* pointers, so we'll use one (and independent of
442 * ip_stack_t's ips_ire_dep_lock as well). If I'm wrong,
443 * fix it here and add a new pointer in ip.h for ire_t.
444 */
445 ire->ire_dep_sib_next = (ire_t *)fixed;
446 /* and then rewire the ire receive and send functions. */
447 if (ire->ire_ipversion == IPV4_VERSION) {
448 ire->ire_recvfn = vxlnat_fixed_ire_recv_v4;
449 ire->ire_sendfn = vxlnat_fixed_ire_send_v4;
450 } else {
451 ASSERT(ire->ire_ipversion == IPV6_VERSION);
452 ire->ire_recvfn = vxlnat_fixed_ire_recv_v6;
453 ire->ire_sendfn = vxlnat_fixed_ire_send_v6;
454 }
455 VXNF_REFHOLD(fixed); /* ire holds us too... */
456 fixed->vxnf_ire = ire;
457 /*
458 * XXX KEBE SAYS CHEESY HACK:
459 */
460 if (!(ire->ire_ill->ill_flags & ILLF_ROUTER)) {
461 fixed->vxnf_clear_router = B_TRUE;
462 ire->ire_ill->ill_flags |= ILLF_ROUTER;
463 } else {
464 /* Just so we're clear... */
465 fixed->vxnf_clear_router = B_FALSE;
466 }
467 }
468 rw_exit(&vnet->vxnv_fixed_lock);
469
470 fail:
471 if (rc != 0)
472 vxlnat_public_rele(&vxnm->vxnm_public);
473
474 return (rc);
475 }
476
477 static void
478 vxlnat_rule_to_msg(vxn_msg_t *msg, vxlnat_rule_t *rule)
479 {
480 msg->vxnm_type = VXNM_RULE;
481 msg->vxnm_vnetid = VXLAN_ID_NTOH(rule->vxnr_vnet->vxnv_vnetid);
482 msg->vxnm_prefix = rule->vxnr_prefix;
483 msg->vxnm_vlanid = ntohs(rule->vxnr_vlanid);
484 bcopy(rule->vxnr_myether, msg->vxnm_ether_addr, ETHERADDRL);
485 msg->vxnm_public = rule->vxnr_pubaddr;
486 msg->vxnm_private = rule->vxnr_myaddr;
487 }
488
489 static void
490 vxlnat_fixed_to_msg(vxn_msg_t *msg, vxlnat_fixed_t *fixed)
491 {
492 msg->vxnm_type = VXNM_FIXEDIP;
493 msg->vxnm_vnetid = VXLAN_ID_NTOH(fixed->vxnf_vnet->vxnv_vnetid);
494 msg->vxnm_prefix = 0;
495 msg->vxnm_vlanid = ntohs(fixed->vxnf_vlanid);
496 bcopy(fixed->vxnf_myether, msg->vxnm_ether_addr, ETHERADDRL);
497 msg->vxnm_public = fixed->vxnf_pubaddr;
498 msg->vxnm_private = fixed->vxnf_addr;
499 }
500
501 static int
502 vxlnat_dump(void)
503 {
504 int rc = 0;
505 size_t entries = 0;
506 vxlnat_vnet_t *vnet;
507 vxlnat_fixed_t *fixed;
508 vxlnat_rule_t *rule;
509 vxn_msg_t *current;
510
511 ASSERT(MUTEX_HELD(&vxlnat_mutex));
512
513 /*
514 * XXX KEBE SAYS setup vxlnat_dump* above.
515 * XXX KEBE SAYS If function fails for reasons that aren't "dump in
516 * progress", make sure it keeps vxlnat_dump* stuff clean
517 *
518 * NOTE: Other commands are excluded at this point, but packet
519 * processing is not. OTOH, packet processing doesn't affect any
520 * entities we dump (at this time). We only dump things that can be
521 * added with commands. (So no remote VXLAN peers and no NAT flows.)
522 */
523
524 /* Lock down things. */
525 rw_enter(&vxlnat_vnet_lock, RW_READER);
526 if (avl_numnodes(&vxlnat_vnets) == 0)
527 goto bail; /* Nothing to see here, move along. */
528
529 /*
530 * This is going to be inefficient, requiring two passes through each
531 * vnet. The first pass locks-down and counts. Then we allocate
532 * based on the count. The second pass copies out and unlocks.
533 */
534 for (vnet = avl_first(&vxlnat_vnets); vnet != NULL;
535 vnet = AVL_NEXT(&vxlnat_vnets, vnet)) {
536 rw_enter(&vnet->vxnv_fixed_lock, RW_READER);
537 entries += avl_numnodes(&vnet->vxnv_fixed_ips);
538 mutex_enter(&vnet->vxnv_rule_lock);
539 /* Let's hope this isn't a big number... */
540 for (rule = list_head(&vnet->vxnv_rules); rule != NULL;
541 rule = list_next(&vnet->vxnv_rules, rule)) {
542 entries++;
543 }
544 /* XXX KEBE ASKS -- other fields?!? */
545 }
546 if (entries == 0)
547 goto bail; /* VNETs but with no rules AND no 1-1s?!? */
548 /* Don't be too agressive in allocating this. */
549 vxlnat_dumpbuf = kmem_alloc(entries * sizeof (vxn_msg_t),
550 KM_NOSLEEP | KM_NORMALPRI);
551 if (vxlnat_dumpbuf == NULL)
552 rc = ENOMEM; /* We still have to unlock everything. */
553 current = vxlnat_dumpbuf;
554
555 /* Second pass. */
556 for (vnet = avl_first(&vxlnat_vnets); vnet != NULL;
557 vnet = AVL_NEXT(&vxlnat_vnets, vnet)) {
558 /* XXX KEBE ASKS -- other fields?!? */
559 for (rule = list_head(&vnet->vxnv_rules); rule != NULL;
560 rule = list_next(&vnet->vxnv_rules, rule)) {
561 if (rc == 0) {
562 vxlnat_rule_to_msg(current, rule);
563 current++;
564 }
565 }
566 mutex_exit(&vnet->vxnv_rule_lock);
567 for (fixed = avl_first(&vnet->vxnv_fixed_ips); fixed != NULL;
568 fixed = AVL_NEXT(&vnet->vxnv_fixed_ips, fixed)) {
569 if (rc == 0) {
570 vxlnat_fixed_to_msg(current, fixed);
571 current++;
572 }
573 }
574 rw_exit(&vnet->vxnv_fixed_lock);
575 }
576 vxlnat_dumpcount = vxlnat_initial = entries;
577 vxlnat_dumpcurrent = 0;
578 ASSERT3P((vxlnat_dumpbuf + entries), ==, current);
579
580 bail:
581 rw_exit(&vxlnat_vnet_lock);
582 return (rc);
583 }
584
585 int
586 vxlnat_command(vxn_msg_t *vxnm)
587 {
588 int rc;
589
590 switch (vxnm->vxnm_type) {
591 case VXNM_VXLAN_ADDR:
592 rc = vxlnat_vxlan_addr(&vxnm->vxnm_private);
593 break;
594 case VXNM_RULE:
595 rc = vxlnat_nat_rule(vxnm);
596 break;
597 case VXNM_FIXEDIP:
598 rc = vxlnat_fixed_ip(vxnm);
599 break;
600 case VXNM_FLUSH:
601 rc = vxlnat_flush();
602 break;
603 case VXNM_DUMP:
604 rc = vxlnat_dump();
605 break;
606 default:
607 rc = EINVAL;
608 break;
609 }
610
611 return (rc);
612 }
613
614 void
615 vxlnat_state_init(void)
616 {
617 ASSERT(MUTEX_HELD(&vxlnat_mutex));
618 rw_init(&vxlnat_vnet_lock, NULL, RW_DRIVER, NULL);
619 avl_create(&vxlnat_vnets, vxlnat_vnetid_cmp, sizeof (vxlnat_vnet_t), 0);
620 vxlnat_public_init();
621 /* XXX KEBE SAYS -- more here. */
622 }
623
624 void
625 vxlnat_state_fini(void)
626 {
627 ASSERT(MUTEX_HELD(&vxlnat_mutex));
628 (void) vxlnat_flush(); /* If we fail, we're in bigger trouble anyway. */
629 vxlnat_public_init();
630 avl_destroy(&vxlnat_vnets);
631 rw_destroy(&vxlnat_vnet_lock);
632 }
633
634 int
635 vxlnat_read_dump(struct uio *uiop)
636 {
637 int rc = 0;
638 size_t dumpprogress = 0;
639
640 mutex_enter(&vxlnat_mutex);
641
642 /*
643 * Initial-case ==> dumpbuf with none delivered yet.
644 * Utter an 8-byte count.
645 */
646 if (vxlnat_initial != 0 && uiop->uio_resid >= sizeof (uint64_t)) {
647 uint64_t total = vxlnat_dumpcount;
648
649 ASSERT(vxlnat_dumpbuf != NULL && vxlnat_dumpcurrent == 0);
650 rc = uiomove(&total, sizeof (uint64_t), UIO_READ, uiop);
651 if (rc != 0)
652 goto bail;
653 vxlnat_initial = 0;
654 }
655
656 /* XXX KEBE THINKS -- if no dump buffer, just return w/o data. */
657 while (rc == 0 && vxlnat_dumpbuf != NULL &&
658 uiop->uio_resid >= sizeof (vxn_msg_t)) {
659 rc = uiomove(vxlnat_dumpbuf + vxlnat_dumpcurrent,
660 sizeof (vxn_msg_t), UIO_READ, uiop);
661 if (rc != 0) {
662 /*
663 * XXX KEBE ASKS, destroy or preserve dumpstate?
664 * Fill in answer here.
665 */
666 break;
667 }
668 vxlnat_dumpcurrent++;
669 dumpprogress++;
670 if (vxlnat_dumpcurrent == vxlnat_dumpcount) {
671 kmem_free(vxlnat_dumpbuf,
672 vxlnat_dumpcount * sizeof (vxn_msg_t));
673 vxlnat_dumpbuf = NULL;
674 vxlnat_dumpcount = vxlnat_dumpcurrent = 0;
675 }
676 }
677
678 bail:
679 /*
680 * If there's room at the end, just ignore that space for now. Handy
681 * DTrace probe below notes amount of extra bytes..
682 */
683 DTRACE_PROBE1(vxlnat__read__extrabytes, ssize_t, uiop->uio_resid);
684 /* Note progress of dump with DTrace probes. */
685 DTRACE_PROBE3(vxlnat__read__dumpprogress, size_t, dumpprogress, size_t,
686 vxlnat_dumpcurrent, size_t, vxlnat_dumpcount);
687
688 mutex_exit(&vxlnat_mutex);
689 return (rc);
690 }