Print this page
Factor out fixed/1-1 processing from vxlnat_vxlan_one(), paving way for
future processing types.
Initial definitions of NAT flows.
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/vxlnat/vxlnat_rules.c
+++ new/usr/src/uts/common/inet/vxlnat/vxlnat_rules.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2018 Joyent, Inc.
14 14 */
15 15
16 16 /*
17 17 * Writes (new rules) and reads (rule dump) go here. So do the
18 18 * ins/outs of reading & writing.
19 19 */
20 20
21 21 #include <sys/ddi.h>
22 22 #include <sys/dtrace.h>
23 23 #include <sys/debug.h>
24 24 #include <inet/vxlnat_impl.h>
25 25 #include <inet/ip_if.h> /* XXX KEBE SAYS CHEESY HACK */
26 26
27 27 /*
28 28 * These are all initialized to NULL or 0.
29 29 *
30 30 * If a VXNM_DUMP is requested, these get allocated/set. vxlnat_read()
31 31 * calls will consume them, and once delivered the last bytes read will
32 32 * cause these to be freed and reset to NULL/0. Cheesy, but this is a
33 33 * one-at-a-time thing. Protected by vxlnat_mutex.
34 34 */
35 35 static vxn_msg_t *vxlnat_dumpbuf;
36 36 static size_t vxlnat_initial; /* non-zero if no read yet. */
37 37 static size_t vxlnat_dumpcount;
38 38 static size_t vxlnat_dumpcurrent;
39 39
40 40 /*
41 41 * Store per-vnet-state in AVL tree. We could be handling 1000s or more...
42 42 * Could split this into a hash table of AVL trees if need be.
43 43 */
44 44 static krwlock_t vxlnat_vnet_lock; /* Could be mutex if we use refhold. */
45 45 static avl_tree_t vxlnat_vnets;
46 46
47 47 static void vxlnat_rule_unlink(vxlnat_rule_t *);
48 48 static void vxlnat_fixed_unlink(vxlnat_fixed_t *);
49 49 /* In vxlnat_nat.c */
50 50 extern void vxlnat_remote_unlink(vxlnat_remote_t *);
51 51
52 52 /*
53 53 * Comparison function for vnet AVL tree.
54 54 */
55 55 static int
56 56 vxlnat_vnetid_cmp(const void *first, const void *second)
57 57 {
58 58 uint32_t first_vnetid, second_vnetid;
59 59
60 60 first_vnetid = ((vxlnat_vnet_t *)first)->vxnv_vnetid;
61 61 second_vnetid = ((vxlnat_vnet_t *)second)->vxnv_vnetid;
62 62
63 63 if (first_vnetid < second_vnetid)
64 64 return (-1);
65 65 if (first_vnetid > second_vnetid)
66 66 return (1);
67 67 return (0);
68 68 }
69 69
70 70 /*
71 71 *
72 72 * NOTE: Many structures start with the form:
73 73 *
74 74 * struct foo {
75 75 * avl_node_t node;
76 76 * in6_addr_t address_which_is_search_key;
77 77 * ....
78 78 *
79 79 * We will use this same AVL comparison function for many of these structures.
80 80 */
81 81 int
82 82 vxlnat_tree_plus_in6_cmp(const void *first, const void *second)
83 83 {
84 84 in6_addr_t *firstaddr, *secondaddr;
85 85 int ret;
86 86
87 87 firstaddr = (in6_addr_t *)(((avl_node_t *)first) + 1);
88 88 secondaddr = (in6_addr_t *)(((avl_node_t *)second) + 1);
|
↓ open down ↓ |
88 lines elided |
↑ open up ↑ |
89 89
90 90 ret = memcmp(firstaddr, secondaddr, sizeof (in6_addr_t));
91 91 if (ret > 0)
92 92 return (1);
93 93 if (ret < 0)
94 94 return (-1);
95 95 return (0);
96 96 }
97 97
98 98 /*
99 + * Comparison function for NAT flow.
100 + */
101 +static int
102 +vxlnat_flow_cmp_v4(const void *first, const void *second)
103 +{
104 + vxlnat_flow_t *first_flow = (vxlnat_flow_t *)first;
105 + vxlnat_flow_t *second_flow = (vxlnat_flow_t *)second;
106 + uint64_t firstaddrs, secondaddrs, firstportproto, secondportproto;
107 +
108 + firstaddrs = first_flow->vxnfl_src._S6_un._S6_u32[3] |
109 + (((uint64_t)first_flow->vxnfl_dst._S6_un._S6_u32[3]) << 32ULL);
110 + secondaddrs = second_flow->vxnfl_src._S6_un._S6_u32[3] |
111 + (((uint64_t)second_flow->vxnfl_dst._S6_un._S6_u32[3]) << 32ULL);
112 + firstportproto = first_flow->vxnfl_ports |
113 + (((uint64_t)first_flow->vxnfl_protocol) << 32ULL);
114 + secondportproto = second_flow->vxnfl_ports |
115 + (((uint64_t)second_flow->vxnfl_protocol) << 32ULL);
116 +
117 + if (firstaddrs > secondaddrs)
118 + return (1);
119 + else if (firstaddrs < secondaddrs)
120 + return (-1);
121 + else if (firstportproto > secondportproto)
122 + return (1);
123 + else if (firstportproto < secondportproto)
124 + return (-1);
125 +
126 + return (0);
127 +}
128 +
129 +/*
99 130 * Find-and-reference-hold a vnet. If none present, create one.
100 131 * "vnetid" MUST be in wire-order and its one byte cleared.
101 132 */
102 133 vxlnat_vnet_t *
103 134 vxlnat_get_vnet(uint32_t vnetid, boolean_t create_on_miss)
104 135 {
105 136 vxlnat_vnet_t *vnet, searcher;
106 137 avl_index_t where;
107 138
108 139 /* Cheesy, but we KNOW vxnv_vnetid is the only thing checked. */
109 140 searcher.vxnv_vnetid = vnetid;
110 141
111 142 rw_enter(&vxlnat_vnet_lock, create_on_miss ? RW_WRITER : RW_READER);
112 143 vnet = (vxlnat_vnet_t *)avl_find(&vxlnat_vnets, &searcher, &where);
113 144 if (vnet == NULL && create_on_miss) {
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
114 145 vnet = kmem_zalloc(sizeof (*vnet), KM_SLEEP);
115 146 /* KM_SLEEP means non-NULL guaranteed. */
116 147 vnet->vxnv_refcount = 1; /* Internment reference. */
117 148 vnet->vxnv_vnetid = vnetid;
118 149 /* Initialize 1-1 mappings... */
119 150 rw_init(&vnet->vxnv_fixed_lock, NULL, RW_DRIVER, NULL);
120 151 avl_create(&vnet->vxnv_fixed_ips, vxlnat_tree_plus_in6_cmp,
121 152 sizeof (vxlnat_fixed_t), 0);
122 153 /* Initialize NAT rules. (NAT mutex is zeroed-out.) */
123 154 list_create(&vnet->vxnv_rules, sizeof (vxlnat_rule_t), 0);
124 -#ifdef notyet
125 - /* XXX KEBE SAYS INITIALIZE NAT flows... */
126 -#endif /* notyet */
155 +
156 + /* Initialize NAT flows... */
157 + rw_init(&vnet->vxnv_flowv4_lock, NULL, RW_DRIVER, NULL);
158 + avl_create(&vnet->vxnv_flows_v4, vxlnat_flow_cmp_v4,
159 + sizeof (vxlnat_flow_t), 0);
160 +
127 161 /*
128 162 * Initialize remote VXLAN destination cache.
129 163 * (remotes mutex is zeroed-out.)
130 164 */
131 165 avl_create(&vnet->vxnv_remotes, vxlnat_tree_plus_in6_cmp,
132 166 sizeof (vxlnat_remote_t), 0);
133 167
134 168 avl_insert(&vxlnat_vnets, vnet, where);
135 169 }
136 170 if (vnet != NULL)
137 171 VXNV_REFHOLD(vnet); /* Caller's reference. */
138 172 rw_exit(&vxlnat_vnet_lock);
139 173
140 174 return (vnet);
141 175 }
142 176
143 177 void
144 178 vxlnat_vnet_free(vxlnat_vnet_t *vnet)
145 179 {
146 180 /* XXX KEBE SAYS FILL ME IN */
147 181 ASSERT0(vnet->vxnv_refcount);
148 182 /* XXX KEBE ASKS -- assert detachment? */
149 183
150 184 kmem_free(vnet, sizeof (*vnet));
151 185 }
152 186
/*
 * Remove a vnet from the global tree and unlink everything it owns.
 * Caller must hold vxlnat_vnet_lock as writer.  Consumes the
 * internment reference; the vnet is actually freed only once every
 * other holder (rules, 1-1 mappings, remotes, callers) lets go.
 */
static void
vxlnat_vnet_unlink_locked(vxlnat_vnet_t *vnet)
{
	ASSERT3U(vnet->vxnv_refcount, >=, 1);

	ASSERT(RW_WRITE_HELD(&vxlnat_vnet_lock));
	avl_remove(&vxlnat_vnets, vnet);
	/* XXX KEBE ASKS --> Mark as condemned? */

	/* Unlink all NAT rules */
	mutex_enter(&vnet->vxnv_rule_lock);
	while (!list_is_empty(&vnet->vxnv_rules)) {
		/* Will decrement vnet's refcount too. */
		vxlnat_rule_unlink(
		    (vxlnat_rule_t *)list_head(&vnet->vxnv_rules));
	}
	mutex_exit(&vnet->vxnv_rule_lock);
	/* XXX KEBE SAYS unlink all 1-1 mappings */
	rw_enter(&vnet->vxnv_fixed_lock, RW_WRITER);
	while (!avl_is_empty(&vnet->vxnv_fixed_ips)) {
		/* Will decrement vnet's refcount too. */
		vxlnat_fixed_unlink(
		    (vxlnat_fixed_t *)avl_first(&vnet->vxnv_fixed_ips));
	}
	rw_exit(&vnet->vxnv_fixed_lock);

	/* Unlink all remotes */
	mutex_enter(&vnet->vxnv_remote_lock);
	while (!avl_is_empty(&vnet->vxnv_remotes)) {
		/* Will decrement vnet's refcount too. */
		vxlnat_remote_unlink(
		    (vxlnat_remote_t *)avl_first(&vnet->vxnv_remotes));
	}
	mutex_exit(&vnet->vxnv_remote_lock);

	/*
	 * XXX KEBE SAYS unlink all NAT flows
	 *
	 * NOTE(review): vxnv_flows_v4 is now initialized in
	 * vxlnat_get_vnet() but nothing empties it here yet; once flows
	 * are actually created they would pin their references across
	 * this unlink -- confirm a flow-flush lands here first.
	 */

	VXNV_REFRELE(vnet);	/* Internment reference. */
}
192 226
193 227 /*
194 228 * Assume it's refheld by the caller, so we will drop two references
195 229 * explicitly (caller's and internment), plus free any rules.
196 230 */
197 231 void
198 232 vxlnat_vnet_unlink(vxlnat_vnet_t *vnet)
199 233 {
200 234 ASSERT3U(vnet->vxnv_refcount, >=, 2);
201 235 rw_enter(&vxlnat_vnet_lock, RW_WRITER);
202 236 vxlnat_vnet_unlink_locked(vnet);
203 237 rw_exit(&vxlnat_vnet_lock);
204 238 /*
205 239 * At this point, we've decremented the refcount by one with the
206 240 * unlink. Drop the caller's now.
207 241 */
208 242 VXNV_REFRELE(vnet);
209 243 }
210 244
/*
 * Add a (vnetid+prefix => external) rule.
 *
 * Returns 0 on success, EADDRNOTAVAIL if the public address cannot be
 * reserved for shared use, or ENOMEM if the vnet cannot be obtained.
 * On success the new rule takes over the vnet reference acquired here.
 */
static int
vxlnat_nat_rule(vxn_msg_t *vxnm)
{
	vxlnat_vnet_t *vnet;
	vxlnat_rule_t *rule;
	uint32_t vnetid;

	ASSERT(MUTEX_HELD(&vxlnat_mutex));

	/* Reserve the requested public IP for shared use. */
	if (!vxlnat_public_hold(&vxnm->vxnm_public, B_FALSE))
		return (EADDRNOTAVAIL);

	vnetid = VXLAN_ID_HTON(vxnm->vxnm_vnetid);
	vnet = vxlnat_get_vnet(vnetid, B_TRUE);
	if (vnet == NULL) {
		/* RARE case of failed allocation or other disaster. */
		vxlnat_public_rele(&vxnm->vxnm_public);
		return (ENOMEM);
	}

	/* Now we have a reference-held vnet, create a rule for it. */
	rule = kmem_alloc(sizeof (*rule), KM_SLEEP);
	/* KM_SLEEP means non-NULL guaranteed. */
	rule->vxnr_vnet = vnet;	/* vnet already refheld, remember? */
	/* XXX KEBE ASKS, check the vxnm more carefully? */
	rule->vxnr_myaddr = vxnm->vxnm_private;
	rule->vxnr_pubaddr = vxnm->vxnm_public;
	rule->vxnr_prefix = vxnm->vxnm_prefix;
	/* For easier packet matching, keep vlanid in network order. */
	rule->vxnr_vlanid = htons(vxnm->vxnm_vlanid);
	bcopy(vxnm->vxnm_ether_addr, rule->vxnr_myether, ETHERADDRL);
	rule->vxnr_refcount = 1;	/* Internment reference. */
	list_link_init(&rule->vxnr_link);

	/* Put rule into vnet. */
	mutex_enter(&vnet->vxnv_rule_lock);
	/* XXX KEBE ASKS --> Check for collisions?!? */
	list_insert_tail(&vnet->vxnv_rules, rule);
	mutex_exit(&vnet->vxnv_rule_lock);

	return (0);
}
257 291
/*
 * Destroy a fully-condemned NAT rule.  The rule must already be
 * unlinked: no vnet backpointer, no list linkage, no references.
 * Releases the shared hold on the public address, then the memory.
 */
void
vxlnat_rule_free(vxlnat_rule_t *rule)
{
	ASSERT3P(rule->vxnr_vnet, ==, NULL);
	ASSERT3P(rule->vxnr_link.list_next, ==, NULL);
	ASSERT3P(rule->vxnr_link.list_prev, ==, NULL);
	ASSERT0(rule->vxnr_refcount);
	vxlnat_public_rele(&rule->vxnr_pubaddr);
	kmem_free(rule, sizeof (*rule));
}
268 302
269 303 static void
270 304 vxlnat_rule_unlink(vxlnat_rule_t *rule)
271 305 {
272 306 vxlnat_vnet_t *vnet = rule->vxnr_vnet;
273 307
274 308 ASSERT3P(vnet, !=, NULL);
275 309 ASSERT(MUTEX_HELD(&vnet->vxnv_rule_lock));
276 310
277 311 list_remove(&vnet->vxnv_rules, rule);
278 312 VXNV_REFRELE(vnet);
279 313 rule->vxnr_vnet = NULL; /* This condemns this rule. */
280 314 VXNR_REFRELE(rule);
281 315 }
282 316
283 317 static int
284 318 vxlnat_flush(void)
285 319 {
286 320 vxlnat_closesock();
287 321 /* XXX KEBE SAYS DO OTHER STATE FLUSHING TOO. */
288 322
289 323 /* Flush out vnets. */
290 324 rw_enter(&vxlnat_vnet_lock, RW_WRITER);
291 325 while (!avl_is_empty(&vxlnat_vnets))
292 326 vxlnat_vnet_unlink_locked(avl_first(&vxlnat_vnets));
293 327 rw_exit(&vxlnat_vnet_lock);
294 328 if (vxlnat_dumpbuf != NULL) {
295 329 kmem_free(vxlnat_dumpbuf,
296 330 vxlnat_dumpcount * sizeof (vxn_msg_t));
297 331 vxlnat_dumpbuf = NULL;
298 332 vxlnat_initial = vxlnat_dumpcount = vxlnat_dumpcurrent = 0;
299 333 }
300 334 return (0);
301 335 }
302 336
/*
 * Destroy a fully-condemned 1-1 mapping.  Caller guarantees the
 * refcount is zero and the mapping is unlinked.  Releases the
 * exclusive hold on the public address, then the memory.
 */
void
vxlnat_fixed_free(vxlnat_fixed_t *fixed)
{
	ASSERT0(fixed->vxnf_refcount);

	vxlnat_public_rele(&fixed->vxnf_pubaddr);
	kmem_free(fixed, sizeof (*fixed));
}
311 345
/*
 * Unlink a 1-1 mapping from its vnet, undoing the IRE rewiring done by
 * vxlnat_fixed_ip().  Caller must hold the vnet's vxnv_fixed_lock as
 * writer.  Drops the IRE's hold, the vnet's hold and the internment
 * reference on 'fixed'.
 */
static void
vxlnat_fixed_unlink(vxlnat_fixed_t *fixed)
{
	vxlnat_vnet_t *vnet = fixed->vxnf_vnet;
	ire_t *ire = fixed->vxnf_ire;

	ASSERT3P(vnet, !=, NULL);
	ASSERT(RW_WRITE_HELD(&vnet->vxnv_fixed_lock));

	/* Rid ourselves of the IRE now. */
	if (ire != NULL) {
		ASSERT(ire->ire_type == IRE_LOCAL);
		/* ire_dep_sib_next doubles as a backpointer to 'fixed'. */
		ASSERT3P((void *)ire->ire_dep_sib_next, ==, (void *)fixed);

		/* XXX KEBE SAYS CHEESY HACK. */
		if (fixed->vxnf_clear_router)
			ire->ire_ill->ill_flags &= ~ILLF_ROUTER;

		ire->ire_dep_sib_next = NULL;
		VXNF_REFRELE(fixed);	/* ire's hold on us. */
		/*
		 * Rewire IRE back to normal.
		 * NOTE(review): vxlnat_fixed_ip() rewired BOTH
		 * ire_recvfn and ire_sendfn, but only ire_recvfn is
		 * restored here -- confirm whether ire_sendfn should be
		 * reset to its stock value as well.
		 */
		ire->ire_recvfn = (ire->ire_ipversion == IPV4_VERSION) ?
		    ire_recv_local_v4 : ire_recv_local_v6;
		ire_refrele(ire);
	}

	/* And the remote, if it's there. */
	if (fixed->vxnf_remote != NULL) {
		VXNREM_REFRELE(fixed->vxnf_remote);
		fixed->vxnf_remote = NULL;
	}

	avl_remove(&vnet->vxnv_fixed_ips, fixed);
	fixed->vxnf_vnet = NULL;	/* This condemns this 1-1 mapping. */
	VXNV_REFRELE(vnet);
	VXNF_REFRELE(fixed);
}
349 383
350 384 /*
351 385 * Add a 1-1 (vnetid+IP <==> external) rule.
352 386 */
353 387 static int
354 388 vxlnat_fixed_ip(vxn_msg_t *vxnm)
355 389 {
356 390 vxlnat_vnet_t *vnet;
357 391 vxlnat_fixed_t *fixed;
358 392 uint32_t vnetid;
359 393 avl_index_t where;
360 394 int rc;
361 395 ire_t *ire;
362 396 ip_stack_t *ipst;
363 397
364 398 /* XXX KEBE SAYS FILL ME IN. */
365 399 ASSERT(MUTEX_HELD(&vxlnat_mutex));
366 400
367 401 /* Reserve the requested public IP for exclusive use. */
368 402 if (!vxlnat_public_hold(&vxnm->vxnm_public, B_TRUE))
369 403 return (EADDRNOTAVAIL);
370 404
371 405 vnetid = VXLAN_ID_HTON(vxnm->vxnm_vnetid);
372 406 vnet = vxlnat_get_vnet(vnetid, B_TRUE);
373 407 if (vnet == NULL) {
374 408 /* RARE case of failed allocation or other disaster. */
375 409 rc = ENOMEM;
376 410 goto fail;
377 411 }
378 412
379 413 fixed = kmem_zalloc(sizeof (*fixed), KM_SLEEP);
380 414 /* KM_SLEEP means non-NULL guaranteed. */
381 415 fixed->vxnf_vnet = vnet; /* vnet already refheld, remember? */
382 416 /* XXX KEBE ASKS, check the vxnm more carefully? */
383 417 fixed->vxnf_addr = vxnm->vxnm_private;
384 418 fixed->vxnf_pubaddr = vxnm->vxnm_public;
385 419 fixed->vxnf_refcount = 1; /* Internment reference. */
386 420 bcopy(&vxnm->vxnm_ether_addr, &fixed->vxnf_myether, ETHERADDRL);
387 421 fixed->vxnf_vlanid = htons(vxnm->vxnm_vlanid);
388 422
389 423 /*
390 424 * Find a local-address IRE for the public address.
391 425 */
392 426 ipst = vxlnat_netstack->netstack_ip;
393 427 ire = IN6_IS_ADDR_V4MAPPED(&fixed->vxnf_pubaddr) ?
394 428 ire_ftable_lookup_simple_v4(fixed->vxnf_pubaddr._S6_un._S6_u32[3],
395 429 0, ipst, NULL) :
396 430 ire_ftable_lookup_simple_v6(&fixed->vxnf_pubaddr, 0, ipst, NULL);
397 431
398 432 if (ire == NULL) {
399 433 /*
400 434 * Can't find a local IRE. For now, return.
401 435 * XXX KEBE ASKS --> Do we instead put a new entry in
402 436 * there? Or do we count on zone/netstack configuration
403 437 * to make sure the requested external address is there?!
404 438 */
405 439 kmem_free(fixed, sizeof (*fixed));
406 440 rc = EADDRNOTAVAIL;
407 441 goto fail;
408 442 }
409 443
410 444 /*
411 445 * Check the IRE for appropriate properties.
412 446 *
413 447 * This may change as we implement, but for now, we MUST have an ipif
414 448 * (local address) for the public IP. This can/should be on the
415 449 * public NIC OR on a my-netstack-only etherstub to enable
416 450 * instantiating redundant versions of vxlnat on other netstacks on
417 451 * other {zones,machines} without triggering DAD.
418 452 */
419 453 if (ire->ire_type != IRE_LOCAL) {
420 454 ire_refrele(ire);
421 455 kmem_free(fixed, sizeof (*fixed));
422 456 rc = EADDRNOTAVAIL; /* XXX KEBE ASKS different errno? */
423 457 goto fail;
424 458 }
425 459
426 460 /* Put the 1-1 mapping in place. */
|
↓ open down ↓ |
290 lines elided |
↑ open up ↑ |
427 461 rw_enter(&vnet->vxnv_fixed_lock, RW_WRITER);
428 462 if (avl_find(&vnet->vxnv_fixed_ips, fixed, &where) != NULL) {
429 463 /* Oh crap, we have an internal IP mapped already. */
430 464 ire_refrele(ire);
431 465 kmem_free(fixed, sizeof (*fixed));
432 466 rc = EEXIST;
433 467 } else {
434 468 avl_insert(&vnet->vxnv_fixed_ips, fixed, where);
435 469 rc = 0;
436 470 /*
437 - * CHEESY USE OF POINTERS WARNING: I'm going to use
438 - * ire_dep_children for this IRE_LOCAL as a backpointer to
471 + * ODD USE OF POINTERS WARNING: I'm going to use
472 + * ire_dep_sib_next for this IRE_LOCAL as a backpointer to
439 473 * this 'fixed'. This'll allow rapid packet processing.
440 474 * Inspection seems to indicate that IRE_LOCAL ires NEVER use
441 475 * the ire_dep* pointers, so we'll use one (and independent of
442 476 * ip_stack_t's ips_ire_dep_lock as well). If I'm wrong,
443 477 * fix it here and add a new pointer in ip.h for ire_t.
444 478 */
445 479 ire->ire_dep_sib_next = (ire_t *)fixed;
480 + VXNF_REFHOLD(fixed); /* ire holds us too... */
481 + fixed->vxnf_ire = ire;
446 482 /* and then rewire the ire receive and send functions. */
447 483 if (ire->ire_ipversion == IPV4_VERSION) {
448 484 ire->ire_recvfn = vxlnat_fixed_ire_recv_v4;
449 485 ire->ire_sendfn = vxlnat_fixed_ire_send_v4;
450 486 } else {
451 487 ASSERT(ire->ire_ipversion == IPV6_VERSION);
452 488 ire->ire_recvfn = vxlnat_fixed_ire_recv_v6;
453 489 ire->ire_sendfn = vxlnat_fixed_ire_send_v6;
454 490 }
455 - VXNF_REFHOLD(fixed); /* ire holds us too... */
456 - fixed->vxnf_ire = ire;
491 +#if 1 /* Cheesy hack */
457 492 /*
458 493 * XXX KEBE SAYS CHEESY HACK:
459 494 */
460 495 if (!(ire->ire_ill->ill_flags & ILLF_ROUTER)) {
461 496 fixed->vxnf_clear_router = B_TRUE;
462 497 ire->ire_ill->ill_flags |= ILLF_ROUTER;
463 498 } else {
464 499 /* Just so we're clear... */
465 500 fixed->vxnf_clear_router = B_FALSE;
466 501 }
502 +#endif /* Cheesy hack */
467 503 }
468 504 rw_exit(&vnet->vxnv_fixed_lock);
469 505
470 506 fail:
471 507 if (rc != 0)
472 508 vxlnat_public_rele(&vxnm->vxnm_public);
473 509
474 510 return (rc);
475 511 }
476 512
477 513 static void
478 514 vxlnat_rule_to_msg(vxn_msg_t *msg, vxlnat_rule_t *rule)
479 515 {
480 516 msg->vxnm_type = VXNM_RULE;
481 517 msg->vxnm_vnetid = VXLAN_ID_NTOH(rule->vxnr_vnet->vxnv_vnetid);
482 518 msg->vxnm_prefix = rule->vxnr_prefix;
483 519 msg->vxnm_vlanid = ntohs(rule->vxnr_vlanid);
484 520 bcopy(rule->vxnr_myether, msg->vxnm_ether_addr, ETHERADDRL);
485 521 msg->vxnm_public = rule->vxnr_pubaddr;
486 522 msg->vxnm_private = rule->vxnr_myaddr;
487 523 }
488 524
489 525 static void
490 526 vxlnat_fixed_to_msg(vxn_msg_t *msg, vxlnat_fixed_t *fixed)
491 527 {
492 528 msg->vxnm_type = VXNM_FIXEDIP;
493 529 msg->vxnm_vnetid = VXLAN_ID_NTOH(fixed->vxnf_vnet->vxnv_vnetid);
494 530 msg->vxnm_prefix = 0;
495 531 msg->vxnm_vlanid = ntohs(fixed->vxnf_vlanid);
496 532 bcopy(fixed->vxnf_myether, msg->vxnm_ether_addr, ETHERADDRL);
497 533 msg->vxnm_public = fixed->vxnf_pubaddr;
498 534 msg->vxnm_private = fixed->vxnf_addr;
499 535 }
500 536
501 537 static int
502 538 vxlnat_dump(void)
503 539 {
504 540 int rc = 0;
505 541 size_t entries = 0;
506 542 vxlnat_vnet_t *vnet;
507 543 vxlnat_fixed_t *fixed;
508 544 vxlnat_rule_t *rule;
509 545 vxn_msg_t *current;
510 546
511 547 ASSERT(MUTEX_HELD(&vxlnat_mutex));
512 548
513 549 /*
514 550 * XXX KEBE SAYS setup vxlnat_dump* above.
515 551 * XXX KEBE SAYS If function fails for reasons that aren't "dump in
516 552 * progress", make sure it keeps vxlnat_dump* stuff clean
517 553 *
518 554 * NOTE: Other commands are excluded at this point, but packet
519 555 * processing is not. OTOH, packet processing doesn't affect any
520 556 * entities we dump (at this time). We only dump things that can be
521 557 * added with commands. (So no remote VXLAN peers and no NAT flows.)
522 558 */
523 559
524 560 /* Lock down things. */
525 561 rw_enter(&vxlnat_vnet_lock, RW_READER);
526 562 if (avl_numnodes(&vxlnat_vnets) == 0)
527 563 goto bail; /* Nothing to see here, move along. */
528 564
529 565 /*
530 566 * This is going to be inefficient, requiring two passes through each
531 567 * vnet. The first pass locks-down and counts. Then we allocate
532 568 * based on the count. The second pass copies out and unlocks.
533 569 */
534 570 for (vnet = avl_first(&vxlnat_vnets); vnet != NULL;
535 571 vnet = AVL_NEXT(&vxlnat_vnets, vnet)) {
536 572 rw_enter(&vnet->vxnv_fixed_lock, RW_READER);
537 573 entries += avl_numnodes(&vnet->vxnv_fixed_ips);
538 574 mutex_enter(&vnet->vxnv_rule_lock);
539 575 /* Let's hope this isn't a big number... */
540 576 for (rule = list_head(&vnet->vxnv_rules); rule != NULL;
541 577 rule = list_next(&vnet->vxnv_rules, rule)) {
542 578 entries++;
543 579 }
544 580 /* XXX KEBE ASKS -- other fields?!? */
545 581 }
546 582 if (entries == 0)
547 583 goto bail; /* VNETs but with no rules AND no 1-1s?!? */
548 584 /* Don't be too agressive in allocating this. */
549 585 vxlnat_dumpbuf = kmem_alloc(entries * sizeof (vxn_msg_t),
550 586 KM_NOSLEEP | KM_NORMALPRI);
551 587 if (vxlnat_dumpbuf == NULL)
552 588 rc = ENOMEM; /* We still have to unlock everything. */
553 589 current = vxlnat_dumpbuf;
554 590
555 591 /* Second pass. */
556 592 for (vnet = avl_first(&vxlnat_vnets); vnet != NULL;
557 593 vnet = AVL_NEXT(&vxlnat_vnets, vnet)) {
558 594 /* XXX KEBE ASKS -- other fields?!? */
559 595 for (rule = list_head(&vnet->vxnv_rules); rule != NULL;
560 596 rule = list_next(&vnet->vxnv_rules, rule)) {
561 597 if (rc == 0) {
562 598 vxlnat_rule_to_msg(current, rule);
563 599 current++;
564 600 }
565 601 }
566 602 mutex_exit(&vnet->vxnv_rule_lock);
567 603 for (fixed = avl_first(&vnet->vxnv_fixed_ips); fixed != NULL;
568 604 fixed = AVL_NEXT(&vnet->vxnv_fixed_ips, fixed)) {
569 605 if (rc == 0) {
570 606 vxlnat_fixed_to_msg(current, fixed);
571 607 current++;
572 608 }
573 609 }
574 610 rw_exit(&vnet->vxnv_fixed_lock);
575 611 }
576 612 vxlnat_dumpcount = vxlnat_initial = entries;
577 613 vxlnat_dumpcurrent = 0;
578 614 ASSERT3P((vxlnat_dumpbuf + entries), ==, current);
579 615
580 616 bail:
581 617 rw_exit(&vxlnat_vnet_lock);
582 618 return (rc);
583 619 }
584 620
585 621 int
586 622 vxlnat_command(vxn_msg_t *vxnm)
587 623 {
588 624 int rc;
589 625
590 626 switch (vxnm->vxnm_type) {
591 627 case VXNM_VXLAN_ADDR:
592 628 rc = vxlnat_vxlan_addr(&vxnm->vxnm_private);
593 629 break;
594 630 case VXNM_RULE:
595 631 rc = vxlnat_nat_rule(vxnm);
596 632 break;
597 633 case VXNM_FIXEDIP:
598 634 rc = vxlnat_fixed_ip(vxnm);
599 635 break;
600 636 case VXNM_FLUSH:
601 637 rc = vxlnat_flush();
602 638 break;
603 639 case VXNM_DUMP:
604 640 rc = vxlnat_dump();
605 641 break;
606 642 default:
607 643 rc = EINVAL;
608 644 break;
609 645 }
610 646
611 647 return (rc);
612 648 }
613 649
/*
 * One-time global state setup: the vnet tree and its lock, plus the
 * public-address bookkeeping.  Caller holds vxlnat_mutex.
 */
void
vxlnat_state_init(void)
{
	ASSERT(MUTEX_HELD(&vxlnat_mutex));
	rw_init(&vxlnat_vnet_lock, NULL, RW_DRIVER, NULL);
	avl_create(&vxlnat_vnets, vxlnat_vnetid_cmp, sizeof (vxlnat_vnet_t), 0);
	vxlnat_public_init();
	/* XXX KEBE SAYS -- more here. */
}
623 659
624 660 void
625 661 vxlnat_state_fini(void)
626 662 {
627 663 ASSERT(MUTEX_HELD(&vxlnat_mutex));
628 664 (void) vxlnat_flush(); /* If we fail, we're in bigger trouble anyway. */
629 665 vxlnat_public_init();
630 666 avl_destroy(&vxlnat_vnets);
631 667 rw_destroy(&vxlnat_vnet_lock);
632 668 }
633 669
/*
 * Deliver the dump snapshot built by vxlnat_dump() to a reader.
 *
 * Wire format: an initial 8-byte total-entry count (uttered exactly
 * once, while vxlnat_initial is still nonzero), followed by
 * vxn_msg_t records until the buffer is exhausted.  When the last
 * record is consumed the buffer is freed and the dump state reset.
 * Returns 0 or the uiomove() errno.
 */
int
vxlnat_read_dump(struct uio *uiop)
{
	int rc = 0;
	size_t dumpprogress = 0;

	mutex_enter(&vxlnat_mutex);

	/*
	 * Initial-case ==> dumpbuf with none delivered yet.
	 * Utter an 8-byte count.
	 */
	if (vxlnat_initial != 0 && uiop->uio_resid >= sizeof (uint64_t)) {
		uint64_t total = vxlnat_dumpcount;

		ASSERT(vxlnat_dumpbuf != NULL && vxlnat_dumpcurrent == 0);
		rc = uiomove(&total, sizeof (uint64_t), UIO_READ, uiop);
		if (rc != 0)
			goto bail;
		/* Count delivered; records only from here on. */
		vxlnat_initial = 0;
	}

	/* XXX KEBE THINKS -- if no dump buffer, just return w/o data. */
	while (rc == 0 && vxlnat_dumpbuf != NULL &&
	    uiop->uio_resid >= sizeof (vxn_msg_t)) {
		rc = uiomove(vxlnat_dumpbuf + vxlnat_dumpcurrent,
		    sizeof (vxn_msg_t), UIO_READ, uiop);
		if (rc != 0) {
			/*
			 * XXX KEBE ASKS, destroy or preserve dumpstate?
			 * Fill in answer here.
			 */
			break;
		}
		vxlnat_dumpcurrent++;
		dumpprogress++;
		/* Last record consumed: free the buffer and reset state. */
		if (vxlnat_dumpcurrent == vxlnat_dumpcount) {
			kmem_free(vxlnat_dumpbuf,
			    vxlnat_dumpcount * sizeof (vxn_msg_t));
			vxlnat_dumpbuf = NULL;
			vxlnat_dumpcount = vxlnat_dumpcurrent = 0;
		}
	}

bail:
	/*
	 * If there's room at the end, just ignore that space for now.  Handy
	 * DTrace probe below notes amount of extra bytes..
	 */
	DTRACE_PROBE1(vxlnat__read__extrabytes, ssize_t, uiop->uio_resid);
	/* Note progress of dump with DTrace probes. */
	DTRACE_PROBE3(vxlnat__read__dumpprogress, size_t, dumpprogress, size_t,
	    vxlnat_dumpcurrent, size_t, vxlnat_dumpcount);

	mutex_exit(&vxlnat_mutex);
	return (rc);
}
|
↓ open down ↓ |
214 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX