Print this page
WIP to help bring up NAT flows
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/vxlnat_impl.h
+++ new/usr/src/uts/common/inet/vxlnat_impl.h
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2018, Joyent, Inc.
14 14 */
15 15
16 16 #ifndef _INET_VXLNAT_IMPL_H
17 17 #define _INET_VXLNAT_IMPL_H
18 18
19 19 #include <inet/vxlnat.h>
20 20 #include <inet/ip.h>
21 21 #include <inet/ip6.h>
22 22 #include <inet/ip_ire.h>
23 23 #include <sys/clock_impl.h>
24 24 #include <sys/avl.h>
25 25 #include <sys/uio.h>
26 26 #include <sys/list.h>
27 27 #include <sys/byteorder.h>
28 28 #include <sys/vxlan.h>
29 29
30 30 /*
31 31 * XXX KEBE ASKS --> do we assume port IPPORT_VXLAN all the time?
32 32 * IF NOT, then we need to add ports to various things here that deal
33 33 * with the underlay network.
34 34 *
35 35 * NOTE: All reference counts *include* table/tree/list/whatever internment.
36 36 * Once an entry is removed, *_REFRELE() must be invoked, and it may or may
37 37 * not free something.
38 38 */
39 39
40 40 #ifdef __cplusplus
41 41 extern "C" {
42 42 #endif
43 43
|
↓ open down ↓ |
43 lines elided |
↑ open up ↑ |
44 44 /*
45 45 * NAT RULES. Instantiated per-vnet, write-once/read-only entries,
46 46 * linkage/entries protected by "rule lock" outside this structure.
47 47 */
48 48 typedef struct vxlnat_rule_s {
49 49 list_node_t vxnr_link;
50 50 /* refheld link, or if NULL, this rule is "condemned" and no good. */
51 51 struct vxlnat_vnet_s *vxnr_vnet;
52 52 in6_addr_t vxnr_myaddr;
53 53 in6_addr_t vxnr_pubaddr;
54 + /* XXX KEBE ASKS, ire? */
54 55 uint8_t vxnr_myether[ETHERADDRL];
55 56 uint16_t vxnr_vlanid; /* Fabrics use this too. */
56 57 uint32_t vxnr_refcount;
57 58 uint8_t vxnr_prefix;
58 59 } vxlnat_rule_t;
59 60 #define VXNR_REFHOLD(vxnr) { \
60 61 atomic_inc_32(&(vxnr)->vxnr_refcount); \
61 62 ASSERT((vxnr)->vxnr_refcount > 0); \
62 63 }
63 64 #define VXNR_REFRELE(vxnr) { \
64 65 ASSERT((vxnr)->vxnr_refcount > 0); \
65 66 membar_exit(); \
66 67 if (atomic_dec_32_nv(&(vxnr)->vxnr_refcount) == 0) \
67 68 vxlnat_rule_free(vxnr); \
68 69 }
69 70 extern void vxlnat_rule_free(vxlnat_rule_t *);
70 71
71 72 /*
72 73 * NAT FLOWS. These are per-vnet, and keyed/searched by:
73 74 * <inner-IP-source,IP-dest,inner-source-port,dest-port,protocol>.
74 - * They will be tied-to/part-of
75 + * They will be tied-to/part-of a conn_t.
75 76 */
76 77 typedef struct vxlnat_flow_s {
77 78 avl_node_t vxnfl_treenode;
78 79 /*
79 80 * I'm guessing that dst varies more than src. Also
80 81 * the plan is for the comparator function to bcmp() both
81 82 * of these as one call for IPv6 (if we ever get to that..).
82 83 */
83 84 in6_addr_t vxnfl_dst;
84 85 in6_addr_t vxnfl_src; /* INNER source address. */
85 86 uint32_t vxnfl_ports;
86 87 uint8_t vxnfl_protocol;
87 88 uint8_t vxnfl_isv4 : 1, /* Will save us 12 bytes of compares... */
88 89 vxlfl_reserved1 : 7;
90 + /* Theoretically 16 bits lies where this comment is. */
91 + uint32_t vxnfl_refcount;
89 92 conn_t *vxnfl_connp; /* Question - embed instead? */
90 93 vxlnat_rule_t *vxnfl_rule; /* Refhold to rule that generated me. */
94 + /*
95 + * XXX KEBE SAYS Other NAT-state belongs here too. Like time-values
96 + * for timeouts, and more!
97 + */
91 98 } vxlnat_flow_t;
92 99 /* Exploit endianisms, maintain network order... */
93 100 #ifdef _BIG_ENDIAN
94 101 #define VXNFL_SPORT(ports) (uint16_t)((ports) >> 16) /* Unsigned all around. */
95 102 #define VXNFL_DPORT(ports) ((ports) & 0xFFFF)
96 103 #else
97 104 #define VXNFL_SPORT(ports) ((ports) & 0xFFFF)
98 105 #define VXNFL_DPORT(ports) (uint16_t)((ports) >> 16) /* Unsigned all around. */
99 106 #endif
107 +#define VXNFL_REFHOLD(vxnfl) { \
108 + atomic_inc_32(&(vxnfl)->vxnfl_refcount); \
109 + ASSERT((vxnfl)->vxnfl_refcount > 0); \
110 +}
111 +#define VXNFL_REFRELE(vxnfl) { \
112 + ASSERT((vxnfl)->vxnfl_refcount > 0); \
113 + membar_exit(); \
114 + if (atomic_dec_32_nv(&(vxnfl)->vxnfl_refcount) == 0) \
115 + vxlnat_flow_free(vxnfl); \
116 +}
117 +extern void vxlnat_flow_free(vxlnat_flow_t *);
100 118
101 119 /*
102 120 * 1-1 IP mapping.
103 121 */
104 122 typedef struct vxlnat_fixed_s {
105 123 avl_node_t vxnf_treenode;
106 124 in6_addr_t vxnf_addr; /* For now it needn't match to a rule. */
107 125 in6_addr_t vxnf_pubaddr; /* External IP. */
108 126 struct vxlnat_vnet_s *vxnf_vnet;
109 127 ire_t *vxnf_ire; /* Should be an IRE_LOCAL from the ftable. */
110 128 struct vxlnat_remote_s *vxnf_remote;
111 129 uint8_t vxnf_myether[ETHERADDRL];
112 130 uint16_t vxnf_vlanid; /* Stored in network order for quick xmit. */
113 131 uint32_t vxnf_refcount;
114 132 boolean_t vxnf_clear_router; /* XXX KEBE SAYS CHEESY HACK */
115 133 } vxlnat_fixed_t;
116 134 #define VXNF_REFHOLD(vxnf) { \
117 135 atomic_inc_32(&(vxnf)->vxnf_refcount); \
118 136 ASSERT((vxnf)->vxnf_refcount > 0); \
119 137 }
120 138 #define VXNF_REFRELE(vxnf) { \
121 139 ASSERT((vxnf)->vxnf_refcount > 0); \
122 140 membar_exit(); \
123 141 if (atomic_dec_32_nv(&(vxnf)->vxnf_refcount) == 0) \
124 142 vxlnat_fixed_free(vxnf); \
125 143 }
126 144 extern void vxlnat_fixed_free(vxlnat_fixed_t *);
127 145
128 146 /*
129 147 * REMOTE VXLAN destinations.
130 148 */
131 149 typedef struct vxlnat_remote_s {
132 150 avl_node_t vxnrem_treenode;
133 151 in6_addr_t vxnrem_addr; /* Same prefix as one in rule, or fixed addr. */
134 152 in6_addr_t vxnrem_uaddr; /* Underlay VXLAN destination. */
135 153 struct vxlnat_vnet_s *vxnrem_vnet; /* Reference-held. */
136 154 uint32_t vxnrem_refcount;
137 155 uint8_t vxnrem_ether[ETHERADDRL];
138 156 uint16_t vxnrem_vlan;
139 157 /*
140 158 * XXX KEBE SAYS put some lifetime/usetime/etc. here
141 159 * so we don't keep too many of these. Either that, or maybe
142 160 * convert to a qqcache or (patents expiring) ARC.
143 161 */
144 162 } vxlnat_remote_t;
145 163 #define VXNREM_REFHOLD(vxnrem) { \
146 164 atomic_inc_32(&(vxnrem)->vxnrem_refcount); \
147 165 ASSERT((vxnrem)->vxnrem_refcount > 0); \
148 166 }
149 167 #define VXNREM_REFRELE(vxnrem) { \
150 168 ASSERT((vxnrem)->vxnrem_refcount > 0); \
151 169 membar_exit(); \
152 170 if (atomic_dec_32_nv(&(vxnrem)->vxnrem_refcount) == 0) \
153 171 vxlnat_remote_free(vxnrem); \
154 172 }
155 173 extern void vxlnat_remote_free(vxlnat_remote_t *);
156 174
157 175 /*
158 176 * per-vnetid overarching structure. AVL tree keyed by vnetid.
159 177 * NOTE: Could be split into vnetid-hashed buckets to split any
160 178 * locks.
161 179 */
162 180 typedef struct vxlnat_vnet_s {
163 181 avl_node_t vxnv_treenode;
164 182 /*
165 183 * 1-1 IP mappings. (1st lookup for an in-to-out packet.)
166 184 * Will map to an IRE_LOCAL in IP.
167 185 */
168 186 krwlock_t vxnv_fixed_lock;
169 187 avl_tree_t vxnv_fixed_ips;
170 188
171 189 /*
172 190 * NAT flows. (2nd lookup for an in-to-out packet.)
173 191 * These are also conn_ts with outer-packet fields for out-to-in
174 192 * matches against a conn_t.
175 193 *
176 194 * NOTE: We're going to keep a separate tree for inner IPv6 NAT, if
177 195 * we ever need it.
178 196 */
179 197 krwlock_t vxnv_flowv4_lock;
180 198 avl_tree_t vxnv_flows_v4;
181 199
182 200 /* NAT rules. (3rd lookup for an in-to-out packet.) */
183 201 kmutex_t vxnv_rule_lock;
184 202 list_t vxnv_rules;
185 203
186 204 /*
187 205 * Internal-network remote-nodes. (only lookup for out-to-in packet.)
188 206 * Entries here are also refheld by 1-1s or NAT flows.
189 207 */
190 208 kmutex_t vxnv_remote_lock;
191 209 avl_tree_t vxnv_remotes;
192 210
193 211 uint32_t vxnv_refcount;
194 212 uint32_t vxnv_vnetid; /* Wire byteorder for less swapping on LE */
195 213 } vxlnat_vnet_t;
196 214 #define VXNV_REFHOLD(vxnv) { \
197 215 atomic_inc_32(&(vxnv)->vxnv_refcount); \
198 216 ASSERT((vxnv)->vxnv_refcount > 0); \
199 217 }
200 218 #define VXNV_REFRELE(vxnv) { \
201 219 ASSERT((vxnv)->vxnv_refcount > 0); \
202 220 membar_exit(); \
203 221 if (atomic_dec_32_nv(&(vxnv)->vxnv_refcount) == 0) \
204 222 vxlnat_vnet_free(vxnv); \
205 223 }
206 224 extern void vxlnat_vnet_free(vxlnat_vnet_t *);
207 225
208 226 /*
209 227 * Endian-independent macros for rapid off-wire header reading. i.e. avoid
210 228 * [nh]to[hn]*()
211 229 *
212 230 * VXLAN_ID_WIRE32(id) ==> Zero-out "reserved" bits, preserve wire-order
213 231 * and position of vnetid.
214 232 * VXLAN_FLAGS_WIRE32(vni) ==> Zero-out reserved bits, preserve wire-order
215 233 * and position of flags.
216 234 * VXLAN_F_VDI_WIRE ==> VXLAN_F_VDI, but w/o needing to swap.
217 235 *
218 236 * ALSO: HTON/NTOH for kernel-makes-right interactions with userland, which
219 237 * means shifting actual ID to/from low-24-bits of 32-bit word.
220 238 * VXLAN_ID_HTON(id)
221 239 * VXLAN_ID_NTOH(id)
222 240 *
223 241 * XXX KEBE ASKS ==> If not confusing to folks, move into sys/vxlan.h and
224 242 * have overlay's VXLAN encap adopt them?
225 243 */
226 244 #ifdef _BIG_ENDIAN
227 245 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF00)
228 246 #define VXLAN_F_VDI_WIRE VXLAN_F_VDI
229 247 /* XXX KEBE ASKS, do masking here? */
230 248 #define VXLAN_ID_HTON(id) ((id) << VXLAN_ID_SHIFT)
231 249 #define VXLAN_ID_NTOH(id) ((id) >> VXLAN_ID_SHIFT)
232 250 #else /* i.e. _LITTLE_ENDIAN */
233 251 #define VXLAN_ID_WIRE32(id) ((id) & 0xFFFFFF)
234 252 #define VXLAN_F_VDI_WIRE 0x08
235 253 #define VXLAN_ID_HTON(id) htonl((id) << VXLAN_ID_SHIFT)
236 254 #define VXLAN_ID_NTOH(id) (ntohl(id) >> VXLAN_ID_SHIFT)
237 255 #endif /* _BIG_ENDIAN */
238 256 #define VXLAN_FLAGS_WIRE32(flags) ((flags) & VXLAN_F_VDI_WIRE)
239 257
240 258 extern kmutex_t vxlnat_mutex;
241 259 extern netstack_t *vxlnat_netstack;
242 260 extern int vxlnat_command(vxn_msg_t *);
243 261 extern int vxlnat_read_dump(struct uio *);
244 262 extern int vxlnat_vxlan_addr(in6_addr_t *);
245 263 extern void vxlnat_closesock(void);
246 264 extern void vxlnat_state_init(void);
247 265 extern void vxlnat_state_fini(void);
248 266
249 267 extern void vxlnat_public_init(void);
250 268 extern void vxlnat_public_fini(void);
251 269 extern boolean_t vxlnat_public_hold(in6_addr_t *, boolean_t);
252 270 extern void vxlnat_public_rele(in6_addr_t *);
253 271
254 272 extern int vxlnat_tree_plus_in6_cmp(const void *, const void *);
255 273
|
↓ open down ↓ |
146 lines elided |
↑ open up ↑ |
256 274 /* ire_recvfn & ire_sendfn functions for 1-1/fixed maps. */
257 275 extern void vxlnat_fixed_ire_recv_v4(ire_t *, mblk_t *, void *,
258 276 ip_recv_attr_t *);
259 277 extern void vxlnat_fixed_ire_recv_v6(ire_t *, mblk_t *, void *,
260 278 ip_recv_attr_t *);
261 279 extern int vxlnat_fixed_ire_send_v4(ire_t *, mblk_t *, void *,
262 280 ip_xmit_attr_t *, uint32_t *);
263 281 extern int vxlnat_fixed_ire_send_v6(ire_t *, mblk_t *, void *,
264 282 ip_xmit_attr_t *, uint32_t *);
265 283
284 +extern boolean_t vxlnat_new_conn(vxlnat_flow_t *);
285 +extern void vxlnat_activate_conn(vxlnat_flow_t *);
286 +#ifdef notyet
287 +extern void vxlnat_deactivate_conn(vxlnat_flow_t *);
288 +#endif
266 289
267 290 extern vxlnat_vnet_t *vxlnat_get_vnet(uint32_t, boolean_t);
268 291
269 292 #ifdef __cplusplus
270 293 }
271 294 #endif
272 295
273 296 #endif /* _INET_VXLNAT_IMPL_H */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX