1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018, Joyent, Inc.
14 */
15
16 #ifndef _INET_VXLNAT_IMPL_H
17 #define _INET_VXLNAT_IMPL_H
18
19 #include <inet/vxlnat.h>
20 #include <inet/ip.h>
21 #include <inet/ip6.h>
22 #include <inet/ip_ire.h>
23 #include <sys/clock_impl.h>
24 #include <sys/avl.h>
25 #include <sys/uio.h>
26 #include <sys/list.h>
27 #include <sys/byteorder.h>
28 #include <sys/vxlan.h>
29
30 /*
31 * XXX KEBE ASKS --> do we assume port IPPORT_VXLAN all the time?
32 * IF NOT, then we need to add ports to various things here that deal
33 * with the underlay network.
34 *
35 * NOTE: All reference counts *include* table/tree/list/whatever internment.
36 * Once an entry is removed, *_REFRELE() must be invoked, and it may or may
37 * not free something.
38 */
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 /*
45 * NAT RULES. Instantiated per-vnet, write-once/read-only entries,
46 * linkage/entries protected by "rule lock" outside this structure.
47 */
typedef struct vxlnat_rule_s {
	/* List linkage; presumably on a vnet's vxnv_rules list -- confirm. */
	list_node_t vxnr_link;
	/* refheld link, or if NULL, this rule is "condemned" and no good. */
	struct vxlnat_vnet_s *vxnr_vnet;
	/*
	 * NOTE(review): presumably vxnr_myaddr is the inner (overlay) address
	 * and vxnr_pubaddr the public address it maps to -- confirm.
	 */
	in6_addr_t vxnr_myaddr;
	in6_addr_t vxnr_pubaddr;
	uint8_t vxnr_myether[ETHERADDRL];
	/*
	 * NOTE(review): byte order is not stated here, whereas vxnf_vlanid in
	 * vxlnat_fixed_t is documented as network order -- confirm.
	 */
	uint16_t vxnr_vlanid;	/* Fabrics use this too. */
	/* Manipulated with VXNR_REFHOLD() and VXNR_REFRELE(). */
	uint32_t vxnr_refcount;
	/* Presumably the prefix length that goes with vxnr_myaddr -- confirm. */
	uint8_t vxnr_prefix;
} vxlnat_rule_t;
/*
 * Take a reference on a NAT rule by atomically incrementing vxnr_refcount.
 * The ASSERT trips if the increment wrapped the count around to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnr" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNR_REFHOLD(vxnr) do { \
	atomic_inc_32(&(vxnr)->vxnr_refcount); \
	ASSERT((vxnr)->vxnr_refcount > 0); \
} while (0)
/*
 * Drop a reference on a NAT rule.  membar_exit() is issued before the atomic
 * decrement, and vxlnat_rule_free() is called when the decrement brings
 * vxnr_refcount to zero, so the rule may no longer exist afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnr" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNR_REFRELE(vxnr) do { \
	ASSERT((vxnr)->vxnr_refcount > 0); \
	membar_exit(); \
	if (atomic_dec_32_nv(&(vxnr)->vxnr_refcount) == 0) \
		vxlnat_rule_free(vxnr); \
} while (0)
/* Invoked by VXNR_REFRELE() once a rule's reference count reaches zero. */
extern void vxlnat_rule_free(vxlnat_rule_t *);
70
71 /*
72 * NAT FLOWS. These are per-vnet, and keyed/searched by:
73 * <inner-IP-source,IP-dest,inner-source-port,dest-port,protocol>.
 * They will be tied-to/part-of a conn_t (presumably; see vxnfl_connp).
75 */
typedef struct vxlnat_flow_s {
	/* AVL linkage; presumably in a vnet's vxnv_flows_v4 tree -- confirm. */
	avl_node_t vxnfl_treenode;
	/*
	 * I'm guessing that dst varies more than src. Also
	 * the plan is for the comparator function to bcmp() both
	 * of these as one call for IPv6 (if we ever get to that..).
	 */
	in6_addr_t vxnfl_dst;
	in6_addr_t vxnfl_src; /* INNER source address. */
	/*
	 * Both ports packed in one word, kept in network order; see
	 * VXNFL_SPORT() and VXNFL_DPORT() for pulling them apart.
	 */
	uint32_t vxnfl_ports;
	uint8_t vxnfl_protocol;
	/*
	 * NOTE(review): the reserved bits are spelled "vxlfl_", unlike the
	 * "vxnfl_" prefix on every other member.  Probably a typo; left
	 * untouched here because renaming a member changes the interface.
	 */
	uint8_t vxnfl_isv4 : 1, /* Will save us 12 bytes of compares... */
	    vxlfl_reserved1 : 7;
	conn_t *vxnfl_connp; /* Question - embed instead? */
	vxlnat_rule_t *vxnfl_rule; /* Refhold to rule that generated me. */
} vxlnat_flow_t;
/*
 * Exploit endianisms, maintain network order...
 *
 * Split a 32-bit word holding <source-port, dest-port> as read off the wire
 * into its two 16-bit halves without byte-swapping; each result is still in
 * network order.  Which half is which depends on host endianness.
 *
 * Both macros yield a fully parenthesized uint16_t on either endianness, so
 * the result type does not depend on the build target and the expansion is
 * safe inside larger expressions.
 */
#ifdef _BIG_ENDIAN
#define	VXNFL_SPORT(ports) ((uint16_t)((ports) >> 16))
#define	VXNFL_DPORT(ports) ((uint16_t)((ports) & 0xFFFF))
#else
#define	VXNFL_SPORT(ports) ((uint16_t)((ports) & 0xFFFF))
#define	VXNFL_DPORT(ports) ((uint16_t)((ports) >> 16))
#endif
100
101 /*
102 * 1-1 IP mapping.
103 */
typedef struct vxlnat_fixed_s {
	/* AVL linkage; presumably in a vnet's vxnv_fixed_ips tree -- confirm. */
	avl_node_t vxnf_treenode;
	in6_addr_t vxnf_addr; /* For now it needn't match to a rule. */
	in6_addr_t vxnf_pubaddr; /* External IP. */
	/* Owning vnet; presumably reference-held like vxnrem_vnet -- confirm. */
	struct vxlnat_vnet_s *vxnf_vnet;
	ire_t *vxnf_ire; /* Should be an IRE_LOCAL from the ftable. */
	/* Remote node associated with this mapping (see vxlnat_remote_t). */
	struct vxlnat_remote_s *vxnf_remote;
	uint8_t vxnf_myether[ETHERADDRL];
	uint16_t vxnf_vlanid; /* Stored in network order for quick xmit. */
	/* Manipulated with VXNF_REFHOLD() and VXNF_REFRELE(). */
	uint32_t vxnf_refcount;
	boolean_t vxnf_clear_router; /* XXX KEBE SAYS CHEESY HACK */
} vxlnat_fixed_t;
/*
 * Take a reference on a 1-1 (fixed) mapping by atomically incrementing
 * vxnf_refcount.  The ASSERT trips if the increment wrapped the count around
 * to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnf" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNF_REFHOLD(vxnf) do { \
	atomic_inc_32(&(vxnf)->vxnf_refcount); \
	ASSERT((vxnf)->vxnf_refcount > 0); \
} while (0)
/*
 * Drop a reference on a 1-1 (fixed) mapping.  membar_exit() is issued before
 * the atomic decrement, and vxlnat_fixed_free() is called when the decrement
 * brings vxnf_refcount to zero, so the mapping may no longer exist afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnf" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNF_REFRELE(vxnf) do { \
	ASSERT((vxnf)->vxnf_refcount > 0); \
	membar_exit(); \
	if (atomic_dec_32_nv(&(vxnf)->vxnf_refcount) == 0) \
		vxlnat_fixed_free(vxnf); \
} while (0)
/* Invoked by VXNF_REFRELE() once a mapping's reference count reaches zero. */
extern void vxlnat_fixed_free(vxlnat_fixed_t *);
127
128 /*
129 * REMOTE VXLAN destinations.
130 */
typedef struct vxlnat_remote_s {
	/* AVL linkage; presumably in a vnet's vxnv_remotes tree -- confirm. */
	avl_node_t vxnrem_treenode;
	in6_addr_t vxnrem_addr; /* Same prefix as one in rule, or fixed addr. */
	in6_addr_t vxnrem_uaddr; /* Underlay VXLAN destination. */
	struct vxlnat_vnet_s *vxnrem_vnet; /* Reference-held. */
	/* Manipulated with VXNREM_REFHOLD() and VXNREM_REFRELE(). */
	uint32_t vxnrem_refcount;
	/* Presumably the remote node's inner MAC address -- confirm. */
	uint8_t vxnrem_ether[ETHERADDRL];
	/* NOTE(review): byte order is not stated -- confirm against users. */
	uint16_t vxnrem_vlan;
	/*
	 * XXX KEBE SAYS put some lifetime/usetime/etc. here
	 * so we don't keep too many of these. Either that, or maybe
	 * convert to a qqcache or (patents expiring) ARC.
	 */
} vxlnat_remote_t;
/*
 * Take a reference on a remote-node entry by atomically incrementing
 * vxnrem_refcount.  The ASSERT trips if the increment wrapped the count
 * around to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnrem"
 * is evaluated more than once, so pass an expression without side effects.
 */
#define	VXNREM_REFHOLD(vxnrem) do { \
	atomic_inc_32(&(vxnrem)->vxnrem_refcount); \
	ASSERT((vxnrem)->vxnrem_refcount > 0); \
} while (0)
/*
 * Drop a reference on a remote-node entry.  membar_exit() is issued before
 * the atomic decrement, and vxlnat_remote_free() is called when the decrement
 * brings vxnrem_refcount to zero, so the entry may no longer exist afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnrem"
 * is evaluated more than once, so pass an expression without side effects.
 */
#define	VXNREM_REFRELE(vxnrem) do { \
	ASSERT((vxnrem)->vxnrem_refcount > 0); \
	membar_exit(); \
	if (atomic_dec_32_nv(&(vxnrem)->vxnrem_refcount) == 0) \
		vxlnat_remote_free(vxnrem); \
} while (0)
/* Invoked by VXNREM_REFRELE() once an entry's reference count reaches zero. */
extern void vxlnat_remote_free(vxlnat_remote_t *);
156
157 /*
158 * per-vnetid overarching structure. AVL tree keyed by vnetid.
159 * NOTE: Could be split into vnetid-hashed buckets to split any
160 * locks.
161 */
typedef struct vxlnat_vnet_s {
	/* Linkage in the AVL tree of vnets (keyed by vnetid). */
	avl_node_t vxnv_treenode;
	/*
	 * 1-1 IP mappings. (1st lookup for an in-to-out packet.)
	 * Will map to an IRE_LOCAL in IP.
	 */
	krwlock_t vxnv_fixed_lock; /* Presumably guards vxnv_fixed_ips. */
	avl_tree_t vxnv_fixed_ips;

	/*
	 * NAT flows. (2nd lookup for an in-to-out packet.)
	 * These are also conn_ts with outer-packet fields for out-to-in
	 * matches against a conn_t.
	 *
	 * NOTE: We're going to keep a separate tree for inner IPv6 NAT, if
	 * we ever need it.
	 */
	krwlock_t vxnv_flowv4_lock; /* Presumably guards vxnv_flows_v4. */
	avl_tree_t vxnv_flows_v4;

	/* NAT rules. (3rd lookup for an in-to-out packet.) */
	kmutex_t vxnv_rule_lock; /* Presumably guards vxnv_rules. */
	list_t vxnv_rules;

	/*
	 * Internal-network remote-nodes. (only lookup for out-to-in packet.)
	 * Entries here are also refheld by 1-1s or NAT flows.
	 */
	kmutex_t vxnv_remote_lock; /* Presumably guards vxnv_remotes. */
	avl_tree_t vxnv_remotes;

	/* Manipulated with VXNV_REFHOLD() and VXNV_REFRELE(). */
	uint32_t vxnv_refcount;
	uint32_t vxnv_vnetid; /* Wire byteorder for less swapping on LE */
} vxlnat_vnet_t;
/*
 * Take a reference on a vnet by atomically incrementing vxnv_refcount.
 * The ASSERT trips if the increment wrapped the count around to zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnv" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNV_REFHOLD(vxnv) do { \
	atomic_inc_32(&(vxnv)->vxnv_refcount); \
	ASSERT((vxnv)->vxnv_refcount > 0); \
} while (0)
/*
 * Drop a reference on a vnet.  membar_exit() is issued before the atomic
 * decrement, and vxlnat_vnet_free() is called when the decrement brings
 * vxnv_refcount to zero, so the vnet may no longer exist afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  "vxnv" is
 * evaluated more than once, so pass an expression without side effects.
 */
#define	VXNV_REFRELE(vxnv) do { \
	ASSERT((vxnv)->vxnv_refcount > 0); \
	membar_exit(); \
	if (atomic_dec_32_nv(&(vxnv)->vxnv_refcount) == 0) \
		vxlnat_vnet_free(vxnv); \
} while (0)
/* Invoked by VXNV_REFRELE() once a vnet's reference count reaches zero. */
extern void vxlnat_vnet_free(vxlnat_vnet_t *);
207
208 /*
209 * Endian-independent macros for rapid off-wire header reading. i.e. avoid
210 * [nh]to[hn]*()
211 *
212 * VXLAN_ID_WIRE32(id) ==> Zero-out "reserved" bits, preserve wire-order
213 * and position of vnetid.
 * VXLAN_FLAGS_WIRE32(flags) ==> Zero-out reserved bits, preserve wire-order
215 * and position of flags.
216 * VXLAN_F_VDI_WIRE ==> VXLAN_F_VDI, but w/o needing to swap.
217 *
218 * ALSO: HTON/NTOH for kernel-makes-right interactions with userland, which
219 * means shifting actual ID to/from low-24-bits of 32-bit word.
220 * VXLAN_ID_HTON(id)
221 * VXLAN_ID_NTOH(id)
222 *
223 * XXX KEBE ASKS ==> If not confusing to folks, move into sys/vxlan.h and
224 * have overlay's VXLAN encap adopt them?
225 */
#ifndef _BIG_ENDIAN
/*
 * i.e. _LITTLE_ENDIAN: a 32-bit load of a wire word leaves the first wire
 * byte in the low-order bits, so the masks sit at the low end and the
 * host<->wire ID conversions need a byte swap.
 */
#define	VXLAN_ID_WIRE32(id)	((id) & 0x00FFFFFF)
#define	VXLAN_F_VDI_WIRE	0x00000008
#define	VXLAN_ID_HTON(id)	htonl((id) << VXLAN_ID_SHIFT)
#define	VXLAN_ID_NTOH(id)	(ntohl(id) >> VXLAN_ID_SHIFT)
#else	/* _BIG_ENDIAN */
/* Host order already matches wire order; only mask and shift. */
#define	VXLAN_ID_WIRE32(id)	((id) & 0xFFFFFF00)
#define	VXLAN_F_VDI_WIRE	VXLAN_F_VDI
/* XXX KEBE ASKS, do masking here? */
#define	VXLAN_ID_HTON(id)	((id) << VXLAN_ID_SHIFT)
#define	VXLAN_ID_NTOH(id)	((id) >> VXLAN_ID_SHIFT)
#endif	/* !_BIG_ENDIAN */
/* Keep only the VDI flag bit of a wire-order flags word. */
#define	VXLAN_FLAGS_WIRE32(flags)	((flags) & VXLAN_F_VDI_WIRE)
239
/*
 * Module-wide state and entry points, defined outside this header.
 * NOTE(review): what vxlnat_mutex protects is not visible from here.
 */
extern kmutex_t vxlnat_mutex;
extern netstack_t *vxlnat_netstack;
/* Takes a vxn_msg_t; presumably returns 0 or an errno value -- confirm. */
extern int vxlnat_command(vxn_msg_t *);
/* Takes a uio; presumably copies state out to a reader -- confirm. */
extern int vxlnat_read_dump(struct uio *);
extern int vxlnat_vxlan_addr(in6_addr_t *);
extern void vxlnat_closesock(void);
/* Paired init/fini routines. */
extern void vxlnat_state_init(void);
extern void vxlnat_state_fini(void);
248
/*
 * Public-address bookkeeping: paired init/fini and hold/rele routines keyed
 * by an in6_addr_t.
 * NOTE(review): the meaning of vxlnat_public_hold()'s boolean_t argument and
 * return value is not visible from this header -- check the implementation.
 */
extern void vxlnat_public_init(void);
extern void vxlnat_public_fini(void);
extern boolean_t vxlnat_public_hold(in6_addr_t *, boolean_t);
extern void vxlnat_public_rele(in6_addr_t *);

/*
 * Two-pointer comparison function.  Presumably an AVL comparator for nodes
 * that start with an avl_node_t followed by an in6_addr_t (as
 * vxlnat_fixed_t and vxlnat_remote_t do) -- confirm.
 */
extern int vxlnat_tree_plus_in6_cmp(const void *, const void *);
255
/*
 * ire_recvfn & ire_sendfn functions for 1-1/fixed maps.
 * One receive and one send routine each for IPv4 and IPv6; the receive
 * variants return nothing, the send variants return an int.
 */
extern void vxlnat_fixed_ire_recv_v4(ire_t *, mblk_t *, void *,
    ip_recv_attr_t *);
extern void vxlnat_fixed_ire_recv_v6(ire_t *, mblk_t *, void *,
    ip_recv_attr_t *);
extern int vxlnat_fixed_ire_send_v4(ire_t *, mblk_t *, void *,
    ip_xmit_attr_t *, uint32_t *);
extern int vxlnat_fixed_ire_send_v6(ire_t *, mblk_t *, void *,
    ip_xmit_attr_t *, uint32_t *);
265
266
/*
 * Returns a vnet pointer for a 32-bit identifier.
 * NOTE(review): presumably the uint32_t is the vnetid (byte order to be
 * confirmed against vxnv_vnetid) and the boolean_t asks for creation when no
 * entry exists; whether the result is reference-held is not visible here.
 */
extern vxlnat_vnet_t *vxlnat_get_vnet(uint32_t, boolean_t);
268
269 #ifdef __cplusplus
270 }
271 #endif
272
273 #endif /* _INET_VXLNAT_IMPL_H */