1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018, Joyent, Inc.
14 */
15
16 #ifndef _INET_VXLNAT_IMPL_H
17 #define _INET_VXLNAT_IMPL_H
18
19 #include <inet/vxlnat.h>
20 #include <inet/ip.h>
21 #include <inet/ip6.h>
22 #include <inet/ip_ire.h>
23 #include <sys/clock_impl.h>
24 #include <sys/avl.h>
25 #include <sys/uio.h>
26 #include <sys/list.h>
27 #include <sys/byteorder.h>
28 #include <sys/vxlan.h>
29
30 /*
31 * XXX KEBE ASKS --> do we assume port IPPORT_VXLAN all the time?
32 * IF NOT, then we need to add ports to various things here that deal
33 * with the underlay network.
34 *
 * NOTE: All reference counts *include* the reference held by whatever
 * table/tree/list/etc. an entry is linked into. Once an entry is removed
 * from such a container, *_REFRELE() must be invoked, and it may or may
 * not free something.
38 */
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43
44 /*
45 * NAT RULES. Instantiated per-vnet, write-once/read-only entries,
46 * linkage/entries protected by "rule lock" outside this structure.
47 */
48 typedef struct vxlnat_rule_s {
49 list_node_t vxnr_link;
50 /* refheld link, or if NULL, this rule is "condemned" and no good. */
51 struct vxlnat_vnet_s *vxnr_vnet;
52 in6_addr_t vxnr_myaddr;
53 in6_addr_t vxnr_pubaddr;
54 uint8_t vxnr_myether[ETHERADDRL];
55 uint16_t vxnr_vlanid; /* Fabrics use this too. */
56 uint32_t vxnr_refcount;
57 uint8_t vxnr_prefix;
58 } vxlnat_rule_t;
/*
 * Take a reference on a NAT rule: atomically bump vxnr_refcount, then
 * assert that the count is non-zero after the increment.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnr" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNR_REFHOLD(vxnr) do {					\
	atomic_inc_32(&(vxnr)->vxnr_refcount);			\
	ASSERT((vxnr)->vxnr_refcount > 0);			\
	/* CONSTCOND */						\
} while (0)
/*
 * Drop a reference on a NAT rule.  Asserts that a reference is actually
 * held, issues membar_exit() ahead of the atomic decrement, and calls
 * vxlnat_rule_free() if the decrement takes the count to zero.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnr" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNR_REFRELE(vxnr) do {					\
	ASSERT((vxnr)->vxnr_refcount > 0);			\
	membar_exit();						\
	if (atomic_dec_32_nv(&(vxnr)->vxnr_refcount) == 0) {	\
		vxlnat_rule_free(vxnr);				\
	}							\
	/* CONSTCOND */						\
} while (0)
69 extern void vxlnat_rule_free(vxlnat_rule_t *);
70
71 /*
72 * 1-1 IP mapping.
73 */
74 typedef struct vxlnat_fixed_s {
75 avl_node_t vxnf_treenode;
76 in6_addr_t vxnf_addr; /* XXX KEBE ASKS - must it match to a rule? */
77 in6_addr_t vxnf_pubaddr; /* External IP. */
78 struct vxlnat_vnet_s *vxnf_vnet;
79 ire_t *vxnf_ire; /* Should be a local IRE from the ftable. */
80 struct vxlnat_remote_s *vxnf_remote;
81 uint8_t vxnf_myether[ETHERADDRL];
82 uint16_t vxnf_vlanid; /* Stored in network order for quick xmit. */
83 uint32_t vxnf_refcount;
84 boolean_t vxnf_clear_router; /* XXX KEBE SAYS CHEESY HACK */
85 } vxlnat_fixed_t;
/*
 * Take a reference on a 1-1 mapping: atomically bump vxnf_refcount, then
 * assert that the count is non-zero after the increment.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnf" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNF_REFHOLD(vxnf) do {					\
	atomic_inc_32(&(vxnf)->vxnf_refcount);			\
	ASSERT((vxnf)->vxnf_refcount > 0);			\
	/* CONSTCOND */						\
} while (0)
/*
 * Drop a reference on a 1-1 mapping.  Asserts that a reference is actually
 * held, issues membar_exit() ahead of the atomic decrement, and calls
 * vxlnat_fixed_free() if the decrement takes the count to zero.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnf" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNF_REFRELE(vxnf) do {					\
	ASSERT((vxnf)->vxnf_refcount > 0);			\
	membar_exit();						\
	if (atomic_dec_32_nv(&(vxnf)->vxnf_refcount) == 0) {	\
		vxlnat_fixed_free(vxnf);			\
	}							\
	/* CONSTCOND */						\
} while (0)
96 extern void vxlnat_fixed_free(vxlnat_fixed_t *);
97
98 /*
99 * REMOTE VXLAN destinations.
100 */
101 typedef struct vxlnat_remote_s {
102 avl_node_t vxnrem_treenode;
103 in6_addr_t vxnrem_addr; /* Same prefix as one in rule. */
104 in6_addr_t vxnrem_uaddr; /* Underlay VXLAN destination. */
105 struct vxlnat_vnet_s *vxnrem_vnet; /* Reference-held. */
106 uint32_t vxnrem_refcount;
107 uint8_t vxnrem_ether[ETHERADDRL];
108 uint16_t vxnrem_vlan;
109 /*
110 * XXX KEBE SAYS put some lifetime/usetime/etc. here
111 * so we don't keep too many of these. Either that, or maybe
112 * convert to a qqcache or (patents expiring) ARC.
113 */
114 } vxlnat_remote_t;
/*
 * Take a reference on a remote entry: atomically bump vxnrem_refcount, then
 * assert that the count is non-zero after the increment.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnrem" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNREM_REFHOLD(vxnrem) do {				\
	atomic_inc_32(&(vxnrem)->vxnrem_refcount);		\
	ASSERT((vxnrem)->vxnrem_refcount > 0);			\
	/* CONSTCOND */						\
} while (0)
/*
 * Drop a reference on a remote entry.  Asserts that a reference is actually
 * held, issues membar_exit() ahead of the atomic decrement, and calls
 * vxlnat_remote_free() if the decrement takes the count to zero.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnrem" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNREM_REFRELE(vxnrem) do {				\
	ASSERT((vxnrem)->vxnrem_refcount > 0);			\
	membar_exit();						\
	if (atomic_dec_32_nv(&(vxnrem)->vxnrem_refcount) == 0) { \
		vxlnat_remote_free(vxnrem);			\
	}							\
	/* CONSTCOND */						\
} while (0)
125 extern void vxlnat_remote_free(vxlnat_remote_t *);
126
127 /*
128 * per-vnetid overarching structure. AVL tree keyed by vnetid.
129 * NOTE: Could be split into vnetid-hashed buckets to split any
130 * locks.
131 */
132 typedef struct vxlnat_vnet_s {
133 avl_node_t vxnv_treenode;
134 /*
135 * 1-1 IP mappings. (1st lookup for an in-to-out packet.)
136 * Will map to SOMETHING in IP.
137 * XXX KEBE ASKS - conn_t or something else TBD?!
138 */
139 krwlock_t vxnv_fixed_lock;
140 avl_tree_t vxnv_fixed_ips;
141 /*
142 * NAT flows. (2nd lookup for an in-to-out packet.)
143 * These are also conn_ts with outer-packet fields for out-to-in
144 * matches against a conn_t.
145 */
146 krwlock_t vxnv_flow_lock;
147 avl_tree_t vxnv_flows;
148 /* NAT rules. (3rd lookup for an in-to-out packet.) */
149 kmutex_t vxnv_rule_lock;
150 list_t vxnv_rules;
151 /*
152 * Internal-network remote-nodes. (only lookup for out-to-in packet.)
153 * Entries here are also refheld by 1-1s or NAT flows.
154 */
155 kmutex_t vxnv_remote_lock;
156 avl_tree_t vxnv_remotes;
157
158 uint32_t vxnv_refcount;
159 uint32_t vxnv_vnetid; /* Wire byteorder for less swapping on LE */
160 } vxlnat_vnet_t;
/*
 * Take a reference on a vnet: atomically bump vxnv_refcount, then assert
 * that the count is non-zero after the increment.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnv" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNV_REFHOLD(vxnv) do {					\
	atomic_inc_32(&(vxnv)->vxnv_refcount);			\
	ASSERT((vxnv)->vxnv_refcount > 0);			\
	/* CONSTCOND */						\
} while (0)
/*
 * Drop a reference on a vnet.  Asserts that a reference is actually held,
 * issues membar_exit() ahead of the atomic decrement, and calls
 * vxlnat_vnet_free() if the decrement takes the count to zero.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used (with its trailing semicolon) as the body
 * of an unbraced if/else.  "vxnv" is evaluated more than once, so pass an
 * expression without side effects.
 */
#define	VXNV_REFRELE(vxnv) do {					\
	ASSERT((vxnv)->vxnv_refcount > 0);			\
	membar_exit();						\
	if (atomic_dec_32_nv(&(vxnv)->vxnv_refcount) == 0) {	\
		vxlnat_vnet_free(vxnv);				\
	}							\
	/* CONSTCOND */						\
} while (0)
171 extern void vxlnat_vnet_free(vxlnat_vnet_t *);
172
/*
 * Endian-independent macros for rapid off-wire header reading, i.e. ones
 * that avoid [nh]to[hn]*().
 *
 *	VXLAN_ID_WIRE32(id)		Zero out the "reserved" bits; preserve
 *					the wire order and position of the
 *					vnetid.
 *	VXLAN_FLAGS_WIRE32(flags)	Zero out the reserved bits; preserve
 *					the wire order and position of the
 *					flags.
 *	VXLAN_F_VDI_WIRE		VXLAN_F_VDI, but without needing to
 *					swap.
 *
 * ALSO: HTON/NTOH for kernel-makes-right interactions with userland, which
 * means shifting the actual ID to/from the low 24 bits of a 32-bit word:
 *
 *	VXLAN_ID_HTON(id)
 *	VXLAN_ID_NTOH(id)
 *
 * XXX KEBE ASKS ==> If not confusing to folks, move into sys/vxlan.h and
 * have overlay's VXLAN encap adopt them?
 */
#if defined(_BIG_ENDIAN)

#define	VXLAN_F_VDI_WIRE	VXLAN_F_VDI
#define	VXLAN_ID_WIRE32(id)	((id) & 0xFFFFFF00)
/* XXX KEBE ASKS, do masking here? */
#define	VXLAN_ID_HTON(id)	((id) << VXLAN_ID_SHIFT)
#define	VXLAN_ID_NTOH(id)	((id) >> VXLAN_ID_SHIFT)

#else	/* i.e. _LITTLE_ENDIAN */

#define	VXLAN_F_VDI_WIRE	0x08
#define	VXLAN_ID_WIRE32(id)	((id) & 0xFFFFFF)
#define	VXLAN_ID_HTON(id)	htonl((id) << VXLAN_ID_SHIFT)
#define	VXLAN_ID_NTOH(id)	(ntohl(id) >> VXLAN_ID_SHIFT)

#endif	/* _BIG_ENDIAN */

#define	VXLAN_FLAGS_WIRE32(flags)	((flags) & VXLAN_F_VDI_WIRE)
204
205 extern kmutex_t vxlnat_mutex;
206 extern netstack_t *vxlnat_netstack;
207 extern int vxlnat_command(vxn_msg_t *);
208 extern int vxlnat_read_dump(struct uio *);
209 extern int vxlnat_vxlan_addr(in6_addr_t *);
210 extern void vxlnat_closesock(void);
211 extern void vxlnat_state_init(void);
212 extern void vxlnat_state_fini(void);
213
214 extern void vxlnat_public_init(void);
215 extern void vxlnat_public_fini(void);
216 extern boolean_t vxlnat_public_hold(in6_addr_t *, boolean_t);
217 extern void vxlnat_public_rele(in6_addr_t *);
218
219 extern int vxlnat_tree_plus_in6_cmp(const void *, const void *);
220
221 /* ire_recvfn & ire_sendfn functions for 1-1/fixed maps. */
222 extern void vxlnat_fixed_ire_recv_v4(ire_t *, mblk_t *, void *,
223 ip_recv_attr_t *);
224 extern void vxlnat_fixed_ire_recv_v6(ire_t *, mblk_t *, void *,
225 ip_recv_attr_t *);
226 extern int vxlnat_fixed_ire_send_v4(ire_t *, mblk_t *, void *,
227 ip_xmit_attr_t *, uint32_t *);
228 extern int vxlnat_fixed_ire_send_v6(ire_t *, mblk_t *, void *,
229 ip_xmit_attr_t *, uint32_t *);
230
231
232 extern vxlnat_vnet_t *vxlnat_get_vnet(uint32_t, boolean_t);
233
234 #ifdef __cplusplus
235 }
236 #endif
237
238 #endif /* _INET_VXLNAT_IMPL_H */