1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 */
15
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <netinet/in.h>
19 #include <inet/ip.h>
20 #include <inet/tcp_impl.h>
21 #include <inet/udp_impl.h>
22
23 #include <inet/vxlnat_impl.h>
24
/*
 * Functions for creating and managing the conn_t attached to a NAT flow,
 * plus the receive-side functions installed on those conn_t's, so that we
 * can exploit ipclassifier for NAT flows.
 */
29
30 static void
31 vxlnat_external_tcp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
32 {
33 /* XXX KEBE SAYS FOR NOW, drop. */
34 freemsg(mp);
35 }
36
37 static void
38 vxlnat_external_tcp_v6(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
39 {
40 /* XXX KEBE SAYS FOR NOW, drop. */
41 freemsg(mp);
42 }
43
44 static void
45 vxlnat_external_tcp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
46 ip_recv_attr_t *ira)
47 {
48 /* XXX KEBE SAYS FOR NOW, drop. */
49 freemsg(mp);
50 }
51
52 static void
53 vxlnat_external_tcp_icmp_v6(void *arg, mblk_t *mp, void *arg2,
54 ip_recv_attr_t *ira)
55 {
56 /* XXX KEBE SAYS FOR NOW, drop. */
57 freemsg(mp);
58 }
59
60 static void
61 vxlnat_external_udp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
62 {
63 /* XXX KEBE SAYS FOR NOW, drop. */
64 freemsg(mp);
65 }
66
67 static void
68 vxlnat_external_udp_v6(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
69 {
70 /* XXX KEBE SAYS FOR NOW, drop. */
71 freemsg(mp);
72 }
73
74 static void
75 vxlnat_external_udp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
76 ip_recv_attr_t *ira)
77 {
78 /* XXX KEBE SAYS FOR NOW, drop. */
79 freemsg(mp);
80 }
81
82 static void
83 vxlnat_external_udp_icmp_v6(void *arg, mblk_t *mp, void *arg2,
84 ip_recv_attr_t *ira)
85 {
86 /* XXX KEBE SAYS FOR NOW, drop. */
87 freemsg(mp);
88 }
89
90 static void
91 vxlnat_external_icmp_v4(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
92 {
93 /* XXX KEBE SAYS FOR NOW, drop. */
94 freemsg(mp);
95 }
96
97 static void
98 vxlnat_external_icmp_icmp_v4(void *arg, mblk_t *mp, void *arg2,
99 ip_recv_attr_t *ira)
100 {
101 /* XXX KEBE SAYS FOR NOW, drop. */
102 freemsg(mp);
103 }
104
105 boolean_t
106 vxlnat_new_conn(vxlnat_flow_t *flow)
107 {
108 conn_t *connp;
109 uint16_t new_lport;
110 uint8_t protocol = flow->vxnfl_protocol;
111 int rc, ntries = 3;
112
113 /*
114 * XXX KEBE SAYS -- Use KM_NORMALPRI because we're likely in interrupt
115 * context when we call this function. If ipcl_conn_create() becomes
116 * a problem even with these flags, we may need to go asynchronous.
117 * XXX KEBE ALSO SAYS -- See TCP's handling of new inbound
118 * connections.
119 */
120 switch (protocol) {
121 case IPPROTO_TCP:
122 case IPPROTO_UDP:
123 case IPPROTO_ICMP:
124 /* case IPPROTO_ICMP6: */
125 break;
126 default:
127 return (B_FALSE);
128 }
129 connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP | KM_NORMALPRI,
130 vxlnat_netstack);
131 if (connp == NULL)
132 return (B_FALSE);
133
134 /*
135 * XXX KEBE SAYS FILL IN ALL SORTS OF conn_t STUFF HERE.
136 * Draw inspiration from iptun_conn_create, but also include
137 * protocol-specific thingies.
138 *
139 * NOTE: As of right this moment, I'm imagining that for
140 * inside-to-outside, conn_ip_output() will NOT be used, but rather
141 * ire_forward_recv_v*() will be, not unlike the fixed/1-1 path, and
142 * that the conn_t's *receive-side* features are the only ones to be
143 * used.
144 *
145 * Also, like UDP, there will be no verifyicmp method assigned.
146 * (Oddly, iptun does this, but it always returns true. Maybe that's a
147 * bug in iptun?)
148 */
149
150 /* connp->conn_flags |= .... */
151 connp->conn_priv = flow; /* XXX is this a problem for freeing? */
152
153 /*
154 * XXX KEBE SAYS Don't worry about conn_ixa FOR NOW, but maybe
155 * fill it in for use later?
156 */
157
158 /*
159 * ALWAYS set this to GLOBAL_ZONEID. We check at open() for
160 * a non-exclusive zone open (we disallow it), and for exclusive-
161 * stack zones, we want IP thinking (correctly) we own the netstack.
162 */
163 connp->conn_zoneid = GLOBAL_ZONEID;
164 /*
165 * cred_t dance is because we may be getting this straight from
166 * interrupt context.
167 */
168 connp->conn_cred = zone_get_kcred(netstack_get_zoneid(vxlnat_netstack));
169 connp->conn_cpid = NOPID;
170
171 ASSERT(connp->conn_ref == 1);
172
173 connp->conn_family = flow->vxnfl_isv4 ? AF_INET : AF_INET6;
174
175 CONN_INC_REF(connp); /* For the following... */
176 flow->vxnfl_connp = connp;
177
178 /* XXX KEBE SAYS Assume the right thing v4/v6-wise happens for now. */
179 connp->conn_laddr_v6 = flow->vxnfl_rule->vxnr_pubaddr;
180 connp->conn_faddr_v6 = flow->vxnfl_dst;
181
182 /* XXX KEBE SAYS REMAP PORTS HERE ... */
183 connp->conn_ports = flow->vxnfl_ports;
184 connp->conn_proto = protocol;
185
186 /* XXX KEBE ASKS INSERT HERE? */
187 do {
188
189 switch (protocol) {
190 case IPPROTO_TCP: {
191 tcp_stack_t *tcps = vxlnat_netstack->netstack_tcp;
192 tcp_t dummy = {.tcp_tcps = tcps, .tcp_connp = connp};
193
194 /* Fill in with TCP-specific recv/recvicmp. */
195 if (flow->vxnfl_isv4) {
196 connp->conn_recv = vxlnat_external_tcp_v4;
197 connp->conn_recvicmp =
198 vxlnat_external_tcp_icmp_v4;
199 } else {
200 connp->conn_recv = vxlnat_external_tcp_v6;
201 connp->conn_recvicmp =
202 vxlnat_external_tcp_icmp_v6;
203 }
204 /* And set new_lport. */
205 new_lport = tcp_update_next_port(
206 tcps->tcps_next_port_to_try, &dummy, B_TRUE);
207 break;
208 }
209 case IPPROTO_UDP: {
210 udp_stack_t *udps = vxlnat_netstack->netstack_udp;
211 udp_t dummy = {.udp_us = udps, .udp_connp = connp };
212
213 /* Fill in with UDP-specific recv/recvicmp. */
214 if (flow->vxnfl_isv4) {
215 connp->conn_recv = vxlnat_external_udp_v4;
216 connp->conn_recvicmp =
217 vxlnat_external_udp_icmp_v4;
218 } else {
219 connp->conn_recv = vxlnat_external_udp_v6;
220 connp->conn_recvicmp =
221 vxlnat_external_udp_icmp_v6;
222 }
223 /* And set new_lport. */
224 new_lport = udp_update_next_port(&dummy,
225 udps->us_next_port_to_try, B_TRUE);
226 break;
227 }
228 case IPPROTO_ICMP: {
229 /* NOTE: Only an IPv4 version of this is needed. */
230 connp->conn_recv = vxlnat_external_icmp_v4;
231 connp->conn_recv = vxlnat_external_icmp_icmp_v4;
232 /*
233 * XXX KEBE SAYS -- I don't think we can tell the real
234 * IP code to bind an ICMP socket to anything beyond
235 * the addresses. But also we allow multiple ICMP
236 * conn_ts, which could mean duplicate packets. :-/
237 */
238 new_lport = 0;
239 break;
240 }
241 default:
242 /* Should never reach here... */
243 cmn_err(CE_PANIC, "vxnfl_protocol corruption!");
244 return (B_FALSE);
245 }
246 connp->conn_lport = new_lport;
247
248 rc = ipcl_conn_insert(connp);
249 switch (rc) {
250 case 0:
251 break;
252 case EADDRINUSE:
253 /* Try rewhacking the ports if we can. */
254 switch (protocol) {
255 case IPPROTO_TCP:
256 case IPPROTO_UDP:
257 /* Try again... */
258 break;
259 default:
260 /* Give up now. */
261 ntries = 1;
262 break;
263 }
264 break;
265 default:
266 /* GET OUT, NOW! */
267 DTRACE_PROBE1(vxlnat__new__conn__badins, int, rc);
268 ntries = 1;
269 break;
270 }
271 } while (rc != 0 && --ntries > 0);
272
273 if (rc != 0) {
274 /* Trash this conn. */
275 DTRACE_PROBE3(vxlnat__new__conn__collision, int, rc,
276 conn_t *, connp, vxlnat_flow_t *, flow);
277 CONN_DEC_REF(connp);
278 CONN_DEC_REF(connp);
279 /*
280 * XXX KEBE ASKS Anything else? Last CONN_DEC_REF should
281 * trigger destroy.
282 */
283 flow->vxnfl_connp = NULL;
284 return (B_FALSE);
285 }
286
287 return (B_TRUE);
288 }
289
290 void
291 vxlnat_activate_conn(vxlnat_flow_t *flow)
292 {
293 conn_t *connp = flow->vxnfl_connp;
294
295 mutex_enter(&connp->conn_lock);
296 connp->conn_state_flags &= ~CONN_INCIPIENT;
297 mutex_exit(&connp->conn_lock);
298 /* XXX KEBE ASKS OR INSERT HERE? */
299 }
300
301 #ifdef notyet
302 void
303 vxlnat_deactivate_conn(vxlnat_flow_t *flow)
304 {
305 conn_t *connp = flow->vxnfl_connp;
306
307 ip_quiesce_conn(connp);
308 /* XXX KEBE ASKS ipcl_hash_remove()? */
309 }
310 #endif