Print this page
OS-7667 IPFilter needs to keep and report state for cloud firewall logging
Portions contributed by: Mike Gerdts <mike.gerdts@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ new/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
1 1 /*
2 2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
3 3 *
4 4 * See the IPFILTER.LICENCE file for details on licencing.
5 5 *
6 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 7 *
8 - * Copyright 2018 Joyent, Inc.
8 + * Copyright 2019 Joyent, Inc.
9 9 */
10 10
11 11 #if !defined(lint)
12 12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 14 #endif
15 15
16 16 #include <sys/types.h>
17 17 #include <sys/errno.h>
18 18 #include <sys/param.h>
19 19 #include <sys/cpuvar.h>
20 20 #include <sys/open.h>
21 21 #include <sys/ioctl.h>
22 22 #include <sys/filio.h>
23 23 #include <sys/systm.h>
24 24 #include <sys/strsubr.h>
25 25 #include <sys/strsun.h>
26 26 #include <sys/cred.h>
27 27 #include <sys/ddi.h>
28 28 #include <sys/sunddi.h>
29 29 #include <sys/ksynch.h>
30 30 #include <sys/kmem.h>
31 31 #include <sys/mac_provider.h>
32 32 #include <sys/mkdev.h>
33 33 #include <sys/protosw.h>
34 34 #include <sys/socket.h>
35 35 #include <sys/dditypes.h>
36 36 #include <sys/cmn_err.h>
37 37 #include <sys/zone.h>
38 38 #include <net/if.h>
39 39 #include <net/af.h>
40 40 #include <net/route.h>
41 41 #include <netinet/in.h>
42 42 #include <netinet/in_systm.h>
43 43 #include <netinet/ip.h>
44 44 #include <netinet/ip_var.h>
45 45 #include <netinet/tcp.h>
46 46 #include <netinet/udp.h>
47 47 #include <netinet/tcpip.h>
48 48 #include <netinet/ip_icmp.h>
49 49 #include "netinet/ip_compat.h"
50 50 #ifdef USE_INET6
51 51 # include <netinet/icmp6.h>
52 52 #endif
53 53 #include "netinet/ip_fil.h"
54 54 #include "netinet/ip_nat.h"
55 55 #include "netinet/ip_frag.h"
56 56 #include "netinet/ip_state.h"
57 57 #include "netinet/ip_auth.h"
58 58 #include "netinet/ip_proxy.h"
59 59 #include "netinet/ipf_stack.h"
60 60 #ifdef IPFILTER_LOOKUP
61 61 # include "netinet/ip_lookup.h"
62 62 #endif
63 63 #include <inet/ip_ire.h>
64 64
65 65 #include <sys/md5.h>
66 66 #include <sys/neti.h>
67 67
68 68 static int frzerostats __P((caddr_t, ipf_stack_t *));
69 69 static int fr_setipfloopback __P((int, ipf_stack_t *));
70 70 static int fr_enableipf __P((ipf_stack_t *, int));
71 71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
72 72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
73 73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
74 74 static int ipf_hook __P((hook_data_t, int, int, void *));
75 75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
76 76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
77 77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
78 78 void *));
79 79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
80 80 static int ipf_hook4 __P((hook_data_t, int, int, void *));
81 81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
82 82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
83 83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
84 84 void *));
85 85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
86 86 void *));
87 87 static int ipf_hook6 __P((hook_data_t, int, int, void *));
88 88 static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
89 89 void *));
90 90 static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
91 91 void *));
92 92 static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
93 93 void *));
94 94 static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
95 95 void *));
96 96
97 97 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
98 98 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
99 99 void *));
100 100
101 101 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
102 102 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
103 103
104 104 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
105 105 const char *, const char *, const char *));
106 106 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
107 107 const char *, const char *, const char *));
108 108
109 109 #if SOLARIS2 < 10
110 110 #if SOLARIS2 >= 7
111 111 u_int *ip_ttl_ptr = NULL;
112 112 u_int *ip_mtudisc = NULL;
113 113 # if SOLARIS2 >= 8
114 114 int *ip_forwarding = NULL;
115 115 u_int *ip6_forwarding = NULL;
116 116 # else
117 117 u_int *ip_forwarding = NULL;
118 118 # endif
119 119 #else
120 120 u_long *ip_ttl_ptr = NULL;
121 121 u_long *ip_mtudisc = NULL;
122 122 u_long *ip_forwarding = NULL;
123 123 #endif
124 124 #endif
125 125
126 126 vmem_t *ipf_minor; /* minor number arena */
127 127 void *ipf_state; /* DDI state */
128 128
129 129 /*
130 130 * GZ-controlled and per-zone stacks:
131 131 *
132 132 * For each non-global zone, we create two ipf stacks: the per-zone stack and
133 133 * the GZ-controlled stack. The per-zone stack can be controlled and observed
134 134 * from inside the zone or from the global zone. The GZ-controlled stack can
135 135 * only be controlled and observed from the global zone (though the rules
136 136 * still only affect that non-global zone).
137 137 *
138 138 * The two hooks are always arranged so that the GZ-controlled stack is always
139 139 * "outermost" with respect to the zone. The traffic flow then looks like
140 140 * this:
141 141 *
142 142 * Inbound:
143 143 *
144 144 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
145 145 *
146 146 * Outbound:
147 147 *
148 148 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
149 149 */
150 150
151 151 /* IPv4 hook names */
152 152 char *hook4_nicevents = "ipfilter_hook4_nicevents";
153 153 char *hook4_nicevents_gz = "ipfilter_hook4_nicevents_gz";
154 154 char *hook4_in = "ipfilter_hook4_in";
155 155 char *hook4_in_gz = "ipfilter_hook4_in_gz";
156 156 char *hook4_out = "ipfilter_hook4_out";
157 157 char *hook4_out_gz = "ipfilter_hook4_out_gz";
158 158 char *hook4_loop_in = "ipfilter_hook4_loop_in";
159 159 char *hook4_loop_in_gz = "ipfilter_hook4_loop_in_gz";
160 160 char *hook4_loop_out = "ipfilter_hook4_loop_out";
161 161 char *hook4_loop_out_gz = "ipfilter_hook4_loop_out_gz";
162 162
163 163 /* IPv6 hook names */
164 164 char *hook6_nicevents = "ipfilter_hook6_nicevents";
165 165 char *hook6_nicevents_gz = "ipfilter_hook6_nicevents_gz";
166 166 char *hook6_in = "ipfilter_hook6_in";
167 167 char *hook6_in_gz = "ipfilter_hook6_in_gz";
168 168 char *hook6_out = "ipfilter_hook6_out";
169 169 char *hook6_out_gz = "ipfilter_hook6_out_gz";
170 170 char *hook6_loop_in = "ipfilter_hook6_loop_in";
171 171 char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz";
172 172 char *hook6_loop_out = "ipfilter_hook6_loop_out";
173 173 char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz";
174 174
175 175 /* vnd IPv4/v6 hook names */
176 176 char *hook4_vnd_in = "ipfilter_hookvndl3v4_in";
177 177 char *hook4_vnd_in_gz = "ipfilter_hookvndl3v4_in_gz";
178 178 char *hook6_vnd_in = "ipfilter_hookvndl3v6_in";
179 179 char *hook6_vnd_in_gz = "ipfilter_hookvndl3v6_in_gz";
180 180 char *hook4_vnd_out = "ipfilter_hookvndl3v4_out";
181 181 char *hook4_vnd_out_gz = "ipfilter_hookvndl3v4_out_gz";
182 182 char *hook6_vnd_out = "ipfilter_hookvndl3v6_out";
183 183 char *hook6_vnd_out_gz = "ipfilter_hookvndl3v6_out_gz";
184 184
185 185 /* viona hook names */
186 186 char *hook_viona_in = "ipfilter_hookviona_in";
187 187 char *hook_viona_in_gz = "ipfilter_hookviona_in_gz";
188 188 char *hook_viona_out = "ipfilter_hookviona_out";
189 189 char *hook_viona_out_gz = "ipfilter_hookviona_out_gz";
190 190
191 191 /* ------------------------------------------------------------------------ */
192 192 /* Function: ipldetach */
193 193 /* Returns: int - 0 == success, else error. */
194 194 /* Parameters: ifs(I) - pointer to the ipf stack instance being detached */
195 195 /* */
196 196 /* This function is responsible for undoing anything that might have been */
197 197 /* done in a call to iplattach(). It must be able to clean up from a call */
198 198 /* to iplattach() that did not succeed. Why might that happen? Someone */
199 199 /* configures a table to be so large that we cannot allocate enough memory */
200 200 /* for it. */
201 201 /* ------------------------------------------------------------------------ */
/*
 * Locking: entered with ifs_ipf_global held as writer (asserted below). The
 * lock is dropped while the hooks are unregistered and is held again as
 * writer on every return path.
 */
202 202 int ipldetach(ifs)
203 203 ipf_stack_t *ifs;
204 204 {
205 205
206 206 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
207 207
208 208 #if SOLARIS2 < 10
209 209
210 210 if (ifs->ifs_fr_control_forwarding & 2) {
211 211 if (ip_forwarding != NULL)
212 212 *ip_forwarding = 0;
213 213 #if SOLARIS2 >= 8
214 214 if (ip6_forwarding != NULL)
215 215 *ip6_forwarding = 0;
216 216 #endif
217 217 }
218 218 #endif
219 219
220 220 /*
221 221 * This lock needs to be dropped around the net_hook_unregister calls
222 222 * because we can deadlock here with:
223 223 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
224 224 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
225 225 */
226 226 RWLOCK_EXIT(&ifs->ifs_ipf_global);
227 227
/*
 * UNDO_HOOK(_f, _b, _e, _h): tear down one hook on this stack.
 *   _f - ifs member holding the protocol handle
 *   _b - ifs member flag recording whether the hook is registered
 *   _e - hook event name passed to net_hook_unregister()
 *   _h - ifs member holding the hook_t
 * Nothing is done when the protocol handle is NULL. When the flag is set the
 * hook is unregistered; a result of 0 or ENXIO clears the flag and frees the
 * hook_t, any other result leaves the flag set and the hook_t in place. When
 * the flag is clear, a hook_t that is still allocated is simply freed.
 */
228 228 #define UNDO_HOOK(_f, _b, _e, _h) \
229 229 do { \
230 230 if (ifs->_f != NULL) { \
231 231 if (ifs->_b) { \
232 232 int tmp = net_hook_unregister(ifs->_f, \
233 233 _e, ifs->_h); \
234 234 ifs->_b = (tmp != 0 && tmp != ENXIO); \
235 235 if (!ifs->_b && ifs->_h != NULL) { \
236 236 hook_free(ifs->_h); \
237 237 ifs->_h = NULL; \
238 238 } \
239 239 } else if (ifs->_h != NULL) { \
240 240 hook_free(ifs->_h); \
241 241 ifs->_h = NULL; \
242 242 } \
243 243 } \
244 244 _NOTE(CONSTCOND) \
245 245 } while (0)
246 246
247 247 /*
248 248 * Remove IPv6 Hooks
249 249 */
250 250 if (ifs->ifs_ipf_ipv6 != NULL) {
251 251 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
252 252 NH_PHYSICAL_IN, ifs_ipfhook6_in);
253 253 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
254 254 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
255 255 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
256 256 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
257 257 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
258 258 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
259 259 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
260 260 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
261 261
/* The handle is only forgotten once the protocol reference is released. */
262 262 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
263 263 goto detach_failed;
264 264 ifs->ifs_ipf_ipv6 = NULL;
265 265 }
266 266
267 267 /*
268 268 * Remove IPv4 Hooks
269 269 */
270 270 if (ifs->ifs_ipf_ipv4 != NULL) {
271 271 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
272 272 NH_PHYSICAL_IN, ifs_ipfhook4_in);
273 273 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
274 274 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
275 275 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
276 276 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
277 277 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
278 278 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
279 279 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
280 280 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
281 281
282 282 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
283 283 goto detach_failed;
284 284 ifs->ifs_ipf_ipv4 = NULL;
285 285 }
286 286
287 287 /*
288 288 * Remove VND hooks
289 289 */
290 290 if (ifs->ifs_ipf_vndl3v4 != NULL) {
291 291 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
292 292 NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
293 293 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
294 294 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
295 295
296 296 if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
297 297 goto detach_failed;
298 298 ifs->ifs_ipf_vndl3v4 = NULL;
299 299 }
300 300
301 301 if (ifs->ifs_ipf_vndl3v6 != NULL) {
302 302 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
303 303 NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
304 304 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
305 305 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
306 306
307 307 if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
308 308 goto detach_failed;
309 309 ifs->ifs_ipf_vndl3v6 = NULL;
310 310 }
311 311
312 312 /*
313 313 * Remove notification of viona hooks
314 314 */
315 315 net_instance_notify_unregister(ifs->ifs_netid,
316 316 ipf_hook_instance_notify);
317 317
318 318 #undef UNDO_HOOK
319 319
320 320 /*
321 321 * Normally, viona will unregister itself before ipldetach() is called,
322 322 * so these will be no-ops, but out of caution, we try to make sure
323 323 * we've removed any of our references.
324 324 */
325 325 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
326 326 NH_PHYSICAL_IN);
327 327 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
328 328 NH_PHYSICAL_OUT);
329 329
330 330 {
331 331 char netidstr[12]; /* Large enough for INT_MAX + NUL */
332 332 (void) snprintf(netidstr, sizeof (netidstr), "%d",
333 333 ifs->ifs_netid);
334 334
335 335 /*
336 336 * The notify callbacks expect the netid value passed as a
337 337 * string in the third argument. To prevent confusion if
338 338 * traced, we pass the same value the nethook framework would
339 339 * pass, even though the callback does not currently use the
340 340 * value.
341 341 */
342 342 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
343 343 NULL, Hn_VIONA);
344 344 }
345 345
346 346 #ifdef IPFDEBUG
347 347 cmn_err(CE_CONT, "ipldetach()\n");
348 348 #endif
349 349
/* Hook teardown is finished; take the global lock back as writer. */
350 350 WRITE_ENTER(&ifs->ifs_ipf_global);
351 351 fr_deinitialise(ifs);
352 352
/* Flush the rule lists twice: once with FR_INACTIVE set and once without. */
353 353 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
354 354 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
355 355
/* Only destroy the locks if iplattach() got as far as creating them. */
356 356 if (ifs->ifs_ipf_locks_done == 1) {
357 357 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
358 358 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
359 359 RW_DESTROY(&ifs->ifs_ipf_tokens);
360 360 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
361 361 ifs->ifs_ipf_locks_done = 0;
362 362 }
363 363
/*
 * Report failure if any IPv4/IPv6 hook flag is still set, i.e. an unregister
 * above returned something other than 0 or ENXIO.
 * NOTE(review): the vnd and viona hook flags are not examined here - confirm
 * whether that is intentional.
 */
364 364 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
365 365 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
366 366 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
367 367 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
368 368 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
369 369 return -1;
370 370
371 371 return 0;
372 372
/*
 * Reached when a net_protocol_release() fails. The remaining teardown steps
 * are skipped; the lock is retaken so it is write-held on return, as it was
 * on entry.
 */
373 373 detach_failed:
374 374 WRITE_ENTER(&ifs->ifs_ipf_global);
375 375 return -1;
376 376 }
377 377
/*
 * Attach ipf to one stack instance: set the default tunables, create the
 * per-stack locks, run fr_initialise(), then register the nethook callbacks
 * for IPv4, IPv6 and the vnd IPv4/IPv6 families and ask to be notified about
 * nethook instances (used for viona).
 *
 * Returns 0 on success and -1 on failure.
 *
 * Locking: entered with ifs_ipf_global held as writer (asserted below). The
 * lock is dropped while hooks are registered and is held again as writer on
 * every return path.
 */
378 378 int iplattach(ifs)
379 379 ipf_stack_t *ifs;
380 380 {
381 381 #if SOLARIS2 < 10
382 382 int i;
383 383 #endif
384 384 netid_t id = ifs->ifs_netid;
385 385
386 386 #ifdef IPFDEBUG
387 387 cmn_err(CE_CONT, "iplattach()\n");
388 388 #endif
389 389
390 390 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
391 391 ifs->ifs_fr_flags = IPF_LOGGING;
392 392 #ifdef _KERNEL
393 393 ifs->ifs_fr_update_ipid = 0;
394 394 #else
395 395 ifs->ifs_fr_update_ipid = 1;
396 396 #endif
397 397 ifs->ifs_fr_minttl = 4;
398 398 ifs->ifs_fr_icmpminfragmtu = 68;
399 399 #if defined(IPFILTER_DEFAULT_BLOCK)
400 400 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
401 401 #else
402 402 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
403 403 #endif
404 404
405 405 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
406 406 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
407 407 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
408 408 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
409 409 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
/* Records that the four locks above exist; ipldetach() checks this flag. */
410 410 ifs->ifs_ipf_locks_done = 1;
411 411
/* ifs_ipf_global has not been dropped yet, so it is still held on return. */
412 412 if (fr_initialise(ifs) < 0)
413 413 return -1;
414 414
415 415 /*
416 416 * For incoming packets, we want the GZ-controlled hooks to run before
417 417 * the per-zone hooks, regardless of what order they are installed.
418 418 * See the "GZ-controlled and per-zone stacks" comment block at the top
419 419 * of this file.
420 420 */
/*
 * Note: this macro (and HOOK_INIT_GZ_AFTER below) expands to several
 * statements that are not wrapped in do { } while (0), so it must only be
 * used as a statement in its own right. The last parameter (a) is not
 * referenced by the expansion.
 */
421 421 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
422 422 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
423 423 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
424 424 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
425 425
426 426 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
427 427 hook4_nicevents, hook4_nicevents_gz, ifs);
428 428 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
429 429 hook4_in, hook4_in_gz, ifs);
430 430 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
431 431 hook4_loop_in, hook4_loop_in_gz, ifs);
432 432
433 433 /*
434 434 * For outgoing packets, we want the GZ-controlled hooks to run after
435 435 * the per-zone hooks, regardless of what order they are installed.
436 436 * See the "GZ-controlled and per-zone stacks" comment block at the top
437 437 * of this file.
438 438 */
439 439 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
440 440 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
441 441 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
442 442 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
443 443
444 444 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
445 445 hook4_out, hook4_out_gz, ifs);
446 446 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
447 447 hook4_loop_out, hook4_loop_out_gz, ifs);
448 448
449 449 /*
450 450 * If we hold this lock over all of the net_hook_register calls, we
451 451 * can cause a deadlock to occur with the following lock ordering:
452 452 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
453 453 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
454 454 */
455 455 RWLOCK_EXIT(&ifs->ifs_ipf_global);
456 456
457 457 /*
458 458 * Add IPv4 hooks
459 459 */
460 460 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
461 461 if (ifs->ifs_ipf_ipv4 == NULL)
462 462 goto hookup_failed;
463 463
/* Each ifs_hook* flag records whether the matching registration succeeded. */
464 464 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
465 465 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
466 466 if (!ifs->ifs_hook4_nic_events)
467 467 goto hookup_failed;
468 468
469 469 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
470 470 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
471 471 if (!ifs->ifs_hook4_physical_in)
472 472 goto hookup_failed;
473 473
474 474 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
475 475 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
476 476 if (!ifs->ifs_hook4_physical_out)
477 477 goto hookup_failed;
478 478
/*
 * Loopback hooks are registered here only when ifs_ipf_loopback is already
 * set; fr_setipfloopback() registers or unregisters them on request.
 */
479 479 if (ifs->ifs_ipf_loopback) {
480 480 ifs->ifs_hook4_loopback_in = (net_hook_register(
481 481 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
482 482 ifs->ifs_ipfhook4_loop_in) == 0);
483 483 if (!ifs->ifs_hook4_loopback_in)
484 484 goto hookup_failed;
485 485
486 486 ifs->ifs_hook4_loopback_out = (net_hook_register(
487 487 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
488 488 ifs->ifs_ipfhook4_loop_out) == 0);
489 489 if (!ifs->ifs_hook4_loopback_out)
490 490 goto hookup_failed;
491 491 }
492 492
493 493 /*
494 494 * Add IPv6 hooks
495 495 */
496 496 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
497 497 if (ifs->ifs_ipf_ipv6 == NULL)
498 498 goto hookup_failed;
499 499
500 500 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
501 501 hook6_nicevents, hook6_nicevents_gz, ifs);
502 502 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
503 503 hook6_in, hook6_in_gz, ifs);
504 504 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
505 505 hook6_loop_in, hook6_loop_in_gz, ifs);
506 506 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
507 507 hook6_out, hook6_out_gz, ifs);
508 508 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
509 509 hook6_loop_out, hook6_loop_out_gz, ifs);
510 510
511 511 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
512 512 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
513 513 if (!ifs->ifs_hook6_nic_events)
514 514 goto hookup_failed;
515 515
516 516 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
517 517 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
518 518 if (!ifs->ifs_hook6_physical_in)
519 519 goto hookup_failed;
520 520
521 521 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
522 522 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
523 523 if (!ifs->ifs_hook6_physical_out)
524 524 goto hookup_failed;
525 525
526 526 if (ifs->ifs_ipf_loopback) {
527 527 ifs->ifs_hook6_loopback_in = (net_hook_register(
528 528 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
529 529 ifs->ifs_ipfhook6_loop_in) == 0);
530 530 if (!ifs->ifs_hook6_loopback_in)
531 531 goto hookup_failed;
532 532
533 533 ifs->ifs_hook6_loopback_out = (net_hook_register(
534 534 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
535 535 ifs->ifs_ipfhook6_loop_out) == 0);
536 536 if (!ifs->ifs_hook6_loopback_out)
537 537 goto hookup_failed;
538 538 }
539 539
540 540 /*
541 541 * Add VND INET hooks
542 542 */
543 543 ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
544 544 if (ifs->ifs_ipf_vndl3v4 == NULL)
545 545 goto hookup_failed;
546 546
547 547 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
548 548 hook4_vnd_in, hook4_vnd_in_gz, ifs);
549 549 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
550 550 hook4_vnd_out, hook4_vnd_out_gz, ifs);
551 551 ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
552 552 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
553 553 if (!ifs->ifs_hookvndl3v4_physical_in)
554 554 goto hookup_failed;
555 555
556 556 ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
557 557 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
558 558 if (!ifs->ifs_hookvndl3v4_physical_out)
559 559 goto hookup_failed;
560 560
561 561
562 562 /*
563 563 * VND INET6 hooks
564 564 */
565 565 ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
566 566 if (ifs->ifs_ipf_vndl3v6 == NULL)
567 567 goto hookup_failed;
568 568
569 569 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
570 570 hook6_vnd_in, hook6_vnd_in_gz, ifs);
571 571 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
572 572 hook6_vnd_out, hook6_vnd_out_gz, ifs);
573 573 ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
574 574 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
575 575 if (!ifs->ifs_hookvndl3v6_physical_in)
576 576 goto hookup_failed;
577 577
578 578 ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
579 579 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
580 580 if (!ifs->ifs_hookvndl3v6_physical_out)
581 581 goto hookup_failed;
582 582
583 583 /*
584 584 * VIONA INET hooks. While the nethook framework allows us to register
585 585 * hooks for events that haven't been registered yet, we instead
586 586 * register and unregister our hooks in response to notifications
587 587 * about the viona hooks from the nethook framework. This prevents
588 588 * problems when the viona module gets unloaded while the ipf module
589 589 * does not. If we do not unregister our hooks after the viona module
590 590 * is unloaded, the viona module cannot later re-register them if it
591 591 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded
592 592 * even on DEBUG kernels, they do not experience this issue.
593 593 */
594 594 if (net_instance_notify_register(id, ipf_hook_instance_notify,
595 595 ifs) != 0)
596 596 goto hookup_failed;
597 597
598 598 /*
599 599 * Reacquire ipf_global, now it is safe.
600 600 */
601 601 WRITE_ENTER(&ifs->ifs_ipf_global);
602 602
603 603 /* Do not use private interface ip_params_arr[] in Solaris 10 */
604 604 #if SOLARIS2 < 10
605 605
606 606 #if SOLARIS2 >= 8
607 607 ip_forwarding = &ip_g_forward;
608 608 #endif
609 609 /*
610 610 * XXX - There is no terminator for this array, so it is not possible
611 611 * to tell if what we are looking for is missing and go off the end
612 612 * of the array.
613 613 */
614 614
615 615 #if SOLARIS2 <= 8
616 616 for (i = 0; ; i++) {
617 617 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
618 618 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
619 619 } else if (!strcmp(ip_param_arr[i].ip_param_name,
620 620 "ip_path_mtu_discovery")) {
621 621 ip_mtudisc = &ip_param_arr[i].ip_param_value;
622 622 }
623 623 #if SOLARIS2 < 8
624 624 else if (!strcmp(ip_param_arr[i].ip_param_name,
625 625 "ip_forwarding")) {
626 626 ip_forwarding = &ip_param_arr[i].ip_param_value;
627 627 }
628 628 #else
629 629 else if (!strcmp(ip_param_arr[i].ip_param_name,
630 630 "ip6_forwarding")) {
631 631 ip6_forwarding = &ip_param_arr[i].ip_param_value;
632 632 }
633 633 #endif
634 634
/* Stop scanning as soon as every pointer we need has been located. */
635 635 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
636 636 #if SOLARIS2 >= 8
637 637 ip6_forwarding != NULL &&
638 638 #endif
639 639 ip_forwarding != NULL)
640 640 break;
641 641 }
642 642 #endif
643 643
644 644 if (ifs->ifs_fr_control_forwarding & 1) {
645 645 if (ip_forwarding != NULL)
646 646 *ip_forwarding = 1;
647 647 #if SOLARIS2 >= 8
648 648 if (ip6_forwarding != NULL)
649 649 *ip6_forwarding = 1;
650 650 #endif
651 651 }
652 652
653 653 #endif
654 654
655 655 return 0;
/*
 * Nothing registered so far is undone here; the lock is retaken so it is
 * write-held on return and failure is reported. The ipldetach() header
 * comment states that ipldetach() must be able to clean up after an
 * iplattach() that did not succeed.
 */
656 656 hookup_failed:
657 657 WRITE_ENTER(&ifs->ifs_ipf_global);
658 658 return -1;
659 659 }
660 660
661 661 /* ------------------------------------------------------------------------ */
662 662 /*
663 663 * Called whenever a nethook protocol is registered or unregistered. Currently
664 664 * only used to add or remove the hooks for viona.
665 665 *
666 666 * While the function signature requires returning int, nothing
667 667 * in usr/src/uts/common/io/hook.c that invokes the callbacks
668 668 * captures the return value (nor is there currently any documentation
669 669 * on what return values should be). For now at least, we'll return 0
670 670 * on success (or 'not applicable') or an error value. Even if the
671 671 * nethook framework doesn't use the return value, it can be observed via
672 672 * dtrace if needed.
673 673 */
674 674 static int
675 675 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
676 676 const char *name, const char *dummy __unused, const char *he_name)
677 677 {
678 678 ipf_stack_t *ifs = arg;
679 679 hook_t **hookpp;
680 680 char *hook_name, *hint_name;
681 681 hook_func_t hookfn;
682 682 boolean_t *hookedp;
683 683 hook_hint_t hint;
684 684 boolean_t out;
685 685 int ret = 0;
686 686
687 687 const boolean_t gz = ifs->ifs_gz_controlled;
688 688
689 689 /* We currently only care about viona hooks notifications */
690 690 if (strcmp(name, Hn_VIONA) != 0)
691 691 return (0);
692 692
693 693 if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
694 694 out = B_FALSE;
695 695 } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
696 696 out = B_TRUE;
697 697 } else {
698 698 /*
699 699 * If we've added more hook events to viona, we must add
700 700 * the corresponding handling here (even if it's just to
701 701 * ignore it) to prevent the firewall from not working as
702 702 * intended.
703 703 */
704 704 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
705 705 he_name);
706 706
707 707 return (0);
708 708 }
709 709
710 710 if (out) {
711 711 hookpp = &ifs->ifs_ipfhookviona_out;
712 712 hookfn = ipf_hookviona_out;
713 713 hookedp = &ifs->ifs_hookviona_physical_out;
714 714 name = gz ? hook_viona_out_gz : hook_viona_out;
715 715 hint = gz ? HH_AFTER : HH_BEFORE;
716 716 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
717 717 } else {
718 718 hookpp = &ifs->ifs_ipfhookviona_in;
719 719 hookfn = ipf_hookviona_in;
720 720 hookedp = &ifs->ifs_hookviona_physical_in;
721 721 name = gz ? hook_viona_in_gz : hook_viona_in;
722 722 hint = gz ? HH_BEFORE : HH_AFTER;
723 723 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
724 724 }
725 725
726 726 switch (command) {
727 727 default:
728 728 case HN_NONE:
729 729 break;
730 730 case HN_REGISTER:
731 731 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
732 732 (*hookpp)->h_hint = hint;
733 733 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
734 734 ret = net_hook_register(ifs->ifs_ipf_viona,
735 735 (char *)he_name, *hookpp);
736 736 if (ret != 0) {
737 737 cmn_err(CE_NOTE, "%s: could not register hook "
738 738 "(hook family=%s hook=%s) err=%d", __func__,
739 739 name, he_name, ret);
740 740 *hookedp = B_FALSE;
741 741 return (ret);
742 742 }
743 743 *hookedp = B_TRUE;
744 744 break;
745 745 case HN_UNREGISTER:
746 746 if (ifs->ifs_ipf_viona == NULL)
747 747 break;
748 748
749 749 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
750 750 (char *)he_name, *hookpp) : 0;
751 751 if ((ret == 0 || ret == ENXIO)) {
752 752 if (*hookpp != NULL) {
753 753 hook_free(*hookpp);
754 754 *hookpp = NULL;
755 755 }
756 756 *hookedp = B_FALSE;
757 757 }
758 758 break;
759 759 }
760 760
761 761 return (ret);
762 762 }
763 763
764 764 /*
765 765 * Called whenever a new nethook instance is created. Currently only used
766 766 * with the Hn_VIONA nethooks. Similar to ipf_hook_protocol_notify, our
767 767 * function signature must return an int, though the result is never used.
768 768 * We elect to return 0 on success (or not applicable) or a non-zero value
769 769 * on error.
770 770 */
771 771 static int
772 772 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
773 773 const char *netid, const char *dummy __unused, const char *instance)
774 774 {
775 775 ipf_stack_t *ifs = arg;
776 776 int ret = 0;
777 777
778 778 /* We currently only care about viona hooks */
779 779 if (strcmp(instance, Hn_VIONA) != 0)
780 780 return (0);
781 781
782 782 switch (command) {
783 783 case HN_NONE:
784 784 default:
785 785 return (0);
786 786 case HN_REGISTER:
787 787 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
788 788 NHF_VIONA);
789 789
790 790 if (ifs->ifs_ipf_viona == NULL)
791 791 return (EPROTONOSUPPORT);
792 792
793 793 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
794 794 ipf_hook_protocol_notify, ifs);
795 795 VERIFY(ret == 0 || ret == ESHUTDOWN);
796 796 break;
797 797 case HN_UNREGISTER:
798 798 if (ifs->ifs_ipf_viona == NULL)
799 799 break;
800 800 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
801 801 ipf_hook_protocol_notify));
802 802 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
803 803 ifs->ifs_ipf_viona = NULL;
804 804 break;
805 805 }
806 806
807 807 return (ret);
808 808 }
809 809
/*
 * Turn interception of loopback traffic on (set != 0) or off (set == 0)
 * for the given ipf stack by registering or unregistering the four
 * loopback hooks (IPv4 in/out, IPv6 in/out).
 *
 * Returns:
 *	EFAULT	- the IPv4 or IPv6 net handle is not set up
 *	EINVAL	- a hook registration failed (remaining hooks are not tried)
 *	EBUSY	- a hook unregistration failed (remaining hooks are not tried)
 *	0	- success, or the requested state was already in effect
 *
 * Note that ifs_ipf_loopback is updated before the hooks are touched, so
 * an error return leaves the flag already flipped.
 */
810 810 static int fr_setipfloopback(set, ifs)
811 811 int set;
812 812 ipf_stack_t *ifs;
813 813 {
814 814 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
815 815 return EFAULT;
816 816
817 817 if (set && !ifs->ifs_ipf_loopback) {
818 818 ifs->ifs_ipf_loopback = 1;
819 819
/* Each ifs_hookN_loopback_* flag records "hook is registered". */
820 820 ifs->ifs_hook4_loopback_in = (net_hook_register(
821 821 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
822 822 ifs->ifs_ipfhook4_loop_in) == 0);
823 823 if (!ifs->ifs_hook4_loopback_in)
824 824 return EINVAL;
825 825
826 826 ifs->ifs_hook4_loopback_out = (net_hook_register(
827 827 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
828 828 ifs->ifs_ipfhook4_loop_out) == 0);
829 829 if (!ifs->ifs_hook4_loopback_out)
830 830 return EINVAL;
831 831
832 832 ifs->ifs_hook6_loopback_in = (net_hook_register(
833 833 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
834 834 ifs->ifs_ipfhook6_loop_in) == 0);
835 835 if (!ifs->ifs_hook6_loopback_in)
836 836 return EINVAL;
837 837
838 838 ifs->ifs_hook6_loopback_out = (net_hook_register(
839 839 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
840 840 ifs->ifs_ipfhook6_loop_out) == 0);
841 841 if (!ifs->ifs_hook6_loopback_out)
842 842 return EINVAL;
843 843
844 844 } else if (!set && ifs->ifs_ipf_loopback) {
845 845 ifs->ifs_ipf_loopback = 0;
846 846
/*
 * On this path the flag is reused as "hook is still registered": it is
 * cleared only when net_hook_unregister() succeeds.
 */
847 847 ifs->ifs_hook4_loopback_in =
848 848 (net_hook_unregister(ifs->ifs_ipf_ipv4,
849 849 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
850 850 if (ifs->ifs_hook4_loopback_in)
851 851 return EBUSY;
852 852
853 853 ifs->ifs_hook4_loopback_out =
854 854 (net_hook_unregister(ifs->ifs_ipf_ipv4,
855 855 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
856 856 if (ifs->ifs_hook4_loopback_out)
857 857 return EBUSY;
858 858
/* Must name the IPv6 hook: that is what was registered on the v6 handle. */
859 859 ifs->ifs_hook6_loopback_in =
860 860 (net_hook_unregister(ifs->ifs_ipf_ipv6,
861 861 NH_LOOPBACK_IN, ifs->ifs_ipfhook6_loop_in) != 0);
862 862 if (ifs->ifs_hook6_loopback_in)
863 863 return EBUSY;
864 864
865 865 ifs->ifs_hook6_loopback_out =
866 866 (net_hook_unregister(ifs->ifs_ipf_ipv6,
867 867 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
868 868 if (ifs->ifs_hook6_loopback_out)
869 869 return EBUSY;
870 870 }
871 871 return 0;
872 872 }
873 873
874 874
875 875 /*
876 876 * Filter ioctl interface.
 *
 * Entry point for every ioctl issued on an ipf device node.
 *
 * The minor number of `dev' selects the per-open soft state; the logical
 * ipf device (IPL_LOG*) is then taken from isp->ipfs_minor.  Requests on
 * IPL_LOGEV are handed to ipf_cfwlog_ioctl() untouched.  SIOCIPFZONESET
 * is only honoured for callers in the global zone (EACCES otherwise).
 *
 * For everything else the ipf stack is looked up with ipf_find_stack(),
 * which returns with ifs_ipf_global held as reader; every return path
 * after that point drops the lock with RWLOCK_EXIT().
 *
 * Returns 0 or an errno value:
 *	ENXIO	- no soft state for this minor, or no matching ipf stack
 *	EIO	- ipf is not running and the request is not one of the few
 *		  commands allowed in that state
 *	EBUSY	- an enable/disable operation is in progress
 *	EPERM	- a modifying command was issued without FWRITE
 *	EFAULT	- copyin/copyout of the argument failed
 *	EINVAL	- unknown command
 * or whatever the command-specific helper returned.
877 877 */
878 878 /*ARGSUSED*/
879 879 int iplioctl(dev, cmd, data, mode, cp, rp)
880 880 dev_t dev;
881 881 int cmd;
882 882 #if SOLARIS2 >= 7
883 883 intptr_t data;
884 884 #else
885 885 int *data;
886 886 #endif
887 887 int mode;
888 888 cred_t *cp;
889 889 int *rp;
890 890 {
891 891 int error = 0, tmp;
892 892 friostat_t fio;
893 893 minor_t unit;
894 894 u_int enable;
895 895 ipf_stack_t *ifs;
896 896 zoneid_t zid;
897 897 ipf_devstate_t *isp;
898 898
899 899 #ifdef IPFDEBUG
|
↓ open down ↓ |
881 lines elided |
↑ open up ↑ |
900 900 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
901 901 dev, cmd, data, mode, cp, rp);
902 902 #endif
903 903 unit = getminor(dev);
904 904
905 905 isp = ddi_get_soft_state(ipf_state, unit);
906 906 if (isp == NULL)
907 907 return ENXIO;
/* From here on `unit' is the logical ipf device, not the device minor. */
908 908 unit = isp->ipfs_minor;
909 909
910 + if (unit == IPL_LOGEV)
911 + return (ipf_cfwlog_ioctl(dev, cmd, data, mode, cp, rp));
912 +
910 913 zid = crgetzoneid(cp);
911 914 if (cmd == SIOCIPFZONESET) {
912 915 if (zid == GLOBAL_ZONEID)
913 916 return fr_setzoneid(isp, (caddr_t) data);
914 917 return EACCES;
915 918 }
916 919
917 920 /*
918 921 * ipf_find_stack returns with a read lock on ifs_ipf_global
919 922 */
920 923 ifs = ipf_find_stack(zid, isp);
921 924 if (ifs == NULL)
922 925 return ENXIO;
923 926
/*
 * While ipf is not running, only a handful of query/tuning/enable
 * commands on the IPL_LOGIPF device are let through.
 */
924 927 if (ifs->ifs_fr_running <= 0) {
925 928 if (unit != IPL_LOGIPF) {
926 929 RWLOCK_EXIT(&ifs->ifs_ipf_global);
927 930 return EIO;
928 931 }
929 932 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
930 933 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
931 934 cmd != SIOCGETFS && cmd != SIOCGETFF) {
932 935 RWLOCK_EXIT(&ifs->ifs_ipf_global);
933 936 return EIO;
934 937 }
935 938 }
936 939
937 940 if (ifs->ifs_fr_enable_active != 0) {
938 941 RWLOCK_EXIT(&ifs->ifs_ipf_global);
939 942 return EBUSY;
940 943 }
941 944
/*
 * Any result other than -1 from fr_ioctlswitch() is final and is passed
 * straight back; -1 falls through to the switch below.
 */
942 945 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
943 946 curproc, ifs);
944 947 if (error != -1) {
945 948 RWLOCK_EXIT(&ifs->ifs_ipf_global);
946 949 return error;
947 950 }
948 951 error = 0;
949 952
950 953 switch (cmd)
951 954 {
952 955 case SIOCFRENB :
953 956 if (!(mode & FWRITE))
954 957 error = EPERM;
955 958 else {
956 959 error = COPYIN((caddr_t)data, (caddr_t)&enable,
957 960 sizeof(enable));
958 961 if (error != 0) {
959 962 error = EFAULT;
960 963 break;
961 964 }
962 965
963 966 RWLOCK_EXIT(&ifs->ifs_ipf_global);
964 967 WRITE_ENTER(&ifs->ifs_ipf_global);
965 968
966 969 /*
967 970 * We must recheck fr_enable_active here, since we've
968 971 * dropped ifs_ipf_global from R in order to get it
969 972 * exclusively.
970 973 */
971 974 if (ifs->ifs_fr_enable_active == 0) {
972 975 ifs->ifs_fr_enable_active = 1;
973 976 error = fr_enableipf(ifs, enable);
974 977 ifs->ifs_fr_enable_active = 0;
975 978 }
976 979 }
977 980 break;
978 981 case SIOCIPFSET :
979 982 if (!(mode & FWRITE)) {
980 983 error = EPERM;
981 984 break;
982 985 }
983 986 /* FALLTHRU */
984 987 case SIOCIPFGETNEXT :
985 988 case SIOCIPFGET :
986 989 error = fr_ipftune(cmd, (void *)data, ifs);
987 990 break;
988 991 case SIOCSETFF :
989 992 if (!(mode & FWRITE))
990 993 error = EPERM;
991 994 else {
992 995 error = COPYIN((caddr_t)data,
993 996 (caddr_t)&ifs->ifs_fr_flags,
994 997 sizeof(ifs->ifs_fr_flags));
995 998 if (error != 0)
996 999 error = EFAULT;
997 1000 }
998 1001 break;
999 1002 case SIOCIPFLP :
1000 1003 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1001 1004 sizeof(tmp));
1002 1005 if (error != 0)
1003 1006 error = EFAULT;
1004 1007 else
1005 1008 error = fr_setipfloopback(tmp, ifs);
1006 1009 break;
1007 1010 case SIOCGETFF :
1008 1011 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
1009 1012 sizeof(ifs->ifs_fr_flags));
1010 1013 if (error != 0)
1011 1014 error = EFAULT;
1012 1015 break;
1013 1016 case SIOCFUNCL :
1014 1017 error = fr_resolvefunc((void *)data);
1015 1018 break;
/* These commands operate on the active rule set ... */
1016 1019 case SIOCINAFR :
1017 1020 case SIOCRMAFR :
1018 1021 case SIOCADAFR :
1019 1022 case SIOCZRLST :
1020 1023 if (!(mode & FWRITE))
1021 1024 error = EPERM;
1022 1025 else
1023 1026 error = frrequest(unit, cmd, (caddr_t)data,
1024 1027 ifs->ifs_fr_active, 1, ifs);
1025 1028 break;
/* ... and these on the other (inactive) set, 1 - ifs_fr_active. */
1026 1029 case SIOCINIFR :
1027 1030 case SIOCRMIFR :
1028 1031 case SIOCADIFR :
1029 1032 if (!(mode & FWRITE))
1030 1033 error = EPERM;
1031 1034 else
1032 1035 error = frrequest(unit, cmd, (caddr_t)data,
1033 1036 1 - ifs->ifs_fr_active, 1, ifs);
1034 1037 break;
1035 1038 case SIOCSWAPA :
1036 1039 if (!(mode & FWRITE))
1037 1040 error = EPERM;
1038 1041 else {
/*
 * Report the currently active set index to the caller, then flip it.
 * The flip is skipped if the copyout fails.
 */
1039 1042 WRITE_ENTER(&ifs->ifs_ipf_mutex);
1040 1043 bzero((char *)ifs->ifs_frcache,
1041 1044 sizeof (ifs->ifs_frcache));
1042 1045 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
1043 1046 (caddr_t)data,
1044 1047 sizeof(ifs->ifs_fr_active));
1045 1048 if (error != 0)
1046 1049 error = EFAULT;
1047 1050 else
1048 1051 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
1049 1052 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
1050 1053 }
1051 1054 break;
1052 1055 case SIOCGETFS :
1053 1056 fr_getstat(&fio, ifs);
1054 1057 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
1055 1058 break;
1056 1059 case SIOCFRZST :
1057 1060 if (!(mode & FWRITE))
1058 1061 error = EPERM;
1059 1062 else
1060 1063 error = fr_zerostats((caddr_t)data, ifs);
1061 1064 break;
1062 1065 case SIOCIPFFL :
1063 1066 if (!(mode & FWRITE))
1064 1067 error = EPERM;
1065 1068 else {
/* The int argument is read, passed to frflush(), and overwritten with its result. */
1066 1069 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1067 1070 sizeof(tmp));
1068 1071 if (!error) {
1069 1072 tmp = frflush(unit, 4, tmp, ifs);
1070 1073 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1071 1074 sizeof(tmp));
1072 1075 if (error != 0)
1073 1076 error = EFAULT;
1074 1077 } else
1075 1078 error = EFAULT;
1076 1079 }
1077 1080 break;
1078 1081 #ifdef USE_INET6
1079 1082 case SIOCIPFL6 :
1080 1083 if (!(mode & FWRITE))
1081 1084 error = EPERM;
1082 1085 else {
1083 1086 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1084 1087 sizeof(tmp));
1085 1088 if (!error) {
1086 1089 tmp = frflush(unit, 6, tmp, ifs);
1087 1090 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1088 1091 sizeof(tmp));
1089 1092 if (error != 0)
1090 1093 error = EFAULT;
1091 1094 } else
1092 1095 error = EFAULT;
1093 1096 }
1094 1097 break;
1095 1098 #endif
1096 1099 case SIOCSTLCK :
/* One value sets the state, NAT, fragment and auth lock flags together. */
1097 1100 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1098 1101 if (error == 0) {
1099 1102 ifs->ifs_fr_state_lock = tmp;
1100 1103 ifs->ifs_fr_nat_lock = tmp;
1101 1104 ifs->ifs_fr_frag_lock = tmp;
1102 1105 ifs->ifs_fr_auth_lock = tmp;
1103 1106 } else
1104 1107 error = EFAULT;
1105 1108 break;
1106 1109 #ifdef IPFILTER_LOG
1107 1110 case SIOCIPFFB :
1108 1111 if (!(mode & FWRITE))
1109 1112 error = EPERM;
1110 1113 else {
1111 1114 tmp = ipflog_clear(unit, ifs);
1112 1115 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1113 1116 sizeof(tmp));
1114 1117 if (error)
1115 1118 error = EFAULT;
1116 1119 }
1117 1120 break;
1118 1121 #endif /* IPFILTER_LOG */
1119 1122 case SIOCFRSYN :
1120 1123 if (!(mode & FWRITE))
1121 1124 error = EPERM;
1122 1125 else {
/*
 * Trade the read lock for the write lock for the resync; the
 * RWLOCK_EXIT() at the bottom of the function releases it.
 */
1123 1126 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1124 1127 WRITE_ENTER(&ifs->ifs_ipf_global);
1125 1128
1126 1129 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1127 1130 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1128 1131 fr_nataddrsync(0, NULL, NULL, ifs);
1129 1132 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1130 1133 error = 0;
1131 1134 }
1132 1135 break;
1133 1136 case SIOCGFRST :
1134 1137 error = fr_outobj((void *)data, fr_fragstats(ifs),
1135 1138 IPFOBJ_FRAGSTAT);
1136 1139 break;
1137 1140 case FIONREAD :
/* Without IPFILTER_LOG this command is a no-op that returns 0. */
1138 1141 #ifdef IPFILTER_LOG
1139 1142 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1140 1143
1141 1144 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1142 1145 if (error != 0)
1143 1146 error = EFAULT;
1144 1147 #endif
1145 1148 break;
1146 1149 case SIOCIPFITER :
1147 1150 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1148 1151 curproc, ifs);
1149 1152 break;
1150 1153
1151 1154 case SIOCGENITER :
1152 1155 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1153 1156 curproc, ifs);
1154 1157 break;
1155 1158
1156 1159 case SIOCIPFDELTOK :
1157 1160 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1158 1161 if (error != 0) {
1159 1162 error = EFAULT;
1160 1163 } else {
1161 1164 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1162 1165 }
1163 1166 break;
1164 1167
1165 1168 default :
1166 1169 #ifdef IPFDEBUG
1167 1170 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1168 1171 cmd, (void *)data);
1169 1172 #endif
1170 1173 error = EINVAL;
1171 1174 break;
1172 1175 }
1173 1176 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1174 1177 return error;
1175 1178 }
1176 1179
1177 1180
/*
 * Enable (enable != 0) or disable (enable == 0) filtering for one stack.
 *
 * Disable: calls ipldetach() and, if that succeeds, marks the stack as
 * not running (ifs_fr_running = -1).
 * Enable: a no-op returning 0 if the stack is already running; otherwise
 * calls iplattach(), starts the slow timer if none is pending and sets
 * ifs_fr_running to 1.  If iplattach() fails, ipldetach() is called to
 * undo any partial setup and the attach error is returned.
 *
 * Returns 0 on success or the error from ipldetach()/iplattach().
 */
1178 1181 static int fr_enableipf(ifs, enable)
1179 1182 ipf_stack_t *ifs;
1180 1183 int enable;
1181 1184 {
1182 1185 int error;
1183 1186
1184 1187 if (!enable) {
1185 1188 error = ipldetach(ifs);
1186 1189 if (error == 0)
1187 1190 ifs->ifs_fr_running = -1;
1188 1191 return error;
1189 1192 }
1190 1193
1191 1194 if (ifs->ifs_fr_running > 0)
1192 1195 return 0;
1193 1196
1194 1197 error = iplattach(ifs);
1195 1198 if (error == 0) {
1196 1199 if (ifs->ifs_fr_timer_id == NULL) {
/* First fr_slowtimer() run is scheduled 500000us (half a second) out. */
1197 1200 int hz = drv_usectohz(500000);
1198 1201
1199 1202 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1200 1203 (void *)ifs,
1201 1204 hz);
1202 1205 }
1203 1206 ifs->ifs_fr_running = 1;
1204 1207 } else {
1205 1208 (void) ipldetach(ifs);
1206 1209 }
1207 1210 return error;
1208 1211 }
1209 1212
1210 1213
/*
 * Look up the interface called `name' for IP version `v' (4 or 6) in the
 * given ipf stack and return the result of net_phylookup() on the
 * matching net handle.  Returns 0 for any other value of `v'.
 */
1211 1214 phy_if_t get_unit(name, v, ifs)
1212 1215 char *name;
1213 1216 int v;
1214 1217 ipf_stack_t *ifs;
1215 1218 {
1216 1219 net_handle_t nif;
1217 1220
1218 1221 if (v == 4)
1219 1222 nif = ifs->ifs_ipf_ipv4;
1220 1223 else if (v == 6)
1221 1224 nif = ifs->ifs_ipf_ipv6;
1222 1225 else
1223 1226 return 0;
1224 1227
1225 1228 return (net_phylookup(nif, name));
1226 1229 }
1227 1230
1228 1231 /*
1229 1232 * routines below for saving IP headers to buffer
1230 1233 */
1231 1234 /*ARGSUSED*/
/*
 * Open handler for the ipf device nodes.
 *
 * The minor number being opened names the logical ipf device (at most
 * IPL_LOGMAX).  Each open gets its own freshly allocated minor number
 * and soft state; *devp is rewritten to that new minor, and the logical
 * device is remembered in isp->ipfs_minor for the other entry points.
 *
 * Returns:
 *	0	- success
 *	ENXIO	- not a character open, minor out of range, IPL_LOGEV
 *		  opened from a non-global zone, or soft state allocation
 *		  failed
 *	EBUSY	- IPL_LOGEV is already open
 */
1232 1235 int iplopen(devp, flags, otype, cred)
1233 1236 dev_t *devp;
1234 1237 int flags, otype;
1235 1238 cred_t *cred;
1236 1239 {
1237 1240 ipf_devstate_t *isp;
1238 1241 minor_t min = getminor(*devp);
1239 1242 minor_t minor;
|
↓ open down ↓ |
320 lines elided |
↑ open up ↑ |
1240 1243
1241 1244 #ifdef IPFDEBUG
1242 1245 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1243 1246 #endif
1244 1247 if (!(otype & OTYP_CHR))
1245 1248 return ENXIO;
1246 1249
1247 1250 if (IPL_LOGMAX < min)
1248 1251 return ENXIO;
1249 1252
1253 + /* Special-case ipfev: global-zone-open only. */
1254 + if (min == IPL_LOGEV) {
1255 + if (crgetzoneid(cred) != GLOBAL_ZONEID)
1256 + return (ENXIO);
1257 + /*
1258 + * Else enable the CFW logging of events.
1259 + * NOTE: For now, we only allow one open at a time.
1260 + * Use atomic_add to confirm/deny. And also for now,
1261 + * assume sizeof (boolean_t) == sizeof (int).
1262 + */
1263 + if (atomic_inc_uint_nv(&ipf_cfwlog_enabled) > 1) {
1264 + atomic_dec_uint(&ipf_cfwlog_enabled);
1265 + return (EBUSY);
1266 + }
1267 + }
1268 +
/* Allocate a private minor number for this open. */
1250 1269 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1251 1270 VM_BESTFIT | VM_SLEEP);
1252 1271
1253 1272 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1254 1273 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
/* Undo the increment taken above so a later open of ipfev can succeed. */
1274 + if (min == IPL_LOGEV)
1275 + atomic_dec_uint(&ipf_cfwlog_enabled);
1255 1276 return ENXIO;
1256 1277 }
1257 1278
1258 1279 *devp = makedevice(getmajor(*devp), minor);
1259 1280 isp = ddi_get_soft_state(ipf_state, minor);
1260 1281 VERIFY(isp != NULL);
1261 1282
1262 1283 isp->ipfs_minor = min;
1263 1284 isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1264 1285
1265 1286 return 0;
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
1266 1287 }
1267 1288
1268 1289
1269 1290 /*ARGSUSED*/
/*
 * Close handler: releases the per-open soft state and the minor number
 * handed out by iplopen().  If the soft state belongs to IPL_LOGEV, the
 * ipf_cfwlog_enabled counter taken at open time is dropped first.
 *
 * Returns ENXIO if the device minor is larger than IPL_LOGMAX, else 0.
 */
1270 1291 int iplclose(dev, flags, otype, cred)
1271 1292 dev_t dev;
1272 1293 int flags, otype;
1273 1294 cred_t *cred;
1274 1295 {
1275 1296 minor_t min = getminor(dev);
1297 + ipf_devstate_t *isp;
1276 1298
1277 1299 #ifdef IPFDEBUG
1278 1300 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1279 1301 #endif
1280 1302
/*
 * NOTE(review): `min' here is the minor allocated in iplopen(), not the
 * logical IPL_LOG* unit; confirm that comparing it with IPL_LOGMAX is
 * intended.
 */
1281 1303 if (IPL_LOGMAX < min)
1282 1304 return ENXIO;
1283 1305
1306 + isp = ddi_get_soft_state(ipf_state, min);
1307 + if (isp != NULL && isp->ipfs_minor == IPL_LOGEV) {
1308 + /* Disable CFW logging. */
1309 + membar_exit();
1310 + atomic_dec_uint(&ipf_cfwlog_enabled);
1311 + }
1312 +
1284 1313 ddi_soft_state_free(ipf_state, min);
1285 1314 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1286 1315
1287 1316 return 0;
1288 1317 }
1289 1318
1290 1319 #ifdef IPFILTER_LOG
1291 1320 /*
1292 1321 * iplread/ipllog
1293 1322 * both of these must operate with at least splnet() lest they be
1294 1323 * called during packet processing and cause an inconsistency to appear in
1295 1324 * the filter lists.
 *
 * iplread is the read(2) handler for the ipf devices.  Reads on
 * IPL_LOGEV are passed to ipf_cfwlog_read(); all others are served by
 * ipflog_read() (or ipfsync_read() for IPL_LOGSYNC when IPFILTER_SYNC is
 * defined) for the caller's ipf stack.
 *
 * Returns ENXIO if there is no soft state or no matching stack, EIO if
 * ipf is not running, otherwise the result of the underlying read
 * routine.
1296 1325 */
1297 1326 /*ARGSUSED*/
1298 1327 int iplread(dev, uio, cp)
1299 1328 dev_t dev;
1300 1329 register struct uio *uio;
1301 1330 cred_t *cp;
1302 1331 {
1303 1332 ipf_stack_t *ifs;
|
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
1304 1333 int ret;
1305 1334 minor_t unit;
1306 1335 ipf_devstate_t *isp;
1307 1336
1308 1337 unit = getminor(dev);
1309 1338 isp = ddi_get_soft_state(ipf_state, unit);
1310 1339 if (isp == NULL)
1311 1340 return ENXIO;
/* Switch from the device minor to the logical ipf device. */
1312 1341 unit = isp->ipfs_minor;
1313 1342
1343 + if (unit == IPL_LOGEV)
1344 + return (ipf_cfwlog_read(dev, uio, cp));
1345 +
1314 1346 /*
1315 1347 * ipf_find_stack returns with a read lock on ifs_ipf_global
1316 1348 */
1317 1349 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1318 1350 if (ifs == NULL)
1319 1351 return ENXIO;
1320 1352
1321 1353 # ifdef IPFDEBUG
1322 1354 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1323 1355 # endif
1324 1356
1325 1357 if (ifs->ifs_fr_running < 1) {
1326 1358 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1327 1359 return EIO;
1328 1360 }
1329 1361
1330 1362 # ifdef IPFILTER_SYNC
1331 1363 if (unit == IPL_LOGSYNC) {
1332 1364 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1333 1365 return ipfsync_read(uio);
1334 1366 }
1335 1367 # endif
1336 1368
1337 1369 ret = ipflog_read(unit, uio, ifs);
1338 1370 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1339 1371 return ret;
1340 1372 }
1341 1373 #endif /* IPFILTER_LOG */
1342 1374
1343 1375
1344 1376 /*
1345 1377 * iplwrite
1346 1378 * write(2) handler for the ipf devices.  As with iplread/ipllog, this
1347 1379 * must operate with at least splnet() lest it be called during packet
1348 1380 * processing and cause an inconsistency to appear in the filter lists.
 *
 * Writes are only meaningful for the sync device when IPFILTER_SYNC is
 * defined (handed to ipfsync_write()).  IPL_LOGEV returns EIO; every
 * other device returns ENXIO once the stack checks have passed.
 *
 * Returns ENXIO if there is no soft state or no matching stack, and EIO
 * if ipf is not running.
1349 1381 */
1350 1382 int iplwrite(dev, uio, cp)
1351 1383 dev_t dev;
1352 1384 register struct uio *uio;
1353 1385 cred_t *cp;
1354 1386 {
|
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
1355 1387 ipf_stack_t *ifs;
1356 1388 minor_t unit;
1357 1389 ipf_devstate_t *isp;
1358 1390
1359 1391 unit = getminor(dev);
1360 1392 isp = ddi_get_soft_state(ipf_state, unit);
1361 1393 if (isp == NULL)
1362 1394 return ENXIO;
1363 1395 unit = isp->ipfs_minor;
1364 1396
1397 + if (unit == IPL_LOGEV)
1398 + return (EIO); /* ipfev doesn't support write yet. */
1399 +
1365 1400 /*
1366 1401 * ipf_find_stack returns with a read lock on ifs_ipf_global
1367 1402 */
1368 1403 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1369 1404 if (ifs == NULL)
1370 1405 return ENXIO;
1371 1406
1372 1407 #ifdef IPFDEBUG
1373 1408 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1374 1409 #endif
1375 1410
1376 1411 if (ifs->ifs_fr_running < 1) {
1377 1412 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1378 1413 return EIO;
1379 1414 }
1380 1415
1381 1416 #ifdef IPFILTER_SYNC
/*
 * NOTE(review): this tests the raw device minor, whereas iplread() tests
 * `unit' (isp->ipfs_minor) against IPL_LOGSYNC -- confirm which is meant.
 */
1382 1417 if (getminor(dev) == IPL_LOGSYNC) {
1383 1418 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1384 1419 return ipfsync_write(uio);
1385 1420 }
1386 1421 #endif /* IPFILTER_SYNC */
1387 1422 dev = dev; /* LINT */
1388 1423 uio = uio; /* LINT */
1389 1424 cp = cp; /* LINT */
1390 1425 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1391 1426 return ENXIO;
1392 1427 }
1393 1428
1394 1429
1395 1430 /*
1396 1431 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1397 1432 * requires a large amount of setting up and isn't any more efficient.
 *
 * Builds a TCP RST in reply to the TCP packet described by `fin' and
 * hands it to fr_send_ip().  The reply swaps the packet's addresses and
 * ports.  If the packet carried an ACK, the RST uses its ack number as
 * the sequence number; otherwise the RST acknowledges the packet's
 * sequence number (plus one when SYN or FIN was set) and has RST|ACK set.
 *
 * Returns -1 if the packet is itself a RST, if fr_checkl4sum() rejects
 * it (only compiled in when IPFILTER_CKSUM is not defined), or if no
 * mblk could be allocated; otherwise the result of fr_send_ip().
1398 1433 */
1399 1434 int fr_send_reset(fin)
1400 1435 fr_info_t *fin;
1401 1436 {
1402 1437 tcphdr_t *tcp, *tcp2;
1403 1438 int tlen, hlen;
1404 1439 mblk_t *m;
1405 1440 #ifdef USE_INET6
1406 1441 ip6_t *ip6;
1407 1442 #endif
1408 1443 ip_t *ip;
1409 1444
1410 1445 tcp = fin->fin_dp;
/* Never answer a reset with a reset. */
1411 1446 if (tcp->th_flags & TH_RST)
1412 1447 return -1;
1413 1448
1414 1449 #ifndef IPFILTER_CKSUM
1415 1450 if (fr_checkl4sum(fin) == -1)
1416 1451 return -1;
1417 1452 #endif
1418 1453
1419 1454 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1420 1455 #ifdef USE_INET6
1421 1456 if (fin->fin_v == 6)
1422 1457 hlen = sizeof(ip6_t);
1423 1458 else
1424 1459 #endif
1425 1460 hlen = sizeof(ip_t);
1426 1461 hlen += sizeof(*tcp2);
/* 64 extra bytes are allocated and skipped to leave headroom before the IP header. */
1427 1462 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1428 1463 return -1;
1429 1464
1430 1465 m->b_rptr += 64;
1431 1466 MTYPE(m) = M_DATA;
1432 1467 m->b_wptr = m->b_rptr + hlen;
1433 1468 ip = (ip_t *)m->b_rptr;
1434 1469 bzero((char *)ip, hlen);
1435 1470 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1436 1471 tcp2->th_dport = tcp->th_sport;
1437 1472 tcp2->th_sport = tcp->th_dport;
1438 1473 if (tcp->th_flags & TH_ACK) {
1439 1474 tcp2->th_seq = tcp->th_ack;
1440 1475 tcp2->th_flags = TH_RST;
1441 1476 } else {
1442 1477 tcp2->th_ack = ntohl(tcp->th_seq);
1443 1478 tcp2->th_ack += tlen;
1444 1479 tcp2->th_ack = htonl(tcp2->th_ack);
1445 1480 tcp2->th_flags = TH_RST|TH_ACK;
1446 1481 }
1447 1482 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1448 1483
1449 1484 ip->ip_v = fin->fin_v;
1450 1485 #ifdef USE_INET6
1451 1486 if (fin->fin_v == 6) {
1452 1487 ip6 = (ip6_t *)m->b_rptr;
1453 1488 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1454 1489 ip6->ip6_src = fin->fin_dst6.in6;
1455 1490 ip6->ip6_dst = fin->fin_src6.in6;
1456 1491 ip6->ip6_plen = htons(sizeof(*tcp));
1457 1492 ip6->ip6_nxt = IPPROTO_TCP;
1458 1493 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1459 1494 } else
1460 1495 #endif
1461 1496 {
1462 1497 ip->ip_src.s_addr = fin->fin_daddr;
1463 1498 ip->ip_dst.s_addr = fin->fin_saddr;
1464 1499 ip->ip_id = fr_nextipid(fin);
1465 1500 ip->ip_hl = sizeof(*ip) >> 2;
1466 1501 ip->ip_p = IPPROTO_TCP;
/* ip_len is left in host byte order here; fr_send_ip() does the conversions. */
1467 1502 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1468 1503 ip->ip_tos = fin->fin_ip->ip_tos;
1469 1504 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1470 1505 }
1471 1506 return fr_send_ip(fin, m, &m);
1472 1507 }
1473 1508
1474 1509 /*
1475 1510 * Function: fr_send_ip
1476 1511 * Returns: 0: success
1477 1512 * -1: failed
 *		(the value returned is whatever fr_fastroute() returns)
1478 1513 * Parameters:
1479 1514 * fin: packet information
1480 1515 * m: the message block where the IP header starts
 *	mpp: address of the pointer to that message block
1481 1516 *
1482 1517 * Send a new packet through the IP stack.
1483 1518 *
1484 1519 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1485 1520 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1486 1521 * function).
1487 1522 *
1488 1523 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1489 1524 * in by this function.
1490 1525 *
1491 1526 * All other portions of the packet must be in on-the-wire format.
1492 1527 */
1493 1528 /*ARGSUSED*/
1494 1529 static int fr_send_ip(fin, m, mpp)
1495 1530 fr_info_t *fin;
1496 1531 mblk_t *m, **mpp;
1497 1532 {
1498 1533 qpktinfo_t qpi, *qpip;
1499 1534 fr_info_t fnew;
1500 1535 ip_t *ip;
1501 1536 int i, hlen;
1502 1537 ipf_stack_t *ifs = fin->fin_ifs;
1503 1538
1504 1539 ip = (ip_t *)m->b_rptr;
1505 1540 bzero((char *)&fnew, sizeof(fnew));
1506 1541
1507 1542 #ifdef USE_INET6
1508 1543 if (fin->fin_v == 6) {
1509 1544 ip6_t *ip6;
1510 1545
1511 1546 ip6 = (ip6_t *)ip;
1512 1547 ip6->ip6_vfc = 0x60;
1513 1548 ip6->ip6_hlim = 127;
1514 1549 fnew.fin_v = 6;
1515 1550 hlen = sizeof(*ip6);
1516 1551 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1517 1552 } else
1518 1553 #endif
1519 1554 {
1520 1555 fnew.fin_v = 4;
1521 1556 #if SOLARIS2 >= 10
1522 1557 ip->ip_ttl = 255;
1523 1558 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1524 1559 ip->ip_off = htons(IP_DF);
1525 1560 #else
1526 1561 if (ip_ttl_ptr != NULL)
1527 1562 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1528 1563 else
1529 1564 ip->ip_ttl = 63;
1530 1565 if (ip_mtudisc != NULL)
1531 1566 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1532 1567 else
1533 1568 ip->ip_off = htons(IP_DF);
1534 1569 #endif
1535 1570 /*
1536 1571 * The dance with byte order and ip_len/ip_off is because in
1537 1572 * fr_fastroute, it expects them to be in host byte order but
1538 1573 * ipf_cksum expects them to be in network byte order.
1539 1574 */
1540 1575 ip->ip_len = htons(ip->ip_len);
1541 1576 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1542 1577 ip->ip_len = ntohs(ip->ip_len);
1543 1578 ip->ip_off = ntohs(ip->ip_off);
1544 1579 hlen = sizeof(*ip);
1545 1580 fnew.fin_plen = ip->ip_len;
1546 1581 }
1547 1582
/*
 * Describe the new packet with a fresh fr_info_t/qpktinfo_t pair that
 * reuses the interface information of the packet that triggered it.
 */
1548 1583 qpip = fin->fin_qpi;
1549 1584 qpi.qpi_off = 0;
1550 1585 qpi.qpi_ill = qpip->qpi_ill;
1551 1586 qpi.qpi_m = m;
1552 1587 qpi.qpi_data = ip;
1553 1588 fnew.fin_qpi = &qpi;
1554 1589 fnew.fin_ifp = fin->fin_ifp;
1555 1590 fnew.fin_flx = FI_NOCKSUM;
1556 1591 fnew.fin_m = m;
1557 1592 fnew.fin_qfm = m;
1558 1593 fnew.fin_ip = ip;
1559 1594 fnew.fin_mp = mpp;
1560 1595 fnew.fin_hlen = hlen;
1561 1596 fnew.fin_dp = (char *)ip + hlen;
1562 1597 fnew.fin_ifs = fin->fin_ifs;
1563 1598 (void) fr_makefrip(hlen, ip, &fnew);
1564 1599
1565 1600 i = fr_fastroute(m, mpp, &fnew, NULL);
1566 1601 return i;
1567 1602 }
1568 1603
1569 1604
/*
 * Build an ICMP (or, for IPv6 packets, ICMPv6) error in response to the
 * packet described by `fin' and send it with fr_send_ip().
 *
 * Parameters:
 *	type: ICMP type to send; mapped through icmptoicmp6types[] for IPv6
 *	fin:  packet information for the offending packet; fin_icode
 *	      supplies the ICMP code
 *	dst:  if 0, the source address of the error is looked up from the
 *	      interface with fr_ifpaddr(); otherwise the offending
 *	      packet's destination address is used
 *
 * Returns:
 *	-1 if type (or, with USE_INET6, code) is out of range, if
 *	   fr_checkl4sum() rejects the packet (only compiled in when
 *	   IPFILTER_CKSUM is not defined), if no mblk can be allocated, or
 *	   if fr_ifpaddr() fails;
 *	 0 without sending anything when the offending IPv4 packet is an
 *	   ICMP message other than echo, timestamp, info or mask request;
 *	otherwise the result of fr_send_ip().
 */
1570 1605 int fr_send_icmp_err(type, fin, dst)
1571 1606 int type;
1572 1607 fr_info_t *fin;
1573 1608 int dst;
1574 1609 {
1575 1610 struct in_addr dst4;
1576 1611 struct icmp *icmp;
1577 1612 qpktinfo_t *qpi;
1578 1613 int hlen, code;
1579 1614 phy_if_t phy;
1580 1615 u_short sz;
1581 1616 #ifdef USE_INET6
1582 1617 mblk_t *mb;
1583 1618 #endif
1584 1619 mblk_t *m;
1585 1620 #ifdef USE_INET6
1586 1621 ip6_t *ip6;
1587 1622 #endif
1588 1623 ip_t *ip;
1589 1624 ipf_stack_t *ifs = fin->fin_ifs;
1590 1625
1591 1626 if ((type < 0) || (type > ICMP_MAXTYPE))
1592 1627 return -1;
1593 1628
1594 1629 code = fin->fin_icode;
1595 1630 #ifdef USE_INET6
/* code is later used as an index into icmptoicmp6unreach[]. */
1596 1631 if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1597 1632 return -1;
1598 1633 #endif
1599 1634
1600 1635 #ifndef IPFILTER_CKSUM
1601 1636 if (fr_checkl4sum(fin) == -1)
1602 1637 return -1;
1603 1638 #endif
1604 1639
1605 1640 qpi = fin->fin_qpi;
1606 1641
1607 1642 #ifdef USE_INET6
1608 1643 mb = fin->fin_qfm;
1609 1644
1610 1645 if (fin->fin_v == 6) {
/* Quote at most 512 bytes of the first mblk of the offending packet. */
1611 1646 sz = sizeof(ip6_t);
1612 1647 sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1613 1648 hlen = sizeof(ip6_t);
1614 1649 type = icmptoicmp6types[type];
1615 1650 if (type == ICMP6_DST_UNREACH)
1616 1651 code = icmptoicmp6unreach[code];
1617 1652 } else
1618 1653 #endif
1619 1654 {
/*
 * Only the ICMP request types listed below may trigger an error; any
 * other ICMP packet is silently ignored.
 */
1620 1655 if ((fin->fin_p == IPPROTO_ICMP) &&
1621 1656 !(fin->fin_flx & FI_SHORT))
1622 1657 switch (ntohs(fin->fin_data[0]) >> 8)
1623 1658 {
1624 1659 case ICMP_ECHO :
1625 1660 case ICMP_TSTAMP :
1626 1661 case ICMP_IREQ :
1627 1662 case ICMP_MASKREQ :
1628 1663 break;
1629 1664 default :
1630 1665 return 0;
1631 1666 }
1632 1667
/* New IP header + quoted IP header + 8 bytes of the quoted payload. */
1633 1668 sz = sizeof(ip_t) * 2;
1634 1669 sz += 8; /* 64 bits of data */
1635 1670 hlen = sizeof(ip_t);
1636 1671 }
1637 1672
1638 1673 sz += offsetof(struct icmp, icmp_ip);
/* 64 extra bytes are allocated and skipped to leave headroom before the IP header. */
1639 1674 if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1640 1675 return -1;
1641 1676 MTYPE(m) = M_DATA;
1642 1677 m->b_rptr += 64;
1643 1678 m->b_wptr = m->b_rptr + sz;
1644 1679 bzero((char *)m->b_rptr, (size_t)sz);
1645 1680 ip = (ip_t *)m->b_rptr;
1646 1681 ip->ip_v = fin->fin_v;
1647 1682 icmp = (struct icmp *)(m->b_rptr + hlen);
1648 1683 icmp->icmp_type = type & 0xff;
1649 1684 icmp->icmp_code = code & 0xff;
1650 1685 phy = (phy_if_t)qpi->qpi_ill;
1651 1686 if (type == ICMP_UNREACH && (phy != 0) &&
1652 1687 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1653 1688 icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1654 1689
1655 1690 #ifdef USE_INET6
1656 1691 if (fin->fin_v == 6) {
1657 1692 struct in6_addr dst6;
1658 1693 int csz;
1659 1694
1660 1695 if (dst == 0) {
1661 1696 ipf_stack_t *ifs = fin->fin_ifs;
1662 1697
1663 1698 if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1664 1699 (void *)&dst6, NULL, ifs) == -1) {
1665 1700 FREE_MB_T(m);
1666 1701 return -1;
1667 1702 }
1668 1703 } else
1669 1704 dst6 = fin->fin_dst6.in6;
1670 1705
1671 1706 csz = sz;
1672 1707 sz -= sizeof(ip6_t);
1673 1708 ip6 = (ip6_t *)m->b_rptr;
1674 1709 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1675 1710 ip6->ip6_plen = htons((u_short)sz);
1676 1711 ip6->ip6_nxt = IPPROTO_ICMPV6;
1677 1712 ip6->ip6_src = dst6;
1678 1713 ip6->ip6_dst = fin->fin_src6.in6;
1679 1714 sz -= offsetof(struct icmp, icmp_ip);
1680 1715 bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
/*
 * NOTE(review): the checksum field is loaded with the ICMPv6 length, not
 * a checksum -- presumably completed further down the output path;
 * confirm.
 */
1681 1716 icmp->icmp_cksum = csz - sizeof(ip6_t);
1682 1717 } else
1683 1718 #endif
1684 1719 {
1685 1720 ip->ip_hl = sizeof(*ip) >> 2;
1686 1721 ip->ip_p = IPPROTO_ICMP;
1687 1722 ip->ip_id = fin->fin_ip->ip_id;
1688 1723 ip->ip_tos = fin->fin_ip->ip_tos;
1689 1724 ip->ip_len = (u_short)sz;
1690 1725 if (dst == 0) {
1691 1726 ipf_stack_t *ifs = fin->fin_ifs;
1692 1727
1693 1728 if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1694 1729 (void *)&dst4, NULL, ifs) == -1) {
1695 1730 FREE_MB_T(m);
1696 1731 return -1;
1697 1732 }
1698 1733 } else {
1699 1734 dst4 = fin->fin_dst;
1700 1735 }
1701 1736 ip->ip_src = dst4;
1702 1737 ip->ip_dst = fin->fin_src;
/* Quote the offending IP header (without options) and its first 8 payload bytes. */
1703 1738 bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1704 1739 sizeof(*fin->fin_ip));
1705 1740 bcopy((char *)fin->fin_ip + fin->fin_hlen,
1706 1741 (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1707 1742 icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1708 1743 icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1709 1744 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1710 1745 sz - sizeof(ip_t));
1711 1746 }
1712 1747
1713 1748 /*
1714 1749 * Need to exit out of these so we don't recursively call rw_enter
1715 1750 * from fr_qout.
1716 1751 */
1717 1752 return fr_send_ip(fin, m, &m);
1718 1753 }
1719 1754
1720 1755 #include <sys/time.h>
1721 1756 #include <sys/varargs.h>
1722 1757
1723 1758 #ifndef _KERNEL
1724 1759 #include <stdio.h>
1725 1760 #endif
1726 1761
1727 1762 /*
1728 1763 * Return the first IP Address associated with an interface
1729 1764 * For IPv6, we walk through the list of logical interfaces and return
1730 1765 * the address of the first one that isn't a link-local interface.
1731 1766 * We can't assume that it is :1 because another link-local address
1732 1767 * may have been assigned there.
 *
 * Parameters:
 *	v:       IP version, 4 or 6
 *	atype:   FRI_PEERADDR or FRI_BROADCAST to fetch the peer or
 *	         broadcast address; any other value fetches the interface
 *	         address
 *	ifptr:   the interface, as a phy_if_t cast to a pointer
 *	inp:     where the address is stored
 *	inpmask: where the netmask is stored
 *	ifs:     ipf stack whose IPv4/IPv6 net handle is used
 *
 * Returns -1 if `v' is not 4 or 6, if the net handle is NULL, if the
 * address lookup fails or (IPv6) no suitable address exists; otherwise
 * the result of fr_ifpfillv4addr()/fr_ifpfillv6addr().
1733 1768 */
1734 1769 /*ARGSUSED*/
1735 1770 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1736 1771 int v, atype;
1737 1772 void *ifptr;
1738 1773 struct in_addr *inp, *inpmask;
1739 1774 ipf_stack_t *ifs;
1740 1775 {
1741 1776 struct sockaddr_in6 v6addr[2];
1742 1777 struct sockaddr_in v4addr[2];
1743 1778 net_ifaddr_t type[2];
1744 1779 net_handle_t net_data;
1745 1780 phy_if_t phyif;
1746 1781 void *array;
1747 1782
1748 1783 switch (v)
1749 1784 {
1750 1785 case 4:
1751 1786 net_data = ifs->ifs_ipf_ipv4;
1752 1787 array = v4addr;
1753 1788 break;
1754 1789 case 6:
1755 1790 net_data = ifs->ifs_ipf_ipv6;
1756 1791 array = v6addr;
1757 1792 break;
1758 1793 default:
1759 1794 net_data = NULL;
1760 1795 break;
1761 1796 }
1762 1797
1763 1798 if (net_data == NULL)
1764 1799 return -1;
1765 1800
1766 1801 phyif = (phy_if_t)ifptr;
1767 1802
1768 1803 switch (atype)
1769 1804 {
1770 1805 case FRI_PEERADDR :
1771 1806 type[0] = NA_PEER;
1772 1807 break;
1773 1808
1774 1809 case FRI_BROADCAST :
1775 1810 type[0] = NA_BROADCAST;
1776 1811 break;
1777 1812
1778 1813 default :
1779 1814 type[0] = NA_ADDRESS;
1780 1815 break;
1781 1816 }
1782 1817
/* Two values are requested per lookup: slot 0 the address, slot 1 the netmask. */
1783 1818 type[1] = NA_NETMASK;
1784 1819
1785 1820 if (v == 6) {
1786 1821 lif_if_t idx = 0;
1787 1822
/* Stop at the first address that is neither link-local nor multicast. */
1788 1823 do {
1789 1824 idx = net_lifgetnext(net_data, phyif, idx);
1790 1825 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1791 1826 array) < 0)
1792 1827 return -1;
1793 1828 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1794 1829 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1795 1830 break;
1796 1831 } while (idx != 0);
1797 1832
1798 1833 if (idx == 0)
1799 1834 return -1;
1800 1835
1801 1836 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1802 1837 inp, inpmask);
1803 1838 }
1804 1839
1805 1840 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1806 1841 return -1;
1807 1842
1808 1843 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1809 1844 }
1810 1845
1811 1846
/*
 * Generate a new TCP initial sequence number for the connection described
 * by `fin'.  The value is the first four bytes of an MD5 hash over the
 * packet's source, destination, fin_dat and the per-stack secret, plus a
 * counter that advances by 0x00010000 on every call.
 *
 * NOTE(review): iss_seq_off is a function-level static shared by every
 * caller and is updated here without taking any lock.
 */
1812 1847 u_32_t fr_newisn(fin)
1813 1848 fr_info_t *fin;
1814 1849 {
1815 1850 static int iss_seq_off = 0;
1816 1851 u_char hash[16];
1817 1852 u_32_t newiss;
1818 1853 MD5_CTX ctx;
1819 1854 ipf_stack_t *ifs = fin->fin_ifs;
1820 1855
1821 1856 /*
1822 1857 * Compute the base value of the ISS. It is a hash
1823 1858 * of (saddr, sport, daddr, dport, secret).
1824 1859 */
1825 1860 MD5Init(&ctx);
1826 1861
1827 1862 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1828 1863 sizeof(fin->fin_fi.fi_src));
1829 1864 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1830 1865 sizeof(fin->fin_fi.fi_dst));
1831 1866 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1832 1867
1833 1868 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1834 1869
1835 1870 MD5Final(hash, &ctx);
1836 1871
/* Only the first sizeof(newiss) bytes of the 16-byte digest are used. */
1837 1872 bcopy(hash, &newiss, sizeof(newiss));
1838 1873
1839 1874 /*
1840 1875 * Now increment our "timer", and add it in to
1841 1876 * the computed value.
1842 1877 *
1843 1878 * XXX Use `addin'?
1844 1879 * XXX TCP_ISSINCR too large to use?
1845 1880 */
1846 1881 iss_seq_off += 0x00010000;
1847 1882 newiss += iss_seq_off;
1848 1883 return newiss;
1849 1884 }
1850 1885
1851 1886
1852 1887 /* ------------------------------------------------------------------------ */
1853 1888 /* Function: fr_nextipid */
1854 1889 /* Returns: u_short - the IPv4 ID value to use */
1855 1890 /* Parameters: fin(I) - pointer to packet information */
1856 1891 /* */
1857 1892 /* Returns the next IPv4 ID to use for this packet. */
/* If fin_pktnum is non-zero its low 16 bits are used; otherwise a shared   */
/* counter is returned and post-incremented.  ifs_ipf_rw is held meanwhile. */
1858 1893 /* ------------------------------------------------------------------------ */
1859 1894 u_short fr_nextipid(fin)
1860 1895 fr_info_t *fin;
1861 1896 {
1862 1897 static u_short ipid = 0;
1863 1898 u_short id;
1864 1899 ipf_stack_t *ifs = fin->fin_ifs;
1865 1900
1866 1901 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1867 1902 if (fin->fin_pktnum != 0) {
1868 1903 id = fin->fin_pktnum & 0xffff;
1869 1904 } else {
1870 1905 id = ipid++;
1871 1906 }
1872 1907 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1873 1908
1874 1909 return id;
1875 1910 }
1876 1911
1877 1912
/*
 * Verify the layer 4 checksum of an IPv4 packet.  When IPFILTER_CKSUM is
 * defined and fr_checkl4sum() returns -1, FI_BAD is set in fin_flx;
 * without IPFILTER_CKSUM this function does nothing.
 */
1878 1913 #ifndef IPFILTER_CKSUM
1879 1914 /* ARGSUSED */
1880 1915 #endif
1881 1916 INLINE void fr_checkv4sum(fin)
1882 1917 fr_info_t *fin;
1883 1918 {
1884 1919 #ifdef IPFILTER_CKSUM
1885 1920 if (fr_checkl4sum(fin) == -1)
1886 1921 fin->fin_flx |= FI_BAD;
1887 1922 #endif
1888 1923 }
1889 1924
1890 1925
/*
 * IPv6 counterpart of fr_checkv4sum(), compiled only with USE_INET6.
 * When IPFILTER_CKSUM is defined and fr_checkl4sum() returns -1, FI_BAD
 * is set in fin_flx; without IPFILTER_CKSUM this function does nothing.
 */
1891 1926 #ifdef USE_INET6
1892 1927 # ifndef IPFILTER_CKSUM
1893 1928 /* ARGSUSED */
1894 1929 # endif
1895 1930 INLINE void fr_checkv6sum(fin)
1896 1931 fr_info_t *fin;
1897 1932 {
1898 1933 # ifdef IPFILTER_CKSUM
1899 1934 if (fr_checkl4sum(fin) == -1)
1900 1935 fin->fin_flx |= FI_BAD;
1901 1936 # endif
1902 1937 }
1903 1938 #endif /* USE_INET6 */
1904 1939
1905 1940
1906 1941 #if (SOLARIS2 < 7)
1907 1942 void fr_slowtimer()
1908 1943 #else
1909 1944 /*ARGSUSED*/
1910 1945 void fr_slowtimer __P((void *arg))
1911 1946 #endif
1912 1947 {
1913 1948 ipf_stack_t *ifs = arg;
1914 1949
1915 1950 READ_ENTER(&ifs->ifs_ipf_global);
1916 1951 if (ifs->ifs_fr_running != 1) {
1917 1952 ifs->ifs_fr_timer_id = NULL;
1918 1953 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1919 1954 return;
1920 1955 }
1921 1956 ipf_expiretokens(ifs);
1922 1957 fr_fragexpire(ifs);
1923 1958 fr_timeoutstate(ifs);
1924 1959 fr_natexpire(ifs);
1925 1960 fr_authexpire(ifs);
1926 1961 ifs->ifs_fr_ticks++;
1927 1962 if (ifs->ifs_fr_running == 1)
1928 1963 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1929 1964 drv_usectohz(500000));
1930 1965 else
1931 1966 ifs->ifs_fr_timer_id = NULL;
1932 1967 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1933 1968 }
1934 1969
1935 1970
1936 1971 /* ------------------------------------------------------------------------ */
1937 1972 /* Function: fr_pullup */
1938 1973 /* Returns: NULL == pullup failed, else pointer to protocol header */
1939 1974 /* Parameters: m(I) - pointer to buffer where data packet starts */
1940 1975 /* fin(I) - pointer to packet information */
1941 1976 /* len(I) - number of bytes to pullup */
1942 1977 /* */
1943 1978 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1944 1979 /* single buffer for ease of access. Operating system native functions are */
1945 1980 /* used to manage buffers - if necessary. If the entire packet ends up in */
1946 1981 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has */
1947 1982 /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */
1948 1983 /* and ONLY if the pullup succeeds. */
1949 1984 /* */
1950 1985 /* We assume that 'min' is a pointer to a buffer that is part of the chain */
1951 1986 /* of buffers that starts at *fin->fin_mp. */
1952 1987 /* ------------------------------------------------------------------------ */
1953 1988 void *fr_pullup(min, fin, len)
1954 1989 mb_t *min;
1955 1990 fr_info_t *fin;
1956 1991 int len;
1957 1992 {
1958 1993 qpktinfo_t *qpi = fin->fin_qpi;
1959 1994 int out = fin->fin_out, dpoff, ipoff;
1960 1995 mb_t *m = min, *m1, *m2;
1961 1996 char *ip;
1962 1997 uint32_t start, stuff, end, value, flags;
1963 1998 ipf_stack_t *ifs = fin->fin_ifs;
1964 1999
1965 2000 if (m == NULL)
1966 2001 return NULL;
1967 2002
1968 2003 ip = (char *)fin->fin_ip;
1969 2004 if ((fin->fin_flx & FI_COALESCE) != 0)
1970 2005 return ip;
1971 2006
1972 2007 ipoff = fin->fin_ipoff;
1973 2008 if (fin->fin_dp != NULL)
1974 2009 dpoff = (char *)fin->fin_dp - (char *)ip;
1975 2010 else
1976 2011 dpoff = 0;
1977 2012
1978 2013 if (M_LEN(m) < len + ipoff) {
1979 2014
1980 2015 /*
1981 2016 * pfil_precheck ensures the IP header is on a 32bit
1982 2017 * aligned address so simply fail if that isn't currently
1983 2018 * the case (should never happen).
1984 2019 */
1985 2020 int inc = 0;
1986 2021
1987 2022 if (ipoff > 0) {
1988 2023 if ((ipoff & 3) != 0) {
1989 2024 inc = 4 - (ipoff & 3);
1990 2025 if (m->b_rptr - inc >= m->b_datap->db_base)
1991 2026 m->b_rptr -= inc;
1992 2027 else
1993 2028 inc = 0;
1994 2029 }
1995 2030 }
1996 2031
1997 2032 /*
1998 2033 * XXX This is here as a work around for a bug with DEBUG
1999 2034 * XXX Solaris kernels. The problem is b_prev is used by IP
2000 2035 * XXX code as a way to stash the phyint_index for a packet,
2001 2036 * XXX this doesn't get reset by IP but freeb does an ASSERT()
2002 2037 * XXX for both of these to be NULL. See 6442390.
2003 2038 */
2004 2039 m1 = m;
2005 2040 m2 = m->b_prev;
2006 2041
2007 2042 do {
2008 2043 m1->b_next = NULL;
2009 2044 m1->b_prev = NULL;
2010 2045 m1 = m1->b_cont;
2011 2046 } while (m1);
2012 2047
2013 2048 /*
2014 2049 * Need to preserve checksum information by copying them
2015 2050 * to newmp which heads the pulluped message.
2016 2051 */
2017 2052 mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);
2018 2053
2019 2054 if (pullupmsg(m, len + ipoff + inc) == 0) {
2020 2055 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
2021 2056 FREE_MB_T(*fin->fin_mp);
2022 2057 *fin->fin_mp = NULL;
2023 2058 fin->fin_m = NULL;
2024 2059 fin->fin_ip = NULL;
2025 2060 fin->fin_dp = NULL;
2026 2061 qpi->qpi_data = NULL;
2027 2062 return NULL;
2028 2063 }
2029 2064
2030 2065 mac_hcksum_set(m, start, stuff, end, value, flags);
2031 2066
2032 2067 m->b_prev = m2;
2033 2068 m->b_rptr += inc;
2034 2069 fin->fin_m = m;
2035 2070 ip = MTOD(m, char *) + ipoff;
2036 2071 qpi->qpi_data = ip;
2037 2072 }
2038 2073
2039 2074 ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
2040 2075 fin->fin_ip = (ip_t *)ip;
2041 2076 if (fin->fin_dp != NULL)
2042 2077 fin->fin_dp = (char *)fin->fin_ip + dpoff;
2043 2078
2044 2079 if (len == fin->fin_plen)
2045 2080 fin->fin_flx |= FI_COALESCE;
2046 2081 return ip;
2047 2082 }
2048 2083
2049 2084
2050 2085 /*
2051 2086 * Function: fr_verifysrc
2052 2087 * Returns: int (really boolean)
2053 2088 * Parameters: fin - packet information
2054 2089 *
2055 2090 * Check whether the packet has a valid source address for the interface on
2056 2091 * which the packet arrived, implementing the "fr_chksrc" feature.
2057 2092 * Returns true iff the packet's source address is valid.
2058 2093 */
2059 2094 int fr_verifysrc(fin)
2060 2095 fr_info_t *fin;
2061 2096 {
2062 2097 net_handle_t net_data_p;
2063 2098 phy_if_t phy_ifdata_routeto;
2064 2099 struct sockaddr sin;
2065 2100 ipf_stack_t *ifs = fin->fin_ifs;
2066 2101
2067 2102 if (fin->fin_v == 4) {
2068 2103 net_data_p = ifs->ifs_ipf_ipv4;
2069 2104 } else if (fin->fin_v == 6) {
2070 2105 net_data_p = ifs->ifs_ipf_ipv6;
2071 2106 } else {
2072 2107 return (0);
2073 2108 }
2074 2109
2075 2110 /* Get the index corresponding to the if name */
2076 2111 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2077 2112 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
2078 2113 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
2079 2114
2080 2115 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
2081 2116 }
2082 2117
2083 2118 /*
2084 2119 * Return true only if forwarding is enabled on the interface.
2085 2120 */
2086 2121 static int
2087 2122 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2088 2123 {
2089 2124 lif_if_t lif;
2090 2125
2091 2126 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2092 2127 lif = net_lifgetnext(ndp, phyif, lif)) {
2093 2128 int res;
2094 2129 uint64_t flags;
2095 2130
2096 2131 res = net_getlifflags(ndp, phyif, lif, &flags);
2097 2132 if (res != 0)
2098 2133 return (0);
2099 2134 if (flags & IFF_ROUTER)
2100 2135 return (1);
2101 2136 }
2102 2137
2103 2138 return (0);
2104 2139 }
2105 2140
2106 2141 /*
2107 2142 * Function: fr_fastroute
2108 2143 * Returns: 0: success;
2109 2144 * -1: failed
2110 2145 * Parameters:
2111 2146 * mb: the message block where ip head starts
2112 2147 * mpp: the pointer to the pointer of the orignal
2113 2148 * packet message
2114 2149 * fin: packet information
2115 2150 * fdp: destination interface information
2116 2151 * if it is NULL, no interface information provided.
2117 2152 *
2118 2153 * This function is for fastroute/to/dup-to rules. It calls
2119 2154 * pfil_make_lay2_packet to search route, make lay-2 header
2120 2155 * ,and identify output queue for the IP packet.
2121 2156 * The destination address depends on the following conditions:
2122 2157 * 1: for fastroute rule, fdp is passed in as NULL, so the
2123 2158 * destination address is the IP Packet's destination address
2124 2159 * 2: for to/dup-to rule, if an ip address is specified after
2125 2160 * the interface name, this address is the as destination
2126 2161 * address. Otherwise IP Packet's destination address is used
2127 2162 */
2128 2163 int fr_fastroute(mb, mpp, fin, fdp)
2129 2164 mblk_t *mb, **mpp;
2130 2165 fr_info_t *fin;
2131 2166 frdest_t *fdp;
2132 2167 {
2133 2168 net_handle_t net_data_p;
2134 2169 net_inject_t *inj;
2135 2170 mblk_t *mp = NULL;
2136 2171 frentry_t *fr = fin->fin_fr;
2137 2172 qpktinfo_t *qpi;
2138 2173 ip_t *ip;
2139 2174
2140 2175 struct sockaddr_in *sin;
2141 2176 struct sockaddr_in6 *sin6;
2142 2177 struct sockaddr *sinp;
2143 2178 ipf_stack_t *ifs = fin->fin_ifs;
2144 2179 #ifndef sparc
2145 2180 u_short __iplen, __ipoff;
2146 2181 #endif
2147 2182
2148 2183 if (fin->fin_v == 4) {
2149 2184 net_data_p = ifs->ifs_ipf_ipv4;
2150 2185 } else if (fin->fin_v == 6) {
2151 2186 net_data_p = ifs->ifs_ipf_ipv6;
2152 2187 } else {
2153 2188 return (-1);
2154 2189 }
2155 2190
2156 2191 /*
2157 2192 * If we're forwarding (vs. injecting), check the src here, fin_ifp is
2158 2193 * the src interface.
2159 2194 */
2160 2195 if (fdp != NULL &&
2161 2196 !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
2162 2197 return (-1);
2163 2198
2164 2199 inj = net_inject_alloc(NETINFO_VERSION);
2165 2200 if (inj == NULL)
2166 2201 return -1;
2167 2202
2168 2203 ip = fin->fin_ip;
2169 2204 qpi = fin->fin_qpi;
2170 2205
2171 2206 /*
2172 2207 * If this is a duplicate mblk then we want ip to point at that
2173 2208 * data, not the original, if and only if it is already pointing at
2174 2209 * the current mblk data.
2175 2210 *
2176 2211 * Otherwise, if it's not a duplicate, and we're not already pointing
2177 2212 * at the current mblk data, then we want to ensure that the data
2178 2213 * points at ip.
2179 2214 */
2180 2215
2181 2216 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
2182 2217 ip = (ip_t *)mb->b_rptr;
2183 2218 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
2184 2219 qpi->qpi_m->b_rptr = (uchar_t *)ip;
2185 2220 qpi->qpi_off = 0;
2186 2221 }
2187 2222
2188 2223 /*
2189 2224 * If there is another M_PROTO, we don't want it
2190 2225 */
2191 2226 if (*mpp != mb) {
2192 2227 mp = unlinkb(*mpp);
2193 2228 freeb(*mpp);
2194 2229 *mpp = mp;
2195 2230 }
2196 2231
2197 2232 sinp = (struct sockaddr *)&inj->ni_addr;
2198 2233 sin = (struct sockaddr_in *)sinp;
2199 2234 sin6 = (struct sockaddr_in6 *)sinp;
2200 2235 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
2201 2236 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2202 2237 inj->ni_packet = mb;
2203 2238
2204 2239 /*
2205 2240 * In case we're here due to "to <if>" being used with
2206 2241 * "keep state", check that we're going in the correct
2207 2242 * direction.
2208 2243 */
2209 2244 if (fdp != NULL) {
2210 2245 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
2211 2246 (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
2212 2247 goto bad_fastroute;
2213 2248 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
2214 2249 if (fin->fin_v == 4) {
2215 2250 sin->sin_addr = fdp->fd_ip;
2216 2251 } else {
2217 2252 sin6->sin6_addr = fdp->fd_ip6.in6;
2218 2253 }
2219 2254 } else {
2220 2255 if (fin->fin_v == 4) {
2221 2256 sin->sin_addr = ip->ip_dst;
2222 2257 } else {
2223 2258 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
2224 2259 }
2225 2260 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
2226 2261 }
2227 2262
2228 2263 /* If we're forwarding (vs. injecting), check the destinatation here. */
2229 2264 if (fdp != NULL && !fr_forwarding_enabled(inj->ni_physical, net_data_p))
2230 2265 goto bad_fastroute;
2231 2266
2232 2267 /*
2233 2268 * Clear the hardware checksum flags from packets that we are doing
2234 2269 * input processing on as leaving them set will cause the outgoing
2235 2270 * NIC (if it supports hardware checksum) to calculate them anew,
2236 2271 * using the old (correct) checksums as the pseudo value to start
2237 2272 * from.
2238 2273 */
2239 2274 if (fin->fin_out == 0) {
2240 2275 DB_CKSUMFLAGS(mb) = 0;
2241 2276 }
2242 2277
2243 2278 *mpp = mb;
2244 2279
2245 2280 if (fin->fin_out == 0) {
2246 2281 void *saveifp;
2247 2282 u_32_t pass;
2248 2283
2249 2284 saveifp = fin->fin_ifp;
2250 2285 fin->fin_ifp = (void *)inj->ni_physical;
2251 2286 fin->fin_flx &= ~FI_STATE;
2252 2287 fin->fin_out = 1;
2253 2288 (void) fr_acctpkt(fin, &pass);
2254 2289 fin->fin_fr = NULL;
2255 2290 if (!fr || !(fr->fr_flags & FR_RETMASK))
2256 2291 (void) fr_checkstate(fin, &pass);
2257 2292 if (fr_checknatout(fin, NULL) == -1)
2258 2293 goto bad_fastroute;
2259 2294 fin->fin_out = 0;
2260 2295 fin->fin_ifp = saveifp;
2261 2296 }
2262 2297 #ifndef sparc
2263 2298 if (fin->fin_v == 4) {
2264 2299 __iplen = (u_short)ip->ip_len,
2265 2300 __ipoff = (u_short)ip->ip_off;
2266 2301
2267 2302 ip->ip_len = htons(__iplen);
2268 2303 ip->ip_off = htons(__ipoff);
2269 2304 }
2270 2305 #endif
2271 2306
2272 2307 if (net_data_p) {
2273 2308 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
2274 2309 net_inject_free(inj);
2275 2310 return (-1);
2276 2311 }
2277 2312 }
2278 2313
2279 2314 ifs->ifs_fr_frouteok[0]++;
2280 2315 net_inject_free(inj);
2281 2316 return 0;
2282 2317 bad_fastroute:
2283 2318 net_inject_free(inj);
2284 2319 freemsg(mb);
2285 2320 ifs->ifs_fr_frouteok[1]++;
2286 2321 return -1;
2287 2322 }
2288 2323
2289 2324
2290 2325 /* ------------------------------------------------------------------------ */
2291 2326 /* Function: ipf_hook4_out */
2292 2327 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2293 2328 /* Parameters: event(I) - pointer to event */
2294 2329 /* info(I) - pointer to hook information for firewalling */
2295 2330 /* */
2296 2331 /* Calling ipf_hook. */
2297 2332 /* ------------------------------------------------------------------------ */
2298 2333 /*ARGSUSED*/
2299 2334 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2300 2335 {
2301 2336 return ipf_hook(info, 1, 0, arg);
2302 2337 }
2303 2338 /*ARGSUSED*/
2304 2339 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2305 2340 {
2306 2341 return ipf_hook6(info, 1, 0, arg);
2307 2342 }
2308 2343
2309 2344 /* ------------------------------------------------------------------------ */
2310 2345 /* Function: ipf_hook4_in */
2311 2346 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2312 2347 /* Parameters: event(I) - pointer to event */
2313 2348 /* info(I) - pointer to hook information for firewalling */
2314 2349 /* */
2315 2350 /* Calling ipf_hook. */
2316 2351 /* ------------------------------------------------------------------------ */
2317 2352 /*ARGSUSED*/
2318 2353 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2319 2354 {
2320 2355 return ipf_hook(info, 0, 0, arg);
2321 2356 }
2322 2357 /*ARGSUSED*/
2323 2358 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2324 2359 {
2325 2360 return ipf_hook6(info, 0, 0, arg);
2326 2361 }
2327 2362
2328 2363
2329 2364 /* ------------------------------------------------------------------------ */
2330 2365 /* Function: ipf_hook4_loop_out */
2331 2366 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2332 2367 /* Parameters: event(I) - pointer to event */
2333 2368 /* info(I) - pointer to hook information for firewalling */
2334 2369 /* */
2335 2370 /* Calling ipf_hook. */
2336 2371 /* ------------------------------------------------------------------------ */
2337 2372 /*ARGSUSED*/
2338 2373 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2339 2374 {
2340 2375 return ipf_hook(info, 1, FI_NOCKSUM, arg);
2341 2376 }
2342 2377 /*ARGSUSED*/
2343 2378 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2344 2379 {
2345 2380 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2346 2381 }
2347 2382
2348 2383 /* ------------------------------------------------------------------------ */
2349 2384 /* Function: ipf_hookvndl3_in */
2350 2385 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2351 2386 /* Parameters: event(I) - pointer to event */
2352 2387 /* info(I) - pointer to hook information for firewalling */
2353 2388 /* */
2354 2389 /* The vnd hooks are private hooks to ON. They represents a layer 2 */
2355 2390 /* datapath generally used to implement virtual machines. The driver sends */
2356 2391 /* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
2357 2392 /* them is in the upper 16 bits while the remaining bits are the */
2358 2393 /* traditional packet hook flags. */
2359 2394 /* */
2360 2395 /* They end up calling the appropriate traditional ip hooks. */
2361 2396 /* ------------------------------------------------------------------------ */
2362 2397 /*ARGSUSED*/
2363 2398 int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
2364 2399 {
2365 2400 return ipf_hook4_in(token, info, arg);
2366 2401 }
2367 2402
2368 2403 int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
2369 2404 {
2370 2405 return ipf_hook6_in(token, info, arg);
2371 2406 }
2372 2407
2373 2408 /*ARGSUSED*/
2374 2409 int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
2375 2410 {
2376 2411 return ipf_hook4_out(token, info, arg);
2377 2412 }
2378 2413
2379 2414 int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
2380 2415 {
2381 2416 return ipf_hook6_out(token, info, arg);
2382 2417 }
2383 2418
2384 2419 /* Static constants used by ipf_hook_ether */
2385 2420 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2386 2421 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2387 2422 };
2388 2423 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2389 2424 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2390 2425
2391 2426 /* ------------------------------------------------------------------------ */
2392 2427 /* Function: ipf_hook_ether */
2393 2428 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2394 2429 /* Parameters: token(I) - pointer to event */
2395 2430 /* info(I) - pointer to hook information for firewalling */
2396 2431 /* */
2397 2432 /* The ipf_hook_ether hook is currently private to illumos. It represents */
2398 2433 /* a layer 2 datapath generally used by virtual machines. Currently the */
2399 2434 /* hook is only used by the viona driver to pass along L2 frames for */
2400 2435 /* inspection. It requires that the L2 ethernet header is contained within */
2401 2436 /* a single dblk_t (however layers above the L2 header have no restrctions */
2402 2437 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */
2403 2438 /* filtering on a MAC address or ethertype), however virtual machines do */
2404 2439 /* not have native IP stack instances where ipf traditionally hooks in. */
2405 2440 /* Instead this entry point is used to determine if the packet is unicast, */
2406 2441 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */
2407 2442 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */
2408 2443 /* not subject to examination. */
2409 2444 /* ------------------------------------------------------------------------ */
2410 2445 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2411 2446 boolean_t out)
2412 2447 {
2413 2448 struct ether_header *ethp;
2414 2449 hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2415 2450 mblk_t *mp;
2416 2451 size_t offset, len;
2417 2452 uint16_t etype;
2418 2453 boolean_t v6;
2419 2454
2420 2455 /*
2421 2456 * viona will only pass us mblks with the L2 header contained in a
2422 2457 * single data block.
2423 2458 */
2424 2459 mp = *hpe->hpe_mp;
2425 2460 len = MBLKL(mp);
2426 2461
2427 2462 VERIFY3S(len, >=, sizeof (struct ether_header));
2428 2463
2429 2464 ethp = (struct ether_header *)mp->b_rptr;
2430 2465 if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2431 2466 struct ether_vlan_header *evh =
2432 2467 (struct ether_vlan_header *)ethp;
2433 2468
2434 2469 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2435 2470
2436 2471 etype = ntohs(evh->ether_type);
2437 2472 offset = sizeof (*evh);
2438 2473 } else {
2439 2474 offset = sizeof (*ethp);
2440 2475 }
2441 2476
2442 2477 /*
2443 2478 * ipf only support filtering IPv4 and IPv6. Ignore other types.
2444 2479 */
2445 2480 if (etype == ETHERTYPE_IP)
2446 2481 v6 = B_FALSE;
2447 2482 else if (etype == ETHERTYPE_IPV6)
2448 2483 v6 = B_TRUE;
2449 2484 else
2450 2485 return (0);
2451 2486
2452 2487 if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2453 2488 hpe->hpe_flags |= HPE_BROADCAST;
2454 2489 else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2455 2490 sizeof (ipf_eth_ipv4_mcast)) == 0)
2456 2491 hpe->hpe_flags |= HPE_MULTICAST;
2457 2492 else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2458 2493 sizeof (ipf_eth_ipv6_mcast)) == 0)
2459 2494 hpe->hpe_flags |= HPE_MULTICAST;
2460 2495
2461 2496 /* Find the start of the IPv4 or IPv6 header */
2462 2497 for (; offset >= len; len = MBLKL(mp)) {
2463 2498 offset -= len;
2464 2499 mp = mp->b_cont;
2465 2500 if (mp == NULL) {
2466 2501 freemsg(*hpe->hpe_mp);
2467 2502 *hpe->hpe_mp = NULL;
2468 2503 return (-1);
2469 2504 }
2470 2505 }
2471 2506 hpe->hpe_mb = mp;
2472 2507 hpe->hpe_hdr = mp->b_rptr + offset;
2473 2508
2474 2509 return (v6 ? ipf_hook6(info, out, 0, arg) :
2475 2510 ipf_hook(info, out, 0, arg));
2476 2511 }
2477 2512
2478 2513 /* ------------------------------------------------------------------------ */
2479 2514 /* Function: ipf_hookviona_{in,out} */
2480 2515 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2481 2516 /* Parameters: event(I) - pointer to event */
2482 2517 /* info(I) - pointer to hook information for firewalling */
2483 2518 /* */
2484 2519 /* The viona hooks are private hooks to illumos. They represents a layer 2 */
2485 2520 /* datapath generally used to implement virtual machines. */
2486 2521 /* along L2 packets. */
2487 2522 /* */
2488 2523 /* They end up calling the appropriate traditional ip hooks. */
2489 2524 /* ------------------------------------------------------------------------ */
2490 2525 int
2491 2526 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2492 2527 {
2493 2528 return (ipf_hook_ether(token, info, arg, B_FALSE));
2494 2529 }
2495 2530
2496 2531 int
2497 2532 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2498 2533 {
2499 2534 return (ipf_hook_ether(token, info, arg, B_TRUE));
2500 2535 }
2501 2536
2502 2537 /* ------------------------------------------------------------------------ */
2503 2538 /* Function: ipf_hook4_loop_in */
2504 2539 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2505 2540 /* Parameters: event(I) - pointer to event */
2506 2541 /* info(I) - pointer to hook information for firewalling */
2507 2542 /* */
2508 2543 /* Calling ipf_hook. */
2509 2544 /* ------------------------------------------------------------------------ */
2510 2545 /*ARGSUSED*/
2511 2546 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2512 2547 {
2513 2548 return ipf_hook(info, 0, FI_NOCKSUM, arg);
2514 2549 }
2515 2550 /*ARGSUSED*/
2516 2551 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2517 2552 {
2518 2553 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2519 2554 }
2520 2555
2521 2556 /* ------------------------------------------------------------------------ */
2522 2557 /* Function: ipf_hook */
2523 2558 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2524 2559 /* Parameters: info(I) - pointer to hook information for firewalling */
2525 2560 /* out(I) - whether packet is going in or out */
2526 2561 /* loopback(I) - whether packet is a loopback packet or not */
2527 2562 /* */
2528 2563 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2529 2564 /* parameters out of the info structure and forms them up to be useful for */
2530 2565 /* calling ipfilter. */
2531 2566 /* ------------------------------------------------------------------------ */
2532 2567 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2533 2568 {
2534 2569 hook_pkt_event_t *fw;
2535 2570 ipf_stack_t *ifs;
2536 2571 qpktinfo_t qpi;
2537 2572 int rval, hlen;
2538 2573 u_short swap;
2539 2574 phy_if_t phy;
2540 2575 ip_t *ip;
2541 2576
2542 2577 ifs = arg;
2543 2578 fw = (hook_pkt_event_t *)info;
2544 2579
2545 2580 ASSERT(fw != NULL);
2546 2581 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2547 2582
2548 2583 ip = fw->hpe_hdr;
2549 2584 swap = ntohs(ip->ip_len);
2550 2585 ip->ip_len = swap;
2551 2586 swap = ntohs(ip->ip_off);
2552 2587 ip->ip_off = swap;
2553 2588 hlen = IPH_HDR_LENGTH(ip);
2554 2589
2555 2590 qpi.qpi_m = fw->hpe_mb;
2556 2591 qpi.qpi_data = fw->hpe_hdr;
2557 2592 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2558 2593 qpi.qpi_ill = (void *)phy;
2559 2594 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2560 2595 if (qpi.qpi_flags)
2561 2596 qpi.qpi_flags |= FI_MBCAST;
2562 2597 qpi.qpi_flags |= loopback;
2563 2598
2564 2599 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2565 2600 &qpi, fw->hpe_mp, ifs);
2566 2601
2567 2602 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2568 2603 if (rval == 0 && *(fw->hpe_mp) == NULL)
2569 2604 rval = 1;
2570 2605
2571 2606 /* Notify IP the packet mblk_t and IP header pointers. */
2572 2607 fw->hpe_mb = qpi.qpi_m;
2573 2608 fw->hpe_hdr = qpi.qpi_data;
2574 2609 if (rval == 0) {
2575 2610 ip = qpi.qpi_data;
2576 2611 swap = ntohs(ip->ip_len);
2577 2612 ip->ip_len = swap;
2578 2613 swap = ntohs(ip->ip_off);
2579 2614 ip->ip_off = swap;
2580 2615 }
2581 2616 return rval;
2582 2617
2583 2618 }
2584 2619 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2585 2620 {
2586 2621 hook_pkt_event_t *fw;
2587 2622 int rval, hlen;
2588 2623 qpktinfo_t qpi;
2589 2624 phy_if_t phy;
2590 2625
2591 2626 fw = (hook_pkt_event_t *)info;
2592 2627
2593 2628 ASSERT(fw != NULL);
2594 2629 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2595 2630
2596 2631 hlen = sizeof (ip6_t);
2597 2632
2598 2633 qpi.qpi_m = fw->hpe_mb;
2599 2634 qpi.qpi_data = fw->hpe_hdr;
2600 2635 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2601 2636 qpi.qpi_ill = (void *)phy;
2602 2637 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2603 2638 if (qpi.qpi_flags)
2604 2639 qpi.qpi_flags |= FI_MBCAST;
2605 2640 qpi.qpi_flags |= loopback;
2606 2641
2607 2642 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2608 2643 &qpi, fw->hpe_mp, arg);
2609 2644
2610 2645 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2611 2646 if (rval == 0 && *(fw->hpe_mp) == NULL)
2612 2647 rval = 1;
2613 2648
2614 2649 /* Notify IP the packet mblk_t and IP header pointers. */
2615 2650 fw->hpe_mb = qpi.qpi_m;
2616 2651 fw->hpe_hdr = qpi.qpi_data;
2617 2652 return rval;
2618 2653 }
2619 2654
2620 2655
2621 2656 /* ------------------------------------------------------------------------ */
2622 2657 /* Function: ipf_nic_event_v4 */
2623 2658 /* Returns: int - 0 == no problems encountered */
2624 2659 /* Parameters: event(I) - pointer to event */
2625 2660 /* info(I) - pointer to information about a NIC event */
2626 2661 /* */
2627 2662 /* Function to receive asynchronous NIC events from IP */
2628 2663 /* ------------------------------------------------------------------------ */
2629 2664 /*ARGSUSED*/
2630 2665 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2631 2666 {
2632 2667 struct sockaddr_in *sin;
2633 2668 hook_nic_event_t *hn;
2634 2669 ipf_stack_t *ifs = arg;
2635 2670 void *new_ifp = NULL;
2636 2671
2637 2672 if (ifs->ifs_fr_running <= 0)
2638 2673 return (0);
2639 2674
2640 2675 hn = (hook_nic_event_t *)info;
2641 2676
2642 2677 switch (hn->hne_event)
2643 2678 {
2644 2679 case NE_PLUMB :
2645 2680 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2646 2681 ifs);
2647 2682 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2648 2683 hn->hne_data, ifs);
2649 2684 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2650 2685 hn->hne_data, ifs);
2651 2686 break;
2652 2687
2653 2688 case NE_UNPLUMB :
2654 2689 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2655 2690 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2656 2691 ifs);
2657 2692 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2658 2693 break;
2659 2694
2660 2695 case NE_ADDRESS_CHANGE :
2661 2696 /*
2662 2697 * We only respond to events for logical interface 0 because
2663 2698 * IPFilter only uses the first address given to a network
2664 2699 * interface. We check for hne_lif==1 because the netinfo
2665 2700 * code maps adds 1 to the lif number so that it can return
2666 2701 * 0 to indicate "no more lifs" when walking them.
2667 2702 */
2668 2703 if (hn->hne_lif == 1) {
2669 2704 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2670 2705 ifs);
2671 2706 sin = hn->hne_data;
2672 2707 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2673 2708 ifs);
2674 2709 }
2675 2710 break;
2676 2711
2677 2712 #if SOLARIS2 >= 10
2678 2713 case NE_IFINDEX_CHANGE :
2679 2714 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2680 2715
2681 2716 if (hn->hne_data != NULL) {
2682 2717 /*
2683 2718 * The netinfo passes interface index as int (hne_data should be
2684 2719 * handled as a pointer to int), which is always 32bit. We need to
2685 2720 * convert it to void pointer here, since interfaces are
2686 2721 * represented as pointers to void in IPF. The pointers are 64 bits
2687 2722 * long on 64bit platforms. Doing something like
2688 2723 * (void *)((int) x)
2689 2724 * will throw warning:
2690 2725 * "cast to pointer from integer of different size"
2691 2726 * during 64bit compilation.
2692 2727 *
2693 2728 * The line below uses (size_t) to typecast int to
2694 2729 * size_t, which might be 64bit/32bit (depending
2695 2730 * on architecture). Once we have proper 64bit/32bit
2696 2731 * type (size_t), we can safely convert it to void pointer.
2697 2732 */
2698 2733 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2699 2734 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2700 2735 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2701 2736 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2702 2737 }
2703 2738 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2704 2739 break;
2705 2740 #endif
2706 2741
2707 2742 default :
2708 2743 break;
2709 2744 }
2710 2745
2711 2746 return 0;
2712 2747 }
2713 2748
2714 2749
2715 2750 /* ------------------------------------------------------------------------ */
2716 2751 /* Function: ipf_nic_event_v6 */
2717 2752 /* Returns: int - 0 == no problems encountered */
2718 2753 /* Parameters: event(I) - pointer to event */
2719 2754 /* info(I) - pointer to information about a NIC event */
2720 2755 /* */
2721 2756 /* Function to receive asynchronous NIC events from IP */
2722 2757 /* ------------------------------------------------------------------------ */
2723 2758 /*ARGSUSED*/
2724 2759 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2725 2760 {
2726 2761 struct sockaddr_in6 *sin6;
2727 2762 hook_nic_event_t *hn;
2728 2763 ipf_stack_t *ifs = arg;
2729 2764 void *new_ifp = NULL;
2730 2765
2731 2766 if (ifs->ifs_fr_running <= 0)
2732 2767 return (0);
2733 2768
2734 2769 hn = (hook_nic_event_t *)info;
2735 2770
2736 2771 switch (hn->hne_event)
2737 2772 {
2738 2773 case NE_PLUMB :
2739 2774 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2740 2775 hn->hne_data, ifs);
2741 2776 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2742 2777 hn->hne_data, ifs);
2743 2778 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2744 2779 hn->hne_data, ifs);
2745 2780 break;
2746 2781
2747 2782 case NE_UNPLUMB :
2748 2783 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2749 2784 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2750 2785 ifs);
2751 2786 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2752 2787 break;
2753 2788
2754 2789 case NE_ADDRESS_CHANGE :
2755 2790 if (hn->hne_lif == 1) {
2756 2791 sin6 = hn->hne_data;
2757 2792 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2758 2793 ifs);
2759 2794 }
2760 2795 break;
2761 2796
2762 2797 #if SOLARIS2 >= 10
2763 2798 case NE_IFINDEX_CHANGE :
2764 2799 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2765 2800 if (hn->hne_data != NULL) {
2766 2801 /*
2767 2802 * The netinfo passes interface index as int (hne_data should be
2768 2803 * handled as a pointer to int), which is always 32bit. We need to
2769 2804 * convert it to void pointer here, since interfaces are
2770 2805 * represented as pointers to void in IPF. The pointers are 64 bits
2771 2806 * long on 64bit platforms. Doing something like
2772 2807 * (void *)((int) x)
2773 2808 * will throw warning:
2774 2809 * "cast to pointer from integer of different size"
2775 2810 * during 64bit compilation.
2776 2811 *
2777 2812 * The line below uses (size_t) to typecast int to
2778 2813 * size_t, which might be 64bit/32bit (depending
2779 2814 * on architecture). Once we have proper 64bit/32bit
2780 2815 * type (size_t), we can safely convert it to void pointer.
2781 2816 */
2782 2817 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2783 2818 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2784 2819 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2785 2820 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2786 2821 }
2787 2822 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2788 2823 break;
2789 2824 #endif
2790 2825
2791 2826 default :
2792 2827 break;
2793 2828 }
2794 2829
2795 2830 return 0;
2796 2831 }
2797 2832
2798 2833 /*
2799 2834 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2800 2835 * are needed in Solaris kernel only. We don't need them in
2801 2836 * ipftest to pretend the ICMP/RST packet was sent as a response.
2802 2837 */
2803 2838 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2804 2839 /* ------------------------------------------------------------------------ */
2805 2840 /* Function: fr_make_rst */
2806 2841 /* Returns: int - 0 on success, -1 on failure */
2807 2842 /* Parameters: fin(I) - pointer to packet information */
2808 2843 /* */
2809 2844 /* We must alter the original mblks passed to IPF from IP stack via */
2810 2845 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */
2811 2846 /* IPF can basicaly do only these things with mblk representing the packet: */
2812 2847 /* leave it as it is (pass the packet) */
2813 2848 /* */
2814 2849 /* discard it (block the packet) */
2815 2850 /* */
2816 2851 /* alter it (i.e. NAT) */
2817 2852 /* */
2818 2853 /* As you can see IPF can not simply discard the mblk and supply a new one */
2819 2854 /* instead to IP stack via FW_HOOKS. */
2820 2855 /* */
2821 2856 /* The return-rst action for packets coming via NIC is handled as follows: */
2822 2857 /* mblk with packet is discarded */
2823 2858 /* */
2824 2859 /* new mblk with RST response is constructed and injected to network */
2825 2860 /* */
2826 2861 /* IPF can't inject packets to loopback interface, this is just another */
2827 2862 /* limitation we have to deal with here. The only option to send RST */
2828 2863 /* response to offending TCP packet coming via loopback is to alter it. */
2829 2864 /* */
2830 2865 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2831 2866 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2832 2867 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2833 2868 /* ------------------------------------------------------------------------ */
2834 2869 int fr_make_rst(fin)
2835 2870 fr_info_t *fin;
2836 2871 {
2837 2872 uint16_t tmp_port;
2838 2873 int rv = -1;
2839 2874 uint32_t old_ack;
2840 2875 tcphdr_t *tcp = NULL;
2841 2876 struct in_addr tmp_src;
2842 2877 #ifdef USE_INET6
2843 2878 struct in6_addr tmp_src6;
2844 2879 #endif
2845 2880
2846 2881 ASSERT(fin->fin_p == IPPROTO_TCP);
2847 2882
2848 2883 /*
2849 2884 * We do not need to adjust chksum, since it is not being checked by
2850 2885 * Solaris IP stack for loopback clients.
2851 2886 */
2852 2887 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2853 2888 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2854 2889
2855 2890 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2856 2891 /* Swap IPv4 addresses. */
2857 2892 tmp_src = fin->fin_ip->ip_src;
2858 2893 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2859 2894 fin->fin_ip->ip_dst = tmp_src;
2860 2895
2861 2896 rv = 0;
2862 2897 }
2863 2898 else
2864 2899 tcp = NULL;
2865 2900 }
2866 2901 #ifdef USE_INET6
2867 2902 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2868 2903 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2869 2904 /*
2870 2905 * We are relying on fact the next header is TCP, which is true
2871 2906 * for regular TCP packets coming in over loopback.
2872 2907 */
2873 2908 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2874 2909 /* Swap IPv6 addresses. */
2875 2910 tmp_src6 = fin->fin_ip6->ip6_src;
2876 2911 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2877 2912 fin->fin_ip6->ip6_dst = tmp_src6;
2878 2913
2879 2914 rv = 0;
2880 2915 }
2881 2916 else
2882 2917 tcp = NULL;
2883 2918 }
2884 2919 #endif
2885 2920
2886 2921 if (tcp != NULL) {
2887 2922 /*
2888 2923 * Adjust TCP header:
2889 2924 * swap ports,
2890 2925 * set flags,
2891 2926 * set correct ACK number
2892 2927 */
2893 2928 tmp_port = tcp->th_sport;
2894 2929 tcp->th_sport = tcp->th_dport;
2895 2930 tcp->th_dport = tmp_port;
2896 2931 old_ack = tcp->th_ack;
2897 2932 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2898 2933 tcp->th_seq = old_ack;
2899 2934 tcp->th_flags = TH_RST | TH_ACK;
2900 2935 }
2901 2936
2902 2937 return (rv);
2903 2938 }
2904 2939
2905 2940 /* ------------------------------------------------------------------------ */
2906 2941 /* Function: fr_make_icmp_v4 */
2907 2942 /* Returns: int - 0 on success, -1 on failure */
2908 2943 /* Parameters: fin(I) - pointer to packet information */
2909 2944 /* */
2910 2945 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2911 2946 /* what is going to happen here and why. Once you read the comment there, */
2912 2947 /* continue here with next paragraph. */
2913 2948 /* */
2914 2949 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2915 2950 /* happen here: */
2916 2951 /* (1) Original mblk is copied (duplicated). */
2917 2952 /* */
2918 2953 /* (2) ICMP header is created. */
2919 2954 /* */
2920 2955 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2921 2956 /* data ready then. */
2922 2957 /* */
2923 2958 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2924 2959 /* */
2925 2960 /* (5) The mblk containing original packet is trimmed to contain IP */
2926 2961 /* header only and ICMP chksum is computed. */
2927 2962 /* */
2928 2963 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2929 2964 /* which now contains new IP header. If original packet was spread */
2930 2965 /* over several mblks, only the first mblk is kept. */
2931 2966 /* ------------------------------------------------------------------------ */
2932 2967 static int fr_make_icmp_v4(fin)
2933 2968 fr_info_t *fin;
2934 2969 {
2935 2970 struct in_addr tmp_src;
2936 2971 tcphdr_t *tcp;
2937 2972 struct icmp *icmp;
2938 2973 mblk_t *mblk_icmp;
2939 2974 mblk_t *mblk_ip;
2940 2975 size_t icmp_pld_len; /* octets to append to ICMP header */
2941 2976 size_t orig_iphdr_len; /* length of IP header only */
2942 2977 uint32_t sum;
2943 2978 uint16_t *buf;
2944 2979 int len;
2945 2980
2946 2981
2947 2982 if (fin->fin_v != 4)
2948 2983 return (-1);
2949 2984
2950 2985 /*
2951 2986 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2952 2987 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2953 2988 */
2954 2989 tcp = (tcphdr_t *) fin->fin_dp;
2955 2990
2956 2991 if ((fin->fin_p == IPPROTO_TCP) &&
2957 2992 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2958 2993 return (-1);
2959 2994
2960 2995 /*
2961 2996 * Step (1)
2962 2997 *
2963 2998 * Make copy of original mblk.
2964 2999 *
2965 3000 * We want to copy as much data as necessary, not less, not more. The
2966 3001 * ICMPv4 payload length for unreachable messages is:
2967 3002 * original IP header + 8 bytes of L4 (if there are any).
2968 3003 *
2969 3004 * We determine if there are at least 8 bytes of L4 data following IP
2970 3005 * header first.
2971 3006 */
2972 3007 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2973 3008 ICMPERR_ICMPHLEN : fin->fin_dlen;
2974 3009 /*
2975 3010 * Since we don't want to copy more data than necessary, we must trim
2976 3011 * the original mblk here. The right way (STREAMish) would be to use
2977 3012 * adjmsg() to trim it. However we would have to calculate the length
2978 3013 * argument for adjmsg() from pointers we already have here.
2979 3014 *
2980 3015 * Since we have pointers and offsets, it's faster and easier for
2981 3016 * us to just adjust pointers by hand instead of using adjmsg().
2982 3017 */
2983 3018 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2984 3019 fin->fin_m->b_wptr += icmp_pld_len;
2985 3020 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2986 3021
2987 3022 /*
2988 3023 * Also we don't want to copy any L2 stuff, which might precede IP
2989 3024 * header, so we have have to set b_rptr to point to the start of IP
2990 3025 * header.
2991 3026 */
2992 3027 fin->fin_m->b_rptr += fin->fin_ipoff;
2993 3028 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2994 3029 return (-1);
2995 3030 fin->fin_m->b_rptr -= fin->fin_ipoff;
2996 3031
2997 3032 /*
2998 3033 * Step (2)
2999 3034 *
3000 3035 * Create an ICMP header, which will be appened to original mblk later.
3001 3036 * ICMP header is just another mblk.
3002 3037 */
3003 3038 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
3004 3039 if (mblk_icmp == NULL) {
3005 3040 FREE_MB_T(mblk_ip);
3006 3041 return (-1);
3007 3042 }
3008 3043
3009 3044 MTYPE(mblk_icmp) = M_DATA;
3010 3045 icmp = (struct icmp *) mblk_icmp->b_wptr;
3011 3046 icmp->icmp_type = ICMP_UNREACH;
3012 3047 icmp->icmp_code = fin->fin_icode & 0xFF;
3013 3048 icmp->icmp_void = 0;
3014 3049 icmp->icmp_cksum = 0;
3015 3050 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
3016 3051
3017 3052 /*
3018 3053 * Step (3)
3019 3054 *
3020 3055 * Complete ICMP packet - link ICMP header with L4 data from original
3021 3056 * IP packet.
3022 3057 */
3023 3058 linkb(mblk_icmp, mblk_ip);
3024 3059
3025 3060 /*
3026 3061 * Step (4)
3027 3062 *
3028 3063 * Swap IP addresses and change IP header fields accordingly in
3029 3064 * original IP packet.
3030 3065 *
3031 3066 * There is a rule option return-icmp as a dest for physical
3032 3067 * interfaces. This option becomes useless for loopback, since IPF box
3033 3068 * uses same address as a loopback destination. We ignore the option
3034 3069 * here, the ICMP packet will always look like as it would have been
3035 3070 * sent from the original destination host.
3036 3071 */
3037 3072 tmp_src = fin->fin_ip->ip_src;
3038 3073 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
3039 3074 fin->fin_ip->ip_dst = tmp_src;
3040 3075 fin->fin_ip->ip_p = IPPROTO_ICMP;
3041 3076 fin->fin_ip->ip_sum = 0;
3042 3077
3043 3078 /*
3044 3079 * Step (5)
3045 3080 *
3046 3081 * We trim the orignal mblk to hold IP header only.
3047 3082 */
3048 3083 fin->fin_m->b_wptr = fin->fin_dp;
3049 3084 orig_iphdr_len = fin->fin_m->b_wptr -
3050 3085 (fin->fin_m->b_rptr + fin->fin_ipoff);
3051 3086 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
3052 3087 orig_iphdr_len);
3053 3088
3054 3089 /*
3055 3090 * ICMP chksum calculation. The data we are calculating chksum for are
3056 3091 * spread over two mblks, therefore we have to use two for loops.
3057 3092 *
3058 3093 * First for loop computes chksum part for ICMP header.
3059 3094 */
3060 3095 buf = (uint16_t *) icmp;
3061 3096 len = ICMPERR_ICMPHLEN;
3062 3097 for (sum = 0; len > 1; len -= 2)
3063 3098 sum += *buf++;
3064 3099
3065 3100 /*
3066 3101 * Here we add chksum part for ICMP payload.
3067 3102 */
3068 3103 len = icmp_pld_len;
3069 3104 buf = (uint16_t *) mblk_ip->b_rptr;
3070 3105 for (; len > 1; len -= 2)
3071 3106 sum += *buf++;
3072 3107
3073 3108 /*
3074 3109 * Chksum is done.
3075 3110 */
3076 3111 sum = (sum >> 16) + (sum & 0xffff);
3077 3112 sum += (sum >> 16);
3078 3113 icmp->icmp_cksum = ~sum;
3079 3114
3080 3115 /*
3081 3116 * Step (6)
3082 3117 *
3083 3118 * Release all packet mblks, except the first one.
3084 3119 */
3085 3120 if (fin->fin_m->b_cont != NULL) {
3086 3121 FREE_MB_T(fin->fin_m->b_cont);
3087 3122 }
3088 3123
3089 3124 /*
3090 3125 * Append ICMP payload to first mblk, which already contains new IP
3091 3126 * header.
3092 3127 */
3093 3128 linkb(fin->fin_m, mblk_icmp);
3094 3129
3095 3130 return (0);
3096 3131 }
3097 3132
3098 3133 #ifdef USE_INET6
3099 3134 /* ------------------------------------------------------------------------ */
3100 3135 /* Function: fr_make_icmp_v6 */
3101 3136 /* Returns: int - 0 on success, -1 on failure */
3102 3137 /* Parameters: fin(I) - pointer to packet information */
3103 3138 /* */
3104 3139 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
3105 3140 /* what and why is going to happen here. Once you read the comment there, */
3106 3141 /* continue here with next paragraph. */
3107 3142 /* */
3108 3143 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
3109 3144 /* The algorithm is fairly simple: */
3110 3145 /* 1) We need to get copy of complete mblk. */
3111 3146 /* */
3112 3147 /* 2) New ICMPv6 header is created. */
3113 3148 /* */
3114 3149 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
3115 3150 /* header. */
3116 3151 /* */
3117 3152 /* 4) The checksum must be adjusted. */
3118 3153 /* */
3119 3154 /* 5) IP addresses in original mblk are swapped and IP header data */
3120 3155 /* are adjusted (protocol number). */
3121 3156 /* */
3122 3157 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
3123 3158 /* linked with the ICMPv6 data we got from (3). */
3124 3159 /* ------------------------------------------------------------------------ */
3125 3160 static int fr_make_icmp_v6(fin)
3126 3161 fr_info_t *fin;
3127 3162 {
3128 3163 struct icmp6_hdr *icmp6;
3129 3164 tcphdr_t *tcp;
3130 3165 struct in6_addr tmp_src6;
3131 3166 size_t icmp_pld_len;
3132 3167 mblk_t *mblk_ip, *mblk_icmp;
3133 3168
3134 3169 if (fin->fin_v != 6)
3135 3170 return (-1);
3136 3171
3137 3172 /*
3138 3173 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3139 3174 * IP stack. If it is not SYN/FIN, then we must drop it silently.
3140 3175 */
3141 3176 tcp = (tcphdr_t *) fin->fin_dp;
3142 3177
3143 3178 if ((fin->fin_p == IPPROTO_TCP) &&
3144 3179 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3145 3180 return (-1);
3146 3181
3147 3182 /*
3148 3183 * Step (1)
3149 3184 *
3150 3185 * We need to copy complete packet in case of IPv6, no trimming is
3151 3186 * needed (except the L2 headers).
3152 3187 */
3153 3188 icmp_pld_len = M_LEN(fin->fin_m);
3154 3189 fin->fin_m->b_rptr += fin->fin_ipoff;
3155 3190 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3156 3191 return (-1);
3157 3192 fin->fin_m->b_rptr -= fin->fin_ipoff;
3158 3193
3159 3194 /*
3160 3195 * Step (2)
3161 3196 *
3162 3197 * Allocate and create ICMP header.
3163 3198 */
3164 3199 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3165 3200 BPRI_HI);
3166 3201
3167 3202 if (mblk_icmp == NULL)
3168 3203 return (-1);
3169 3204
3170 3205 MTYPE(mblk_icmp) = M_DATA;
3171 3206 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
3172 3207 icmp6->icmp6_type = ICMP6_DST_UNREACH;
3173 3208 icmp6->icmp6_code = fin->fin_icode & 0xFF;
3174 3209 icmp6->icmp6_data32[0] = 0;
3175 3210 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3176 3211
3177 3212 /*
3178 3213 * Step (3)
3179 3214 *
3180 3215 * Link the copy of IP packet to ICMP header.
3181 3216 */
3182 3217 linkb(mblk_icmp, mblk_ip);
3183 3218
3184 3219 /*
3185 3220 * Step (4)
3186 3221 *
3187 3222 * Calculate chksum - this is much more easier task than in case of
3188 3223 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
3189 3224 * We are making compensation just for change of packet length.
3190 3225 */
3191 3226 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3192 3227
3193 3228 /*
3194 3229 * Step (5)
3195 3230 *
3196 3231 * Swap IP addresses.
3197 3232 */
3198 3233 tmp_src6 = fin->fin_ip6->ip6_src;
3199 3234 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3200 3235 fin->fin_ip6->ip6_dst = tmp_src6;
3201 3236
3202 3237 /*
3203 3238 * and adjust IP header data.
3204 3239 */
3205 3240 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3206 3241 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3207 3242
3208 3243 /*
3209 3244 * Step (6)
3210 3245 *
3211 3246 * We must release all linked mblks from original packet and keep only
3212 3247 * the first mblk with IP header to link ICMP data.
3213 3248 */
3214 3249 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3215 3250
3216 3251 if (fin->fin_m->b_cont != NULL) {
3217 3252 FREE_MB_T(fin->fin_m->b_cont);
3218 3253 }
3219 3254
3220 3255 /*
3221 3256 * Append ICMP payload to IP header.
3222 3257 */
3223 3258 linkb(fin->fin_m, mblk_icmp);
3224 3259
3225 3260 return (0);
3226 3261 }
3227 3262 #endif /* USE_INET6 */
3228 3263
3229 3264 /* ------------------------------------------------------------------------ */
3230 3265 /* Function: fr_make_icmp */
3231 3266 /* Returns: int - 0 on success, -1 on failure */
3232 3267 /* Parameters: fin(I) - pointer to packet information */
3233 3268 /* */
3234 3269 /* We must alter the original mblks passed to IPF from IP stack via */
3235 3270 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
3236 3271 /* comment at fr_make_rst() function. */
3237 3272 /* */
3238 3273 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
3239 3274 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
3240 3275 /* protocol version. However there are some details, which are common to */
3241 3276 /* both IP versions. The details are going to be explained here. */
3242 3277 /* */
3243 3278 /* The packet looks as follows: */
3244 3279 /* xxx | IP hdr | IP payload ... | */
3245 3280 /* ^ ^ ^ ^ */
3246 3281 /* | | | | */
3247 3282 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3248 3283 /* | | | */
3249 3284 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
3250 3285 /* | | */
3251 3286 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
3252 3287 /* | of loopback) */
3253 3288 /* | */
3254 3289 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
3255 3290 /* */
3256 3291 /* All relevant IP headers are pulled up into the first mblk. It happened */
3257 3292 /* well in advance before the matching rule was found (the rule, which took */
3258 3293 /* us here, to fr_make_icmp() function). */
3259 3294 /* */
3260 3295 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
3261 3296 /* packet. New packet will be represented as chain of mblks. */
3262 3297 /* orig mblk |- b_cont ---. */
3263 3298 /* ^ `-> ICMP hdr |- b_cont--. */
3264 3299 /* | ^ `-> duped orig mblk */
3265 3300 /* | | ^ */
3266 3301 /* `- The original mblk | | */
3267 3302 /* will be trimmed to | | */
3268 3303 /* to contain IP header | | */
3269 3304 /* only | | */
3270 3305 /* | | */
3271 3306 /* `- This is newly | */
3272 3307 /* allocated mblk to | */
3273 3308 /* hold ICMPv6 data. | */
3274 3309 /* | */
3275 3310 /* | */
3276 3311 /* | */
3277 3312 /* This is the copy of original mblk, it will contain -' */
3278 3313 /* orignal IP packet in case of ICMPv6. In case of */
3279 3314 /* ICMPv4 it will contain up to 8 bytes of IP payload */
3280 3315 /* (TCP/UDP/L4) data from original packet. */
3281 3316 /* ------------------------------------------------------------------------ */
3282 3317 int fr_make_icmp(fin)
3283 3318 fr_info_t *fin;
3284 3319 {
3285 3320 int rv;
3286 3321
3287 3322 if (fin->fin_v == 4)
3288 3323 rv = fr_make_icmp_v4(fin);
3289 3324 #ifdef USE_INET6
3290 3325 else if (fin->fin_v == 6)
3291 3326 rv = fr_make_icmp_v6(fin);
3292 3327 #endif
3293 3328 else
3294 3329 rv = -1;
3295 3330
3296 3331 return (rv);
3297 3332 }
3298 3333
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - unfolded 16-bit word sum of the buffer           */
/* Parameters:  buf(I) - buffer to sum up; must be aligned                  */
/*              len(I) - length of the buffer in bytes                      */
/*                                                                          */
/* Adds up the buffer as a sequence of 16-bit words taken in memory         */
/* order; a trailing odd byte is counted as if a zero byte followed         */
/* it. The result is not folded; callers use it for checksums.              */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t *word = (const uint16_t *)buf;
	uint32_t total = 0;
	unsigned int left;

	/* Whole 16-bit words first. */
	for (left = len; left >= 2; left -= 2)
		total += *word++;

	/* At most one byte can be left over. */
	if (left != 0)
		total += htons((*(const unsigned char *)word) << 8);

	return (total);
}
3325 3360
3326 3361 /* ------------------------------------------------------------------------ */
3327 3362 /* Function: fr_calc_chksum */
3328 3363 /* Returns: void */
3329 3364 /* Parameters: fin - pointer to fr_info_t instance with packet data */
3330 3365 /* pkt - pointer to duplicated packet */
3331 3366 /* */
3332 3367 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
3333 3368 /* versions. */
3334 3369 /* ------------------------------------------------------------------------ */
3335 3370 void fr_calc_chksum(fin, pkt)
3336 3371 fr_info_t *fin;
3337 3372 mb_t *pkt;
3338 3373 {
3339 3374 struct pseudo_hdr {
3340 3375 union {
3341 3376 struct in_addr in4;
3342 3377 #ifdef USE_INET6
3343 3378 struct in6_addr in6;
3344 3379 #endif
3345 3380 } src_addr;
3346 3381 union {
3347 3382 struct in_addr in4;
3348 3383 #ifdef USE_INET6
3349 3384 struct in6_addr in6;
3350 3385 #endif
3351 3386 } dst_addr;
3352 3387 char zero;
3353 3388 char proto;
3354 3389 uint16_t len;
3355 3390 } phdr;
3356 3391 uint32_t sum, ip_sum;
3357 3392 void *buf;
3358 3393 uint16_t *l4_csum_p;
3359 3394 tcphdr_t *tcp;
3360 3395 udphdr_t *udp;
3361 3396 icmphdr_t *icmp;
3362 3397 #ifdef USE_INET6
3363 3398 struct icmp6_hdr *icmp6;
3364 3399 #endif
3365 3400 ip_t *ip;
3366 3401 unsigned int len;
3367 3402 int pld_len;
3368 3403
3369 3404 /*
3370 3405 * We need to pullup the packet to the single continuous buffer to avoid
3371 3406 * potential misaligment of b_rptr member in mblk chain.
3372 3407 */
3373 3408 if (pullupmsg(pkt, -1) == 0) {
3374 3409 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3375 3410 " will not be computed by IPF");
3376 3411 return;
3377 3412 }
3378 3413
3379 3414 /*
3380 3415 * It is guaranteed IP header starts right at b_rptr, because we are
3381 3416 * working with a copy of the original packet.
3382 3417 *
3383 3418 * Compute pseudo header chksum for TCP and UDP.
3384 3419 */
3385 3420 if ((fin->fin_p == IPPROTO_UDP) ||
3386 3421 (fin->fin_p == IPPROTO_TCP)) {
3387 3422 bzero(&phdr, sizeof (phdr));
3388 3423 #ifdef USE_INET6
3389 3424 if (fin->fin_v == 6) {
3390 3425 phdr.src_addr.in6 = fin->fin_srcip6;
3391 3426 phdr.dst_addr.in6 = fin->fin_dstip6;
3392 3427 } else {
3393 3428 phdr.src_addr.in4 = fin->fin_src;
3394 3429 phdr.dst_addr.in4 = fin->fin_dst;
3395 3430 }
3396 3431 #else
3397 3432 phdr.src_addr.in4 = fin->fin_src;
3398 3433 phdr.dst_addr.in4 = fin->fin_dst;
3399 3434 #endif
3400 3435 phdr.zero = (char) 0;
3401 3436 phdr.proto = fin->fin_p;
3402 3437 phdr.len = htons((uint16_t)fin->fin_dlen);
3403 3438 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3404 3439 } else {
3405 3440 sum = 0;
3406 3441 }
3407 3442
3408 3443 /*
3409 3444 * Set pointer to the L4 chksum field in the packet, set buf pointer to
3410 3445 * the L4 header start.
3411 3446 */
3412 3447 switch (fin->fin_p) {
3413 3448 case IPPROTO_UDP:
3414 3449 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3415 3450 l4_csum_p = &udp->uh_sum;
3416 3451 buf = udp;
3417 3452 break;
3418 3453 case IPPROTO_TCP:
3419 3454 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3420 3455 l4_csum_p = &tcp->th_sum;
3421 3456 buf = tcp;
3422 3457 break;
3423 3458 case IPPROTO_ICMP:
3424 3459 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3425 3460 l4_csum_p = &icmp->icmp_cksum;
3426 3461 buf = icmp;
3427 3462 break;
3428 3463 #ifdef USE_INET6
3429 3464 case IPPROTO_ICMPV6:
3430 3465 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3431 3466 l4_csum_p = &icmp6->icmp6_cksum;
3432 3467 buf = icmp6;
3433 3468 break;
3434 3469 #endif
3435 3470 default:
3436 3471 l4_csum_p = NULL;
3437 3472 }
3438 3473
3439 3474 /*
3440 3475 * Compute L4 chksum if needed.
3441 3476 */
3442 3477 if (l4_csum_p != NULL) {
3443 3478 *l4_csum_p = (uint16_t)0;
3444 3479 pld_len = fin->fin_dlen;
3445 3480 len = pkt->b_wptr - (unsigned char *)buf;
3446 3481 ASSERT(len == pld_len);
3447 3482 /*
3448 3483 * Add payload sum to pseudoheader sum.
3449 3484 */
3450 3485 sum += fr_buf_sum(buf, len);
3451 3486 while (sum >> 16)
3452 3487 sum = (sum & 0xFFFF) + (sum >> 16);
3453 3488
3454 3489 *l4_csum_p = ~((uint16_t)sum);
3455 3490 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3456 3491 }
3457 3492
3458 3493 /*
3459 3494 * The IP header chksum is needed just for IPv4.
3460 3495 */
3461 3496 if (fin->fin_v == 4) {
3462 3497 /*
3463 3498 * Compute IPv4 header chksum.
3464 3499 */
3465 3500 ip = (ip_t *)pkt->b_rptr;
3466 3501 ip->ip_sum = (uint16_t)0;
3467 3502 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3468 3503 while (ip_sum >> 16)
3469 3504 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3470 3505
3471 3506 ip->ip_sum = ~((uint16_t)ip_sum);
3472 3507 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3473 3508 }
3474 3509
3475 3510 return;
3476 3511 }
3477 3512
3478 3513 #endif /* _KERNEL && SOLARIS2 >= 10 */
|
↓ open down ↓ |
2104 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX