1 /*
2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 *
8 * Copyright 2018 Joyent, Inc.
9 */
10
11 #if !defined(lint)
12 static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
13 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
14 #endif
15
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/strsun.h>
26 #include <sys/cred.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/mac_provider.h>
32 #include <sys/mkdev.h>
33 #include <sys/protosw.h>
34 #include <sys/socket.h>
35 #include <sys/dditypes.h>
36 #include <sys/cmn_err.h>
37 #include <sys/zone.h>
38 #include <net/if.h>
39 #include <net/af.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
42 #include <netinet/in_systm.h>
43 #include <netinet/ip.h>
44 #include <netinet/ip_var.h>
45 #include <netinet/tcp.h>
46 #include <netinet/udp.h>
47 #include <netinet/tcpip.h>
48 #include <netinet/ip_icmp.h>
49 #include "netinet/ip_compat.h"
50 #ifdef USE_INET6
51 # include <netinet/icmp6.h>
52 #endif
53 #include "netinet/ip_fil.h"
54 #include "netinet/ip_nat.h"
55 #include "netinet/ip_frag.h"
56 #include "netinet/ip_state.h"
57 #include "netinet/ip_auth.h"
58 #include "netinet/ip_proxy.h"
59 #include "netinet/ipf_stack.h"
60 #ifdef IPFILTER_LOOKUP
61 # include "netinet/ip_lookup.h"
62 #endif
63 #include <inet/ip_ire.h>
64
65 #include <sys/md5.h>
66 #include <sys/neti.h>
67
68 static int frzerostats __P((caddr_t, ipf_stack_t *));
69 static int fr_setipfloopback __P((int, ipf_stack_t *));
70 static int fr_enableipf __P((ipf_stack_t *, int));
71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
74 static int ipf_hook __P((hook_data_t, int, int, void *));
75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
78 void *));
79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
80 static int ipf_hook4 __P((hook_data_t, int, int, void *));
81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
84 void *));
85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
86 void *));
87 static int ipf_hook6 __P((hook_data_t, int, int, void *));
88 static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
89 void *));
90 static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
91 void *));
92 static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
93 void *));
94 static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
95 void *));
96
97 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
98 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
99 void *));
100
101 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
102 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
103
104 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
105 const char *, const char *, const char *));
106 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
107 const char *, const char *, const char *));
108
109 #if SOLARIS2 < 10
110 #if SOLARIS2 >= 7
111 u_int *ip_ttl_ptr = NULL;
112 u_int *ip_mtudisc = NULL;
113 # if SOLARIS2 >= 8
114 int *ip_forwarding = NULL;
115 u_int *ip6_forwarding = NULL;
116 # else
117 u_int *ip_forwarding = NULL;
118 # endif
119 #else
120 u_long *ip_ttl_ptr = NULL;
121 u_long *ip_mtudisc = NULL;
122 u_long *ip_forwarding = NULL;
123 #endif
124 #endif
125
126 vmem_t *ipf_minor; /* minor number arena */
127 void *ipf_state; /* DDI state */
128
129 /*
130 * GZ-controlled and per-zone stacks:
131 *
132 * For each non-global zone, we create two ipf stacks: the per-zone stack and
133 * the GZ-controlled stack. The per-zone stack can be controlled and observed
134 * from inside the zone or from the global zone. The GZ-controlled stack can
135 * only be controlled and observed from the global zone (though the rules
136 * still only affect that non-global zone).
137 *
138 * The two hooks are always arranged so that the GZ-controlled stack is always
139 * "outermost" with respect to the zone. The traffic flow then looks like
140 * this:
141 *
142 * Inbound:
143 *
144 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
145 *
146 * Outbound:
147 *
148 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
149 */
150
/*
 * Names under which the hooks are registered.  Every hook has two names:
 * the plain one and a "_gz" one.  A stack with ifs_gz_controlled set
 * registers under the "_gz" name and passes the plain name as its ordering
 * hint; any other stack does the reverse.
 */

/* IPv4 */
char	*hook4_nicevents =	"ipfilter_hook4_nicevents";
char	*hook4_nicevents_gz =	"ipfilter_hook4_nicevents_gz";
char	*hook4_in =		"ipfilter_hook4_in";
char	*hook4_in_gz =		"ipfilter_hook4_in_gz";
char	*hook4_out =		"ipfilter_hook4_out";
char	*hook4_out_gz =		"ipfilter_hook4_out_gz";
char	*hook4_loop_in =	"ipfilter_hook4_loop_in";
char	*hook4_loop_in_gz =	"ipfilter_hook4_loop_in_gz";
char	*hook4_loop_out =	"ipfilter_hook4_loop_out";
char	*hook4_loop_out_gz =	"ipfilter_hook4_loop_out_gz";

/* IPv6 */
char	*hook6_nicevents =	"ipfilter_hook6_nicevents";
char	*hook6_nicevents_gz =	"ipfilter_hook6_nicevents_gz";
char	*hook6_in =		"ipfilter_hook6_in";
char	*hook6_in_gz =		"ipfilter_hook6_in_gz";
char	*hook6_out =		"ipfilter_hook6_out";
char	*hook6_out_gz =		"ipfilter_hook6_out_gz";
char	*hook6_loop_in =	"ipfilter_hook6_loop_in";
char	*hook6_loop_in_gz =	"ipfilter_hook6_loop_in_gz";
char	*hook6_loop_out =	"ipfilter_hook6_loop_out";
char	*hook6_loop_out_gz =	"ipfilter_hook6_loop_out_gz";

/* vnd, IPv4 and IPv6 */
char	*hook4_vnd_in =		"ipfilter_hookvndl3v4_in";
char	*hook4_vnd_in_gz =	"ipfilter_hookvndl3v4_in_gz";
char	*hook6_vnd_in =		"ipfilter_hookvndl3v6_in";
char	*hook6_vnd_in_gz =	"ipfilter_hookvndl3v6_in_gz";
char	*hook4_vnd_out =	"ipfilter_hookvndl3v4_out";
char	*hook4_vnd_out_gz =	"ipfilter_hookvndl3v4_out_gz";
char	*hook6_vnd_out =	"ipfilter_hookvndl3v6_out";
char	*hook6_vnd_out_gz =	"ipfilter_hookvndl3v6_out_gz";

/* viona */
char	*hook_viona_in =	"ipfilter_hookviona_in";
char	*hook_viona_in_gz =	"ipfilter_hookviona_in_gz";
char	*hook_viona_out =	"ipfilter_hookviona_out";
char	*hook_viona_out_gz =	"ipfilter_hookviona_out_gz";
190
191 /* ------------------------------------------------------------------------ */
192 /* Function: ipldetach */
193 /* Returns: int - 0 == success, else error. */
194 /* Parameters: Nil */
195 /* */
196 /* This function is responsible for undoing anything that might have been */
197 /* done in a call to iplattach(). It must be able to clean up from a call */
198 /* to iplattach() that did not succeed. Why might that happen? Someone */
199 /* configures a table to be so large that we cannot allocate enough memory */
200 /* for it. */
201 /* ------------------------------------------------------------------------ */
202 int ipldetach(ifs)
203 ipf_stack_t *ifs;
204 {
205
206 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
207
208 #if SOLARIS2 < 10
209
210 if (ifs->ifs_fr_control_forwarding & 2) {
211 if (ip_forwarding != NULL)
212 *ip_forwarding = 0;
213 #if SOLARIS2 >= 8
214 if (ip6_forwarding != NULL)
215 *ip6_forwarding = 0;
216 #endif
217 }
218 #endif
219
220 /*
221 * This lock needs to be dropped around the net_hook_unregister calls
222 * because we can deadlock here with:
223 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
224 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
225 */
226 RWLOCK_EXIT(&ifs->ifs_ipf_global);
227
228 #define UNDO_HOOK(_f, _b, _e, _h) \
229 do { \
230 if (ifs->_f != NULL) { \
231 if (ifs->_b) { \
232 int tmp = net_hook_unregister(ifs->_f, \
233 _e, ifs->_h); \
234 ifs->_b = (tmp != 0 && tmp != ENXIO); \
235 if (!ifs->_b && ifs->_h != NULL) { \
236 hook_free(ifs->_h); \
237 ifs->_h = NULL; \
238 } \
239 } else if (ifs->_h != NULL) { \
240 hook_free(ifs->_h); \
241 ifs->_h = NULL; \
242 } \
243 } \
244 _NOTE(CONSTCOND) \
245 } while (0)
246
247 /*
248 * Remove IPv6 Hooks
249 */
250 if (ifs->ifs_ipf_ipv6 != NULL) {
251 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
252 NH_PHYSICAL_IN, ifs_ipfhook6_in);
253 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
254 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
255 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
256 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
257 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
258 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
259 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
260 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
261
262 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
263 goto detach_failed;
264 ifs->ifs_ipf_ipv6 = NULL;
265 }
266
267 /*
268 * Remove IPv4 Hooks
269 */
270 if (ifs->ifs_ipf_ipv4 != NULL) {
271 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
272 NH_PHYSICAL_IN, ifs_ipfhook4_in);
273 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
274 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
275 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
276 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
277 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
278 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
279 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
280 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
281
282 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
283 goto detach_failed;
284 ifs->ifs_ipf_ipv4 = NULL;
285 }
286
287 /*
288 * Remove VND hooks
289 */
290 if (ifs->ifs_ipf_vndl3v4 != NULL) {
291 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
292 NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
293 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
294 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
295
296 if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
297 goto detach_failed;
298 ifs->ifs_ipf_vndl3v4 = NULL;
299 }
300
301 if (ifs->ifs_ipf_vndl3v6 != NULL) {
302 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
303 NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
304 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
305 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
306
307 if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
308 goto detach_failed;
309 ifs->ifs_ipf_vndl3v6 = NULL;
310 }
311
312 /*
313 * Remove notification of viona hooks
314 */
315 net_instance_notify_unregister(ifs->ifs_netid,
316 ipf_hook_instance_notify);
317
318 #undef UNDO_HOOK
319
320 /*
321 * Normally, viona will unregister itself before ipldetach() is called,
322 * so these will be no-ops, but out of caution, we try to make sure
323 * we've removed any of our references.
324 */
325 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
326 NH_PHYSICAL_IN);
327 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
328 NH_PHYSICAL_OUT);
329
330 {
331 char netidstr[12]; /* Large enough for INT_MAX + NUL */
332 (void) snprintf(netidstr, sizeof (netidstr), "%d",
333 ifs->ifs_netid);
334
335 /*
336 * The notify callbacks expect the netid value passed as a
337 * string in the third argument. To prevent confusion if
338 * traced, we pass the same value the nethook framework would
339 * pass, even though the callback does not currently use the
340 * value.
341 */
342 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
343 NULL, Hn_VIONA);
344 }
345
346 #ifdef IPFDEBUG
347 cmn_err(CE_CONT, "ipldetach()\n");
348 #endif
349
350 WRITE_ENTER(&ifs->ifs_ipf_global);
351 fr_deinitialise(ifs);
352
353 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
354 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
355
356 if (ifs->ifs_ipf_locks_done == 1) {
357 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
358 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
359 RW_DESTROY(&ifs->ifs_ipf_tokens);
360 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
361 ifs->ifs_ipf_locks_done = 0;
362 }
363
364 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
365 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
366 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
367 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
368 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
369 return -1;
370
371 return 0;
372
373 detach_failed:
374 WRITE_ENTER(&ifs->ifs_ipf_global);
375 return -1;
376 }
377
378 int iplattach(ifs)
379 ipf_stack_t *ifs;
380 {
381 #if SOLARIS2 < 10
382 int i;
383 #endif
384 netid_t id = ifs->ifs_netid;
385
386 #ifdef IPFDEBUG
387 cmn_err(CE_CONT, "iplattach()\n");
388 #endif
389
390 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
391 ifs->ifs_fr_flags = IPF_LOGGING;
392 #ifdef _KERNEL
393 ifs->ifs_fr_update_ipid = 0;
394 #else
395 ifs->ifs_fr_update_ipid = 1;
396 #endif
397 ifs->ifs_fr_minttl = 4;
398 ifs->ifs_fr_icmpminfragmtu = 68;
399 #if defined(IPFILTER_DEFAULT_BLOCK)
400 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
401 #else
402 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
403 #endif
404
405 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
406 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
407 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
408 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
409 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
410 ifs->ifs_ipf_locks_done = 1;
411
412 if (fr_initialise(ifs) < 0)
413 return -1;
414
415 /*
416 * For incoming packets, we want the GZ-controlled hooks to run before
417 * the per-zone hooks, regardless of what order they're are installed.
418 * See the "GZ-controlled and per-zone stacks" comment block at the top
419 * of this file.
420 */
421 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
422 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
423 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
424 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
425
426 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
427 hook4_nicevents, hook4_nicevents_gz, ifs);
428 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
429 hook4_in, hook4_in_gz, ifs);
430 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
431 hook4_loop_in, hook4_loop_in_gz, ifs);
432
433 /*
434 * For outgoing packets, we want the GZ-controlled hooks to run after
435 * the per-zone hooks, regardless of what order they're are installed.
436 * See the "GZ-controlled and per-zone stacks" comment block at the top
437 * of this file.
438 */
439 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
440 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
441 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
442 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
443
444 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
445 hook4_out, hook4_out_gz, ifs);
446 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
447 hook4_loop_out, hook4_loop_out_gz, ifs);
448
449 /*
450 * If we hold this lock over all of the net_hook_register calls, we
451 * can cause a deadlock to occur with the following lock ordering:
452 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
453 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
454 */
455 RWLOCK_EXIT(&ifs->ifs_ipf_global);
456
457 /*
458 * Add IPv4 hooks
459 */
460 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
461 if (ifs->ifs_ipf_ipv4 == NULL)
462 goto hookup_failed;
463
464 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
465 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
466 if (!ifs->ifs_hook4_nic_events)
467 goto hookup_failed;
468
469 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
470 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
471 if (!ifs->ifs_hook4_physical_in)
472 goto hookup_failed;
473
474 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
475 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
476 if (!ifs->ifs_hook4_physical_out)
477 goto hookup_failed;
478
479 if (ifs->ifs_ipf_loopback) {
480 ifs->ifs_hook4_loopback_in = (net_hook_register(
481 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
482 ifs->ifs_ipfhook4_loop_in) == 0);
483 if (!ifs->ifs_hook4_loopback_in)
484 goto hookup_failed;
485
486 ifs->ifs_hook4_loopback_out = (net_hook_register(
487 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
488 ifs->ifs_ipfhook4_loop_out) == 0);
489 if (!ifs->ifs_hook4_loopback_out)
490 goto hookup_failed;
491 }
492
493 /*
494 * Add IPv6 hooks
495 */
496 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
497 if (ifs->ifs_ipf_ipv6 == NULL)
498 goto hookup_failed;
499
500 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
501 hook6_nicevents, hook6_nicevents_gz, ifs);
502 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
503 hook6_in, hook6_in_gz, ifs);
504 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
505 hook6_loop_in, hook6_loop_in_gz, ifs);
506 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
507 hook6_out, hook6_out_gz, ifs);
508 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
509 hook6_loop_out, hook6_loop_out_gz, ifs);
510
511 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
512 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
513 if (!ifs->ifs_hook6_nic_events)
514 goto hookup_failed;
515
516 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
517 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
518 if (!ifs->ifs_hook6_physical_in)
519 goto hookup_failed;
520
521 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
522 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
523 if (!ifs->ifs_hook6_physical_out)
524 goto hookup_failed;
525
526 if (ifs->ifs_ipf_loopback) {
527 ifs->ifs_hook6_loopback_in = (net_hook_register(
528 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
529 ifs->ifs_ipfhook6_loop_in) == 0);
530 if (!ifs->ifs_hook6_loopback_in)
531 goto hookup_failed;
532
533 ifs->ifs_hook6_loopback_out = (net_hook_register(
534 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
535 ifs->ifs_ipfhook6_loop_out) == 0);
536 if (!ifs->ifs_hook6_loopback_out)
537 goto hookup_failed;
538 }
539
540 /*
541 * Add VND INET hooks
542 */
543 ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
544 if (ifs->ifs_ipf_vndl3v4 == NULL)
545 goto hookup_failed;
546
547 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
548 hook4_vnd_in, hook4_vnd_in_gz, ifs);
549 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
550 hook4_vnd_out, hook4_vnd_out_gz, ifs);
551 ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
552 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
553 if (!ifs->ifs_hookvndl3v4_physical_in)
554 goto hookup_failed;
555
556 ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
557 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
558 if (!ifs->ifs_hookvndl3v4_physical_out)
559 goto hookup_failed;
560
561
562 /*
563 * VND INET6 hooks
564 */
565 ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
566 if (ifs->ifs_ipf_vndl3v6 == NULL)
567 goto hookup_failed;
568
569 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
570 hook6_vnd_in, hook6_vnd_in_gz, ifs);
571 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
572 hook6_vnd_out, hook6_vnd_out_gz, ifs);
573 ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
574 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
575 if (!ifs->ifs_hookvndl3v6_physical_in)
576 goto hookup_failed;
577
578 ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
579 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
580 if (!ifs->ifs_hookvndl3v6_physical_out)
581 goto hookup_failed;
582
583 /*
584 * VIONA INET hooks. While the nethook framework allows us to register
585 * hooks for events that haven't been registered yet, we instead
586 * register and unregister our hooks in response to notifications
587 * about the viona hooks from the nethook framework. This prevents
588 * problems when the viona module gets unloaded while the ipf module
589 * does not. If we do not unregister our hooks after the viona module
590 * is unloaded, the viona module cannot later re-register them if it
591 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded
592 * even on DEBUG kernels, they do not experience this issue.
593 */
594 if (net_instance_notify_register(id, ipf_hook_instance_notify,
595 ifs) != 0)
596 goto hookup_failed;
597
598 /*
599 * Reacquire ipf_global, now it is safe.
600 */
601 WRITE_ENTER(&ifs->ifs_ipf_global);
602
603 /* Do not use private interface ip_params_arr[] in Solaris 10 */
604 #if SOLARIS2 < 10
605
606 #if SOLARIS2 >= 8
607 ip_forwarding = &ip_g_forward;
608 #endif
609 /*
610 * XXX - There is no terminator for this array, so it is not possible
611 * to tell if what we are looking for is missing and go off the end
612 * of the array.
613 */
614
615 #if SOLARIS2 <= 8
616 for (i = 0; ; i++) {
617 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
618 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
619 } else if (!strcmp(ip_param_arr[i].ip_param_name,
620 "ip_path_mtu_discovery")) {
621 ip_mtudisc = &ip_param_arr[i].ip_param_value;
622 }
623 #if SOLARIS2 < 8
624 else if (!strcmp(ip_param_arr[i].ip_param_name,
625 "ip_forwarding")) {
626 ip_forwarding = &ip_param_arr[i].ip_param_value;
627 }
628 #else
629 else if (!strcmp(ip_param_arr[i].ip_param_name,
630 "ip6_forwarding")) {
631 ip6_forwarding = &ip_param_arr[i].ip_param_value;
632 }
633 #endif
634
635 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
636 #if SOLARIS2 >= 8
637 ip6_forwarding != NULL &&
638 #endif
639 ip_forwarding != NULL)
640 break;
641 }
642 #endif
643
644 if (ifs->ifs_fr_control_forwarding & 1) {
645 if (ip_forwarding != NULL)
646 *ip_forwarding = 1;
647 #if SOLARIS2 >= 8
648 if (ip6_forwarding != NULL)
649 *ip6_forwarding = 1;
650 #endif
651 }
652
653 #endif
654
655 return 0;
656 hookup_failed:
657 WRITE_ENTER(&ifs->ifs_ipf_global);
658 return -1;
659 }
660
661 /* ------------------------------------------------------------------------ */
662 /*
663 * Called whenever a nethook protocol is registered or unregistered. Currently
664 * only used to add or remove the hooks for viona.
665 *
666 * While the function signature requires returning int, nothing
667 * in usr/src/uts/common/io/hook.c that invokes the callbacks
668 * captures the return value (nor is there currently any documentation
669 * on what return values should be). For now at least, we'll return 0
670 * on success (or 'not applicable') or an error value. Even if the
671 * nethook framework doesn't use the return address, it can be observed via
672 * dtrace if needed.
673 */
674 static int
675 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
676 const char *name, const char *dummy __unused, const char *he_name)
677 {
678 ipf_stack_t *ifs = arg;
679 hook_t **hookpp;
680 char *hook_name, *hint_name;
681 hook_func_t hookfn;
682 boolean_t *hookedp;
683 hook_hint_t hint;
684 boolean_t out;
685 int ret = 0;
686
687 const boolean_t gz = ifs->ifs_gz_controlled;
688
689 /* We currently only care about viona hooks notifications */
690 if (strcmp(name, Hn_VIONA) != 0)
691 return (0);
692
693 if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
694 out = B_FALSE;
695 } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
696 out = B_TRUE;
697 } else {
698 /*
699 * If we've added more hook events to viona, we must add
700 * the corresponding handling here (even if it's just to
701 * ignore it) to prevent the firewall from not working as
702 * intended.
703 */
704 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
705 he_name);
706
707 return (0);
708 }
709
710 if (out) {
711 hookpp = &ifs->ifs_ipfhookviona_out;
712 hookfn = ipf_hookviona_out;
713 hookedp = &ifs->ifs_hookviona_physical_out;
714 name = gz ? hook_viona_out_gz : hook_viona_out;
715 hint = gz ? HH_AFTER : HH_BEFORE;
716 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
717 } else {
718 hookpp = &ifs->ifs_ipfhookviona_in;
719 hookfn = ipf_hookviona_in;
720 hookedp = &ifs->ifs_hookviona_physical_in;
721 name = gz ? hook_viona_in_gz : hook_viona_in;
722 hint = gz ? HH_BEFORE : HH_AFTER;
723 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
724 }
725
726 switch (command) {
727 default:
728 case HN_NONE:
729 break;
730 case HN_REGISTER:
731 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
732 (*hookpp)->h_hint = hint;
733 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
734 ret = net_hook_register(ifs->ifs_ipf_viona,
735 (char *)he_name, *hookpp);
736 if (ret != 0) {
737 cmn_err(CE_NOTE, "%s: could not register hook "
738 "(hook family=%s hook=%s) err=%d", __func__,
739 name, he_name, ret);
740 *hookedp = B_FALSE;
741 return (ret);
742 }
743 *hookedp = B_TRUE;
744 break;
745 case HN_UNREGISTER:
746 if (ifs->ifs_ipf_viona == NULL)
747 break;
748
749 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
750 (char *)he_name, *hookpp) : 0;
751 if ((ret == 0 || ret == ENXIO)) {
752 if (*hookpp != NULL) {
753 hook_free(*hookpp);
754 *hookpp = NULL;
755 }
756 *hookedp = B_FALSE;
757 }
758 break;
759 }
760
761 return (ret);
762 }
763
764 /*
765 * Called whenever a new nethook instance is created. Currently only used
766 * with the Hn_VIONA nethooks. Similar to ipf_hook_protocol_notify, the out
767 * function signature must return an int, though the result is never used.
768 * We elect to return 0 on success (or not applicable) or a non-zero value
769 * on error.
770 */
771 static int
772 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
773 const char *netid, const char *dummy __unused, const char *instance)
774 {
775 ipf_stack_t *ifs = arg;
776 int ret = 0;
777
778 /* We currently only care about viona hooks */
779 if (strcmp(instance, Hn_VIONA) != 0)
780 return (0);
781
782 switch (command) {
783 case HN_NONE:
784 default:
785 return (0);
786 case HN_REGISTER:
787 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
788 NHF_VIONA);
789
790 if (ifs->ifs_ipf_viona == NULL)
791 return (EPROTONOSUPPORT);
792
793 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
794 ipf_hook_protocol_notify, ifs);
795 VERIFY(ret == 0 || ret == ESHUTDOWN);
796 break;
797 case HN_UNREGISTER:
798 if (ifs->ifs_ipf_viona == NULL)
799 break;
800 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
801 ipf_hook_protocol_notify));
802 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
803 ifs->ifs_ipf_viona = NULL;
804 break;
805 }
806
807 return (ret);
808 }
809
810 static int fr_setipfloopback(set, ifs)
811 int set;
812 ipf_stack_t *ifs;
813 {
814 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
815 return EFAULT;
816
817 if (set && !ifs->ifs_ipf_loopback) {
818 ifs->ifs_ipf_loopback = 1;
819
820 ifs->ifs_hook4_loopback_in = (net_hook_register(
821 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
822 ifs->ifs_ipfhook4_loop_in) == 0);
823 if (!ifs->ifs_hook4_loopback_in)
824 return EINVAL;
825
826 ifs->ifs_hook4_loopback_out = (net_hook_register(
827 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
828 ifs->ifs_ipfhook4_loop_out) == 0);
829 if (!ifs->ifs_hook4_loopback_out)
830 return EINVAL;
831
832 ifs->ifs_hook6_loopback_in = (net_hook_register(
833 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
834 ifs->ifs_ipfhook6_loop_in) == 0);
835 if (!ifs->ifs_hook6_loopback_in)
836 return EINVAL;
837
838 ifs->ifs_hook6_loopback_out = (net_hook_register(
839 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
840 ifs->ifs_ipfhook6_loop_out) == 0);
841 if (!ifs->ifs_hook6_loopback_out)
842 return EINVAL;
843
844 } else if (!set && ifs->ifs_ipf_loopback) {
845 ifs->ifs_ipf_loopback = 0;
846
847 ifs->ifs_hook4_loopback_in =
848 (net_hook_unregister(ifs->ifs_ipf_ipv4,
849 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
850 if (ifs->ifs_hook4_loopback_in)
851 return EBUSY;
852
853 ifs->ifs_hook4_loopback_out =
854 (net_hook_unregister(ifs->ifs_ipf_ipv4,
855 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
856 if (ifs->ifs_hook4_loopback_out)
857 return EBUSY;
858
859 ifs->ifs_hook6_loopback_in =
860 (net_hook_unregister(ifs->ifs_ipf_ipv6,
861 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
862 if (ifs->ifs_hook6_loopback_in)
863 return EBUSY;
864
865 ifs->ifs_hook6_loopback_out =
866 (net_hook_unregister(ifs->ifs_ipf_ipv6,
867 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
868 if (ifs->ifs_hook6_loopback_out)
869 return EBUSY;
870 }
871 return 0;
872 }
873
874
875 /*
876 * Filter ioctl interface.
877 */
878 /*ARGSUSED*/
879 int iplioctl(dev, cmd, data, mode, cp, rp)
880 dev_t dev;
881 int cmd;
882 #if SOLARIS2 >= 7
883 intptr_t data;
884 #else
885 int *data;
886 #endif
887 int mode;
888 cred_t *cp;
889 int *rp;
890 {
891 int error = 0, tmp;
892 friostat_t fio;
893 minor_t unit;
894 u_int enable;
895 ipf_stack_t *ifs;
896 zoneid_t zid;
897 ipf_devstate_t *isp;
898
899 #ifdef IPFDEBUG
900 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
901 dev, cmd, data, mode, cp, rp);
902 #endif
903 unit = getminor(dev);
904
905 isp = ddi_get_soft_state(ipf_state, unit);
906 if (isp == NULL)
907 return ENXIO;
908 unit = isp->ipfs_minor;
909
910 zid = crgetzoneid(cp);
911 if (cmd == SIOCIPFZONESET) {
912 if (zid == GLOBAL_ZONEID)
913 return fr_setzoneid(isp, (caddr_t) data);
914 return EACCES;
915 }
916
917 /*
918 * ipf_find_stack returns with a read lock on ifs_ipf_global
919 */
920 ifs = ipf_find_stack(zid, isp);
921 if (ifs == NULL)
922 return ENXIO;
923
924 if (ifs->ifs_fr_running <= 0) {
925 if (unit != IPL_LOGIPF) {
926 RWLOCK_EXIT(&ifs->ifs_ipf_global);
927 return EIO;
928 }
929 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
930 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
931 cmd != SIOCGETFS && cmd != SIOCGETFF) {
932 RWLOCK_EXIT(&ifs->ifs_ipf_global);
933 return EIO;
934 }
935 }
936
937 if (ifs->ifs_fr_enable_active != 0) {
938 RWLOCK_EXIT(&ifs->ifs_ipf_global);
939 return EBUSY;
940 }
941
942 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
943 curproc, ifs);
944 if (error != -1) {
945 RWLOCK_EXIT(&ifs->ifs_ipf_global);
946 return error;
947 }
948 error = 0;
949
950 switch (cmd)
951 {
952 case SIOCFRENB :
953 if (!(mode & FWRITE))
954 error = EPERM;
955 else {
956 error = COPYIN((caddr_t)data, (caddr_t)&enable,
957 sizeof(enable));
958 if (error != 0) {
959 error = EFAULT;
960 break;
961 }
962
963 RWLOCK_EXIT(&ifs->ifs_ipf_global);
964 WRITE_ENTER(&ifs->ifs_ipf_global);
965
966 /*
967 * We must recheck fr_enable_active here, since we've
968 * dropped ifs_ipf_global from R in order to get it
969 * exclusively.
970 */
971 if (ifs->ifs_fr_enable_active == 0) {
972 ifs->ifs_fr_enable_active = 1;
973 error = fr_enableipf(ifs, enable);
974 ifs->ifs_fr_enable_active = 0;
975 }
976 }
977 break;
978 case SIOCIPFSET :
979 if (!(mode & FWRITE)) {
980 error = EPERM;
981 break;
982 }
983 /* FALLTHRU */
984 case SIOCIPFGETNEXT :
985 case SIOCIPFGET :
986 error = fr_ipftune(cmd, (void *)data, ifs);
987 break;
988 case SIOCSETFF :
989 if (!(mode & FWRITE))
990 error = EPERM;
991 else {
992 error = COPYIN((caddr_t)data,
993 (caddr_t)&ifs->ifs_fr_flags,
994 sizeof(ifs->ifs_fr_flags));
995 if (error != 0)
996 error = EFAULT;
997 }
998 break;
999 case SIOCIPFLP :
1000 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1001 sizeof(tmp));
1002 if (error != 0)
1003 error = EFAULT;
1004 else
1005 error = fr_setipfloopback(tmp, ifs);
1006 break;
1007 case SIOCGETFF :
1008 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
1009 sizeof(ifs->ifs_fr_flags));
1010 if (error != 0)
1011 error = EFAULT;
1012 break;
1013 case SIOCFUNCL :
1014 error = fr_resolvefunc((void *)data);
1015 break;
1016 case SIOCINAFR :
1017 case SIOCRMAFR :
1018 case SIOCADAFR :
1019 case SIOCZRLST :
1020 if (!(mode & FWRITE))
1021 error = EPERM;
1022 else
1023 error = frrequest(unit, cmd, (caddr_t)data,
1024 ifs->ifs_fr_active, 1, ifs);
1025 break;
1026 case SIOCINIFR :
1027 case SIOCRMIFR :
1028 case SIOCADIFR :
1029 if (!(mode & FWRITE))
1030 error = EPERM;
1031 else
1032 error = frrequest(unit, cmd, (caddr_t)data,
1033 1 - ifs->ifs_fr_active, 1, ifs);
1034 break;
1035 case SIOCSWAPA :
1036 if (!(mode & FWRITE))
1037 error = EPERM;
1038 else {
1039 WRITE_ENTER(&ifs->ifs_ipf_mutex);
1040 bzero((char *)ifs->ifs_frcache,
1041 sizeof (ifs->ifs_frcache));
1042 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
1043 (caddr_t)data,
1044 sizeof(ifs->ifs_fr_active));
1045 if (error != 0)
1046 error = EFAULT;
1047 else
1048 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
1049 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
1050 }
1051 break;
1052 case SIOCGETFS :
1053 fr_getstat(&fio, ifs);
1054 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
1055 break;
1056 case SIOCFRZST :
1057 if (!(mode & FWRITE))
1058 error = EPERM;
1059 else
1060 error = fr_zerostats((caddr_t)data, ifs);
1061 break;
1062 case SIOCIPFFL :
1063 if (!(mode & FWRITE))
1064 error = EPERM;
1065 else {
1066 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1067 sizeof(tmp));
1068 if (!error) {
1069 tmp = frflush(unit, 4, tmp, ifs);
1070 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1071 sizeof(tmp));
1072 if (error != 0)
1073 error = EFAULT;
1074 } else
1075 error = EFAULT;
1076 }
1077 break;
1078 #ifdef USE_INET6
1079 case SIOCIPFL6 :
1080 if (!(mode & FWRITE))
1081 error = EPERM;
1082 else {
1083 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1084 sizeof(tmp));
1085 if (!error) {
1086 tmp = frflush(unit, 6, tmp, ifs);
1087 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1088 sizeof(tmp));
1089 if (error != 0)
1090 error = EFAULT;
1091 } else
1092 error = EFAULT;
1093 }
1094 break;
1095 #endif
1096 case SIOCSTLCK :
1097 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1098 if (error == 0) {
1099 ifs->ifs_fr_state_lock = tmp;
1100 ifs->ifs_fr_nat_lock = tmp;
1101 ifs->ifs_fr_frag_lock = tmp;
1102 ifs->ifs_fr_auth_lock = tmp;
1103 } else
1104 error = EFAULT;
1105 break;
1106 #ifdef IPFILTER_LOG
1107 case SIOCIPFFB :
1108 if (!(mode & FWRITE))
1109 error = EPERM;
1110 else {
1111 tmp = ipflog_clear(unit, ifs);
1112 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1113 sizeof(tmp));
1114 if (error)
1115 error = EFAULT;
1116 }
1117 break;
1118 #endif /* IPFILTER_LOG */
1119 case SIOCFRSYN :
1120 if (!(mode & FWRITE))
1121 error = EPERM;
1122 else {
1123 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1124 WRITE_ENTER(&ifs->ifs_ipf_global);
1125
1126 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1127 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1128 fr_nataddrsync(0, NULL, NULL, ifs);
1129 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1130 error = 0;
1131 }
1132 break;
1133 case SIOCGFRST :
1134 error = fr_outobj((void *)data, fr_fragstats(ifs),
1135 IPFOBJ_FRAGSTAT);
1136 break;
1137 case FIONREAD :
1138 #ifdef IPFILTER_LOG
1139 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1140
1141 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1142 if (error != 0)
1143 error = EFAULT;
1144 #endif
1145 break;
1146 case SIOCIPFITER :
1147 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1148 curproc, ifs);
1149 break;
1150
1151 case SIOCGENITER :
1152 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1153 curproc, ifs);
1154 break;
1155
1156 case SIOCIPFDELTOK :
1157 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1158 if (error != 0) {
1159 error = EFAULT;
1160 } else {
1161 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1162 }
1163 break;
1164
1165 default :
1166 #ifdef IPFDEBUG
1167 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1168 cmd, (void *)data);
1169 #endif
1170 error = EINVAL;
1171 break;
1172 }
1173 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1174 return error;
1175 }
1176
1177
1178 static int fr_enableipf(ifs, enable)
1179 ipf_stack_t *ifs;
1180 int enable;
1181 {
1182 int error;
1183
1184 if (!enable) {
1185 error = ipldetach(ifs);
1186 if (error == 0)
1187 ifs->ifs_fr_running = -1;
1188 return error;
1189 }
1190
1191 if (ifs->ifs_fr_running > 0)
1192 return 0;
1193
1194 error = iplattach(ifs);
1195 if (error == 0) {
1196 if (ifs->ifs_fr_timer_id == NULL) {
1197 int hz = drv_usectohz(500000);
1198
1199 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1200 (void *)ifs,
1201 hz);
1202 }
1203 ifs->ifs_fr_running = 1;
1204 } else {
1205 (void) ipldetach(ifs);
1206 }
1207 return error;
1208 }
1209
1210
1211 phy_if_t get_unit(name, v, ifs)
1212 char *name;
1213 int v;
1214 ipf_stack_t *ifs;
1215 {
1216 net_handle_t nif;
1217
1218 if (v == 4)
1219 nif = ifs->ifs_ipf_ipv4;
1220 else if (v == 6)
1221 nif = ifs->ifs_ipf_ipv6;
1222 else
1223 return 0;
1224
1225 return (net_phylookup(nif, name));
1226 }
1227
1228 /*
1229 * routines below for saving IP headers to buffer
1230 */
1231 /*ARGSUSED*/
1232 int iplopen(devp, flags, otype, cred)
1233 dev_t *devp;
1234 int flags, otype;
1235 cred_t *cred;
1236 {
1237 ipf_devstate_t *isp;
1238 minor_t min = getminor(*devp);
1239 minor_t minor;
1240
1241 #ifdef IPFDEBUG
1242 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1243 #endif
1244 if (!(otype & OTYP_CHR))
1245 return ENXIO;
1246
1247 if (IPL_LOGMAX < min)
1248 return ENXIO;
1249
1250 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1251 VM_BESTFIT | VM_SLEEP);
1252
1253 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1254 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1255 return ENXIO;
1256 }
1257
1258 *devp = makedevice(getmajor(*devp), minor);
1259 isp = ddi_get_soft_state(ipf_state, minor);
1260 VERIFY(isp != NULL);
1261
1262 isp->ipfs_minor = min;
1263 isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1264
1265 return 0;
1266 }
1267
1268
1269 /*ARGSUSED*/
1270 int iplclose(dev, flags, otype, cred)
1271 dev_t dev;
1272 int flags, otype;
1273 cred_t *cred;
1274 {
1275 minor_t min = getminor(dev);
1276
1277 #ifdef IPFDEBUG
1278 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1279 #endif
1280
1281 if (IPL_LOGMAX < min)
1282 return ENXIO;
1283
1284 ddi_soft_state_free(ipf_state, min);
1285 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1286
1287 return 0;
1288 }
1289
1290 #ifdef IPFILTER_LOG
1291 /*
1292 * iplread/ipllog
1293 * both of these must operate with at least splnet() lest they be
1294 * called during packet processing and cause an inconsistancy to appear in
1295 * the filter lists.
1296 */
1297 /*ARGSUSED*/
1298 int iplread(dev, uio, cp)
1299 dev_t dev;
1300 register struct uio *uio;
1301 cred_t *cp;
1302 {
1303 ipf_stack_t *ifs;
1304 int ret;
1305 minor_t unit;
1306 ipf_devstate_t *isp;
1307
1308 unit = getminor(dev);
1309 isp = ddi_get_soft_state(ipf_state, unit);
1310 if (isp == NULL)
1311 return ENXIO;
1312 unit = isp->ipfs_minor;
1313
1314 /*
1315 * ipf_find_stack returns with a read lock on ifs_ipf_global
1316 */
1317 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1318 if (ifs == NULL)
1319 return ENXIO;
1320
1321 # ifdef IPFDEBUG
1322 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1323 # endif
1324
1325 if (ifs->ifs_fr_running < 1) {
1326 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1327 return EIO;
1328 }
1329
1330 # ifdef IPFILTER_SYNC
1331 if (unit == IPL_LOGSYNC) {
1332 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1333 return ipfsync_read(uio);
1334 }
1335 # endif
1336
1337 ret = ipflog_read(unit, uio, ifs);
1338 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1339 return ret;
1340 }
1341 #endif /* IPFILTER_LOG */
1342
1343
1344 /*
1345 * iplread/ipllog
1346 * both of these must operate with at least splnet() lest they be
1347 * called during packet processing and cause an inconsistancy to appear in
1348 * the filter lists.
1349 */
1350 int iplwrite(dev, uio, cp)
1351 dev_t dev;
1352 register struct uio *uio;
1353 cred_t *cp;
1354 {
1355 ipf_stack_t *ifs;
1356 minor_t unit;
1357 ipf_devstate_t *isp;
1358
1359 unit = getminor(dev);
1360 isp = ddi_get_soft_state(ipf_state, unit);
1361 if (isp == NULL)
1362 return ENXIO;
1363 unit = isp->ipfs_minor;
1364
1365 /*
1366 * ipf_find_stack returns with a read lock on ifs_ipf_global
1367 */
1368 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1369 if (ifs == NULL)
1370 return ENXIO;
1371
1372 #ifdef IPFDEBUG
1373 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1374 #endif
1375
1376 if (ifs->ifs_fr_running < 1) {
1377 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1378 return EIO;
1379 }
1380
1381 #ifdef IPFILTER_SYNC
1382 if (getminor(dev) == IPL_LOGSYNC) {
1383 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1384 return ipfsync_write(uio);
1385 }
1386 #endif /* IPFILTER_SYNC */
1387 dev = dev; /* LINT */
1388 uio = uio; /* LINT */
1389 cp = cp; /* LINT */
1390 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1391 return ENXIO;
1392 }
1393
1394
1395 /*
1396 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1397 * requires a large amount of setting up and isn't any more efficient.
1398 */
1399 int fr_send_reset(fin)
1400 fr_info_t *fin;
1401 {
1402 tcphdr_t *tcp, *tcp2;
1403 int tlen, hlen;
1404 mblk_t *m;
1405 #ifdef USE_INET6
1406 ip6_t *ip6;
1407 #endif
1408 ip_t *ip;
1409
1410 tcp = fin->fin_dp;
1411 if (tcp->th_flags & TH_RST)
1412 return -1;
1413
1414 #ifndef IPFILTER_CKSUM
1415 if (fr_checkl4sum(fin) == -1)
1416 return -1;
1417 #endif
1418
1419 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1420 #ifdef USE_INET6
1421 if (fin->fin_v == 6)
1422 hlen = sizeof(ip6_t);
1423 else
1424 #endif
1425 hlen = sizeof(ip_t);
1426 hlen += sizeof(*tcp2);
1427 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1428 return -1;
1429
1430 m->b_rptr += 64;
1431 MTYPE(m) = M_DATA;
1432 m->b_wptr = m->b_rptr + hlen;
1433 ip = (ip_t *)m->b_rptr;
1434 bzero((char *)ip, hlen);
1435 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1436 tcp2->th_dport = tcp->th_sport;
1437 tcp2->th_sport = tcp->th_dport;
1438 if (tcp->th_flags & TH_ACK) {
1439 tcp2->th_seq = tcp->th_ack;
1440 tcp2->th_flags = TH_RST;
1441 } else {
1442 tcp2->th_ack = ntohl(tcp->th_seq);
1443 tcp2->th_ack += tlen;
1444 tcp2->th_ack = htonl(tcp2->th_ack);
1445 tcp2->th_flags = TH_RST|TH_ACK;
1446 }
1447 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1448
1449 ip->ip_v = fin->fin_v;
1450 #ifdef USE_INET6
1451 if (fin->fin_v == 6) {
1452 ip6 = (ip6_t *)m->b_rptr;
1453 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1454 ip6->ip6_src = fin->fin_dst6.in6;
1455 ip6->ip6_dst = fin->fin_src6.in6;
1456 ip6->ip6_plen = htons(sizeof(*tcp));
1457 ip6->ip6_nxt = IPPROTO_TCP;
1458 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1459 } else
1460 #endif
1461 {
1462 ip->ip_src.s_addr = fin->fin_daddr;
1463 ip->ip_dst.s_addr = fin->fin_saddr;
1464 ip->ip_id = fr_nextipid(fin);
1465 ip->ip_hl = sizeof(*ip) >> 2;
1466 ip->ip_p = IPPROTO_TCP;
1467 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1468 ip->ip_tos = fin->fin_ip->ip_tos;
1469 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1470 }
1471 return fr_send_ip(fin, m, &m);
1472 }
1473
1474 /*
1475 * Function: fr_send_ip
1476 * Returns: 0: success
1477 * -1: failed
1478 * Parameters:
1479 * fin: packet information
1480 * m: the message block where ip head starts
1481 *
1482 * Send a new packet through the IP stack.
1483 *
1484 * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1485 * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1486 * function).
1487 *
1488 * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1489 * in by this function.
1490 *
1491 * All other portions of the packet must be in on-the-wire format.
1492 */
1493 /*ARGSUSED*/
1494 static int fr_send_ip(fin, m, mpp)
1495 fr_info_t *fin;
1496 mblk_t *m, **mpp;
1497 {
1498 qpktinfo_t qpi, *qpip;
1499 fr_info_t fnew;
1500 ip_t *ip;
1501 int i, hlen;
1502 ipf_stack_t *ifs = fin->fin_ifs;
1503
1504 ip = (ip_t *)m->b_rptr;
1505 bzero((char *)&fnew, sizeof(fnew));
1506
1507 #ifdef USE_INET6
1508 if (fin->fin_v == 6) {
1509 ip6_t *ip6;
1510
1511 ip6 = (ip6_t *)ip;
1512 ip6->ip6_vfc = 0x60;
1513 ip6->ip6_hlim = 127;
1514 fnew.fin_v = 6;
1515 hlen = sizeof(*ip6);
1516 fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1517 } else
1518 #endif
1519 {
1520 fnew.fin_v = 4;
1521 #if SOLARIS2 >= 10
1522 ip->ip_ttl = 255;
1523 if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1524 ip->ip_off = htons(IP_DF);
1525 #else
1526 if (ip_ttl_ptr != NULL)
1527 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1528 else
1529 ip->ip_ttl = 63;
1530 if (ip_mtudisc != NULL)
1531 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1532 else
1533 ip->ip_off = htons(IP_DF);
1534 #endif
1535 /*
1536 * The dance with byte order and ip_len/ip_off is because in
1537 * fr_fastroute, it expects them to be in host byte order but
1538 * ipf_cksum expects them to be in network byte order.
1539 */
1540 ip->ip_len = htons(ip->ip_len);
1541 ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1542 ip->ip_len = ntohs(ip->ip_len);
1543 ip->ip_off = ntohs(ip->ip_off);
1544 hlen = sizeof(*ip);
1545 fnew.fin_plen = ip->ip_len;
1546 }
1547
1548 qpip = fin->fin_qpi;
1549 qpi.qpi_off = 0;
1550 qpi.qpi_ill = qpip->qpi_ill;
1551 qpi.qpi_m = m;
1552 qpi.qpi_data = ip;
1553 fnew.fin_qpi = &qpi;
1554 fnew.fin_ifp = fin->fin_ifp;
1555 fnew.fin_flx = FI_NOCKSUM;
1556 fnew.fin_m = m;
1557 fnew.fin_qfm = m;
1558 fnew.fin_ip = ip;
1559 fnew.fin_mp = mpp;
1560 fnew.fin_hlen = hlen;
1561 fnew.fin_dp = (char *)ip + hlen;
1562 fnew.fin_ifs = fin->fin_ifs;
1563 (void) fr_makefrip(hlen, ip, &fnew);
1564
1565 i = fr_fastroute(m, mpp, &fnew, NULL);
1566 return i;
1567 }
1568
1569
/*
 * fr_send_icmp_err - build and send an ICMP (or ICMPv6) error in response
 * to the packet described by fin.
 *
 * type: ICMP type to send; for IPv6 it is translated through
 *       icmptoicmp6types[]
 * fin:  packet information; fin_icode supplies the ICMP code
 * dst:  zero means use an address of the receiving interface (looked up
 *       with fr_ifpaddr) as the source of the error, non-zero means use
 *       the original packet's destination address
 *
 * Returns -1 if the type/code is out of range, the checksum check fails,
 * no buffer can be allocated or no interface address is found; 0 when an
 * IPv4 ICMP packet other than one of the listed query types is being
 * answered (nothing is sent); otherwise the result of fr_send_ip().
 */
int fr_send_icmp_err(type, fin, dst)
int type;
fr_info_t *fin;
int dst;
{
	struct in_addr dst4;
	struct icmp *icmp;
	qpktinfo_t *qpi;
	int hlen, code;
	phy_if_t phy;
	u_short sz;
#ifdef	USE_INET6
	mblk_t *mb;
#endif
	mblk_t *m;
#ifdef	USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;
	ipf_stack_t *ifs = fin->fin_ifs;

	if ((type < 0) || (type > ICMP_MAXTYPE))
		return -1;

	code = fin->fin_icode;
#ifdef USE_INET6
	/* code indexes icmptoicmp6unreach[] below, so bound it first. */
	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
		return -1;
#endif

#ifndef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	qpi = fin->fin_qpi;

#ifdef	USE_INET6
	mb = fin->fin_qfm;

	if (fin->fin_v == 6) {
		/* Quote at most 512 bytes of the first mblk of the packet. */
		sz = sizeof(ip6_t);
		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
		hlen = sizeof(ip6_t);
		type = icmptoicmp6types[type];
		if (type == ICMP6_DST_UNREACH)
			code = icmptoicmp6unreach[code];
	} else
#endif
	{
		/*
		 * For a complete ICMP packet, only the query types listed
		 * below are answered with an error; for anything else
		 * return 0 without sending.
		 */
		if ((fin->fin_p == IPPROTO_ICMP) &&
		    !(fin->fin_flx & FI_SHORT))
			switch (ntohs(fin->fin_data[0]) >> 8)
			{
			case ICMP_ECHO :
			case ICMP_TSTAMP :
			case ICMP_IREQ :
			case ICMP_MASKREQ :
				break;
			default :
				return 0;
			}

		/* Outer IP header + quoted IP header + 8 bytes of payload. */
		sz = sizeof(ip_t) * 2;
		sz += 8;		/* 64 bits of data */
		hlen = sizeof(ip_t);
	}

	/* Add the ICMP header that precedes the quoted packet. */
	sz += offsetof(struct icmp, icmp_ip);
	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
		return -1;
	MTYPE(m) = M_DATA;
	/* Leave 64 bytes free in front of the packet. */
	m->b_rptr += 64;
	m->b_wptr = m->b_rptr + sz;
	bzero((char *)m->b_rptr, (size_t)sz);
	ip = (ip_t *)m->b_rptr;
	ip->ip_v = fin->fin_v;
	icmp = (struct icmp *)(m->b_rptr + hlen);
	icmp->icmp_type = type & 0xff;
	icmp->icmp_code = code & 0xff;
	phy = (phy_if_t)qpi->qpi_ill;
	/*
	 * For "fragmentation needed" report the MTU of the interface.
	 * NOTE(review): the value from net_getmtu() is stored without a
	 * byte-order conversion, and the IPv4 handle is used even after
	 * type has been translated for IPv6 - confirm both are intended.
	 */
	if (type == ICMP_UNREACH && (phy != 0) &&
	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );

#ifdef	USE_INET6
	if (fin->fin_v == 6) {
		struct in6_addr dst6;
		int csz;

		if (dst == 0) {
			/* Shadows the outer ifs with the same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
				       (void *)&dst6, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else
			dst6 = fin->fin_dst6.in6;

		csz = sz;
		/* From here on sz is the IPv6 payload length. */
		sz -= sizeof(ip6_t);
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_plen = htons((u_short)sz);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_src = dst6;
		ip6->ip6_dst = fin->fin_src6.in6;
		/* ...and now the number of quoted bytes to copy. */
		sz -= offsetof(struct icmp, icmp_ip);
		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
		/*
		 * The checksum field is set to the ICMPv6 length rather
		 * than a computed checksum.
		 * NOTE(review): presumably completed further down the send
		 * path - confirm.
		 */
		icmp->icmp_cksum = csz - sizeof(ip6_t);
	} else
#endif
	{
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_ICMP;
		ip->ip_id = fin->fin_ip->ip_id;
		ip->ip_tos = fin->fin_ip->ip_tos;
		/* fr_send_ip() converts ip_len itself, so store it as-is. */
		ip->ip_len = (u_short)sz;
		if (dst == 0) {
			/* Shadows the outer ifs with the same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
				       (void *)&dst4, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else {
			dst4 = fin->fin_dst;
		}
		ip->ip_src = dst4;
		ip->ip_dst = fin->fin_src;
		/* Quote the original IP header (without options)... */
		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
		      sizeof(*fin->fin_ip));
		/* ...followed by the 8 bytes found after its full header. */
		bcopy((char *)fin->fin_ip + fin->fin_hlen,
		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
					     sz - sizeof(ip_t));
	}

	/*
	 * Need to exit out of these so we don't recursively call rw_enter
	 * from fr_qout.
	 * NOTE(review): no lock is released in this function; the remark
	 * above looks stale - confirm.
	 */
	return fr_send_ip(fin, m, &m);
}
1719
1720 #include <sys/time.h>
1721 #include <sys/varargs.h>
1722
1723 #ifndef _KERNEL
1724 #include <stdio.h>
1725 #endif
1726
1727 /*
1728 * Return the first IP Address associated with an interface
1729 * For IPv6, we walk through the list of logical interfaces and return
1730 * the address of the first one that isn't a link-local interface.
1731 * We can't assume that it is :1 because another link-local address
1732 * may have been assigned there.
1733 */
1734 /*ARGSUSED*/
1735 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1736 int v, atype;
1737 void *ifptr;
1738 struct in_addr *inp, *inpmask;
1739 ipf_stack_t *ifs;
1740 {
1741 struct sockaddr_in6 v6addr[2];
1742 struct sockaddr_in v4addr[2];
1743 net_ifaddr_t type[2];
1744 net_handle_t net_data;
1745 phy_if_t phyif;
1746 void *array;
1747
1748 switch (v)
1749 {
1750 case 4:
1751 net_data = ifs->ifs_ipf_ipv4;
1752 array = v4addr;
1753 break;
1754 case 6:
1755 net_data = ifs->ifs_ipf_ipv6;
1756 array = v6addr;
1757 break;
1758 default:
1759 net_data = NULL;
1760 break;
1761 }
1762
1763 if (net_data == NULL)
1764 return -1;
1765
1766 phyif = (phy_if_t)ifptr;
1767
1768 switch (atype)
1769 {
1770 case FRI_PEERADDR :
1771 type[0] = NA_PEER;
1772 break;
1773
1774 case FRI_BROADCAST :
1775 type[0] = NA_BROADCAST;
1776 break;
1777
1778 default :
1779 type[0] = NA_ADDRESS;
1780 break;
1781 }
1782
1783 type[1] = NA_NETMASK;
1784
1785 if (v == 6) {
1786 lif_if_t idx = 0;
1787
1788 do {
1789 idx = net_lifgetnext(net_data, phyif, idx);
1790 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1791 array) < 0)
1792 return -1;
1793 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1794 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1795 break;
1796 } while (idx != 0);
1797
1798 if (idx == 0)
1799 return -1;
1800
1801 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1802 inp, inpmask);
1803 }
1804
1805 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1806 return -1;
1807
1808 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1809 }
1810
1811
1812 u_32_t fr_newisn(fin)
1813 fr_info_t *fin;
1814 {
1815 static int iss_seq_off = 0;
1816 u_char hash[16];
1817 u_32_t newiss;
1818 MD5_CTX ctx;
1819 ipf_stack_t *ifs = fin->fin_ifs;
1820
1821 /*
1822 * Compute the base value of the ISS. It is a hash
1823 * of (saddr, sport, daddr, dport, secret).
1824 */
1825 MD5Init(&ctx);
1826
1827 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1828 sizeof(fin->fin_fi.fi_src));
1829 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1830 sizeof(fin->fin_fi.fi_dst));
1831 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1832
1833 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1834
1835 MD5Final(hash, &ctx);
1836
1837 bcopy(hash, &newiss, sizeof(newiss));
1838
1839 /*
1840 * Now increment our "timer", and add it in to
1841 * the computed value.
1842 *
1843 * XXX Use `addin'?
1844 * XXX TCP_ISSINCR too large to use?
1845 */
1846 iss_seq_off += 0x00010000;
1847 newiss += iss_seq_off;
1848 return newiss;
1849 }
1850
1851
1852 /* ------------------------------------------------------------------------ */
1853 /* Function: fr_nextipid */
1854 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */
1855 /* Parameters: fin(I) - pointer to packet information */
1856 /* */
1857 /* Returns the next IPv4 ID to use for this packet. */
1858 /* ------------------------------------------------------------------------ */
1859 u_short fr_nextipid(fin)
1860 fr_info_t *fin;
1861 {
1862 static u_short ipid = 0;
1863 u_short id;
1864 ipf_stack_t *ifs = fin->fin_ifs;
1865
1866 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1867 if (fin->fin_pktnum != 0) {
1868 id = fin->fin_pktnum & 0xffff;
1869 } else {
1870 id = ipid++;
1871 }
1872 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1873
1874 return id;
1875 }
1876
1877
1878 #ifndef IPFILTER_CKSUM
1879 /* ARGSUSED */
1880 #endif
1881 INLINE void fr_checkv4sum(fin)
1882 fr_info_t *fin;
1883 {
1884 #ifdef IPFILTER_CKSUM
1885 if (fr_checkl4sum(fin) == -1)
1886 fin->fin_flx |= FI_BAD;
1887 #endif
1888 }
1889
1890
#ifdef	USE_INET6
/*
 * fr_checkv6sum - flag the packet as bad (FI_BAD) if fr_checkl4sum()
 * rejects it.  Does nothing unless IPFILTER_CKSUM is defined.
 */
# ifndef	IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;
	fin->fin_flx |= FI_BAD;
# endif
}
#endif	/* USE_INET6 */
1904
1905
1906 #if (SOLARIS2 < 7)
1907 void fr_slowtimer()
1908 #else
1909 /*ARGSUSED*/
1910 void fr_slowtimer __P((void *arg))
1911 #endif
1912 {
1913 ipf_stack_t *ifs = arg;
1914
1915 READ_ENTER(&ifs->ifs_ipf_global);
1916 if (ifs->ifs_fr_running != 1) {
1917 ifs->ifs_fr_timer_id = NULL;
1918 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1919 return;
1920 }
1921 ipf_expiretokens(ifs);
1922 fr_fragexpire(ifs);
1923 fr_timeoutstate(ifs);
1924 fr_natexpire(ifs);
1925 fr_authexpire(ifs);
1926 ifs->ifs_fr_ticks++;
1927 if (ifs->ifs_fr_running == 1)
1928 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1929 drv_usectohz(500000));
1930 else
1931 ifs->ifs_fr_timer_id = NULL;
1932 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1933 }
1934
1935
/* ------------------------------------------------------------------------ */
/* Function:    fr_pullup                                                   */
/* Returns:     NULL == pullup failed, else pointer to protocol header      */
/* Parameters:  m(I)   - pointer to buffer where data packet starts         */
/*              fin(I) - pointer to packet information                      */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* On failure the whole message at *fin->fin_mp is freed and the packet     */
/* pointers in fin (and qpi_data) are set to NULL.                          */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Already coalesced: the current header pointer is good as it is. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/* Remember fin_dp as an offset from fin_ip so it can be rebuilt. */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only touch the buffers if the first one is too short. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * If the IP header offset is not a multiple of 4, try to
		 * move b_rptr back by the 'inc' bytes needed to round it
		 * up to one, provided that stays inside the data buffer;
		 * otherwise inc is left at 0.  The adjustment is undone
		 * after a successful pullup.
		 * NOTE(review): presumably this keeps the IP header 32-bit
		 * aligned in the buffer pullupmsg() creates - confirm.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		m2 = m->b_prev;		/* saved, restored after the pullup */

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Preserve the hardware checksum information: read it
		 * before the pullup and set it again on the message
		 * afterwards.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/* Count the failure and discard the whole packet. */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		m->b_prev = m2;
		m->b_rptr += inc;	/* undo the alignment adjustment */
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* Counted as a successful pullup even if nothing had to move. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	/* The caller asked for the whole packet, so it is now contiguous. */
	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}
2048
2049
2050 /*
2051 * Function: fr_verifysrc
2052 * Returns: int (really boolean)
2053 * Parameters: fin - packet information
2054 *
2055 * Check whether the packet has a valid source address for the interface on
2056 * which the packet arrived, implementing the "fr_chksrc" feature.
2057 * Returns true iff the packet's source address is valid.
2058 */
2059 int fr_verifysrc(fin)
2060 fr_info_t *fin;
2061 {
2062 net_handle_t net_data_p;
2063 phy_if_t phy_ifdata_routeto;
2064 struct sockaddr sin;
2065 ipf_stack_t *ifs = fin->fin_ifs;
2066
2067 if (fin->fin_v == 4) {
2068 net_data_p = ifs->ifs_ipf_ipv4;
2069 } else if (fin->fin_v == 6) {
2070 net_data_p = ifs->ifs_ipf_ipv6;
2071 } else {
2072 return (0);
2073 }
2074
2075 /* Get the index corresponding to the if name */
2076 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2077 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
2078 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
2079
2080 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
2081 }
2082
2083 /*
2084 * Return true only if forwarding is enabled on the interface.
2085 */
2086 static int
2087 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2088 {
2089 lif_if_t lif;
2090
2091 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2092 lif = net_lifgetnext(ndp, phyif, lif)) {
2093 int res;
2094 uint64_t flags;
2095
2096 res = net_getlifflags(ndp, phyif, lif, &flags);
2097 if (res != 0)
2098 return (0);
2099 if (flags & IFF_ROUTER)
2100 return (1);
2101 }
2102
2103 return (0);
2104 }
2105
/*
 * Function:	fr_fastroute
 * Returns:	 0: success;
 *		-1: failed
 * Parameters:
 *	mb: the message block where ip head starts
 *	mpp: the pointer to the pointer of the original
 *		packet message
 *	fin: packet information
 *	fdp: destination interface information
 *	if it is NULL, no interface information provided.
 *
 * This function is for fastroute/to/dup-to rules.  It fills in a
 * net_inject_t with the packet, destination address and outgoing
 * interface and hands it to net_inject() with NI_DIRECT_OUT.
 * The destination address depends on the following conditions:
 * 1: for fastroute rule, fdp is passed in as NULL, so the
 *	destination address is the IP Packet's destination address
 *	and the interface comes from net_routeto()
 * 2: for to/dup-to rule (fdp != NULL), the address and interface
 *	stored in fdp are used
 *
 * When fdp is given, both the source interface (fin_ifp) and the chosen
 * outgoing interface must have forwarding enabled.
 *
 * NOTE(review): the failure paths are not uniform - the bad_fastroute
 * label frees mb and counts the failure, whereas the early returns and
 * the net_inject() failure return -1 without doing either.  Confirm which
 * party owns the message on each path.
 */
int fr_fastroute(mb, mpp, fin, fdp)
mblk_t *mb, **mpp;
fr_info_t *fin;
frdest_t *fdp;
{
	net_handle_t net_data_p;
	net_inject_t *inj;
	mblk_t *mp = NULL;
	frentry_t *fr = fin->fin_fr;
	qpktinfo_t *qpi;
	ip_t *ip;

	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct sockaddr *sinp;
	ipf_stack_t *ifs = fin->fin_ifs;
#ifndef	sparc
	u_short __iplen, __ipoff;
#endif

	if (fin->fin_v == 4) {
		net_data_p = ifs->ifs_ipf_ipv4;
	} else if (fin->fin_v == 6) {
		net_data_p = ifs->ifs_ipf_ipv6;
	} else {
		return (-1);
	}

	/*
	 * If we're forwarding (vs. injecting), check the src here, fin_ifp is
	 * the src interface.
	 */
	if (fdp != NULL &&
	    !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
		return (-1);

	inj = net_inject_alloc(NETINFO_VERSION);
	if (inj == NULL)
		return -1;

	ip = fin->fin_ip;
	qpi = fin->fin_qpi;

	/*
	 * If this is a duplicate mblk then we want ip to point at that
	 * data, not the original, if and only if it is already pointing at
	 * the current mblk data.
	 *
	 * Otherwise, if it's not a duplicate, and we're not already pointing
	 * at the current mblk data, then we want to ensure that the data
	 * points at ip.
	 */

	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
		ip = (ip_t *)mb->b_rptr;
	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
		qpi->qpi_m->b_rptr = (uchar_t *)ip;
		qpi->qpi_off = 0;
	}

	/*
	 * If there is another M_PROTO, we don't want it
	 */
	if (*mpp != mb) {
		/* Detach and free only the leading block of the chain. */
		mp = unlinkb(*mpp);
		freeb(*mpp);
		*mpp = mp;
	}

	/* All three pointers are views of the same address in inj. */
	sinp = (struct sockaddr *)&inj->ni_addr;
	sin = (struct sockaddr_in *)sinp;
	sin6 = (struct sockaddr_in6 *)sinp;
	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
	inj->ni_packet = mb;

	/*
	 * In case we're here due to "to <if>" being used with
	 * "keep state", check that we're going in the correct
	 * direction.
	 */
	if (fdp != NULL) {
		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
		    (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
			goto bad_fastroute;
		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
		if (fin->fin_v == 4) {
			sin->sin_addr = fdp->fd_ip;
		} else {
			sin6->sin6_addr = fdp->fd_ip6.in6;
		}
	} else {
		if (fin->fin_v == 4) {
			sin->sin_addr = ip->ip_dst;
		} else {
			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
		}
		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
	}

	/* If we're forwarding (vs. injecting), check the destination here. */
	if (fdp != NULL && !fr_forwarding_enabled(inj->ni_physical, net_data_p))
		goto bad_fastroute;

	/*
	 * Clear the hardware checksum flags from packets that we are doing
	 * input processing on as leaving them set will cause the outgoing
	 * NIC (if it supports hardware checksum) to calculate them anew,
	 * using the old (correct) checksums as the pseudo value to start
	 * from.
	 */
	if (fin->fin_out == 0) {
		DB_CKSUMFLAGS(mb) = 0;
	}

	*mpp = mb;

	/*
	 * An inbound packet is temporarily treated as outbound on the
	 * chosen interface so that accounting, state and outbound NAT
	 * processing are applied to it; the direction and interface are
	 * put back afterwards.
	 * NOTE(review): the goto below leaves fin_out/fin_ifp modified.
	 */
	if (fin->fin_out == 0) {
		void *saveifp;
		u_32_t pass;

		saveifp = fin->fin_ifp;
		fin->fin_ifp = (void *)inj->ni_physical;
		fin->fin_flx &= ~FI_STATE;
		fin->fin_out = 1;
		(void) fr_acctpkt(fin, &pass);
		fin->fin_fr = NULL;
		if (!fr || !(fr->fr_flags & FR_RETMASK))
			(void) fr_checkstate(fin, &pass);
		if (fr_checknatout(fin, NULL) == -1)
			goto bad_fastroute;
		fin->fin_out = 0;
		fin->fin_ifp = saveifp;
	}
#ifndef	sparc
	/* Put ip_len/ip_off into network byte order for the wire. */
	if (fin->fin_v == 4) {
		__iplen = (u_short)ip->ip_len,
		__ipoff = (u_short)ip->ip_off;

		ip->ip_len = htons(__iplen);
		ip->ip_off = htons(__ipoff);
	}
#endif

	if (net_data_p) {
		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
			net_inject_free(inj);
			return (-1);
		}
	}

	/* Slot 0 counts successes, slot 1 (below) counts failures. */
	ifs->ifs_fr_frouteok[0]++;
	net_inject_free(inj);
	return 0;
bad_fastroute:
	net_inject_free(inj);
	freemsg(mb);
	ifs->ifs_fr_frouteok[1]++;
	return -1;
}
2288
2289
2290 /* ------------------------------------------------------------------------ */
2291 /* Function: ipf_hook4_out */
2292 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2293 /* Parameters: event(I) - pointer to event */
2294 /* info(I) - pointer to hook information for firewalling */
2295 /* */
2296 /* Calling ipf_hook. */
2297 /* ------------------------------------------------------------------------ */
2298 /*ARGSUSED*/
2299 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2300 {
2301 return ipf_hook(info, 1, 0, arg);
2302 }
2303 /*ARGSUSED*/
2304 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2305 {
2306 return ipf_hook6(info, 1, 0, arg);
2307 }
2308
2309 /* ------------------------------------------------------------------------ */
2310 /* Function: ipf_hook4_in */
2311 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2312 /* Parameters: event(I) - pointer to event */
2313 /* info(I) - pointer to hook information for firewalling */
2314 /* */
2315 /* Calling ipf_hook. */
2316 /* ------------------------------------------------------------------------ */
2317 /*ARGSUSED*/
2318 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2319 {
2320 return ipf_hook(info, 0, 0, arg);
2321 }
2322 /*ARGSUSED*/
2323 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2324 {
2325 return ipf_hook6(info, 0, 0, arg);
2326 }
2327
2328
2329 /* ------------------------------------------------------------------------ */
2330 /* Function: ipf_hook4_loop_out */
2331 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2332 /* Parameters: event(I) - pointer to event */
2333 /* info(I) - pointer to hook information for firewalling */
2334 /* */
2335 /* Calling ipf_hook. */
2336 /* ------------------------------------------------------------------------ */
2337 /*ARGSUSED*/
2338 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2339 {
2340 return ipf_hook(info, 1, FI_NOCKSUM, arg);
2341 }
2342 /*ARGSUSED*/
2343 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2344 {
2345 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2346 }
2347
2348 /* ------------------------------------------------------------------------ */
2349 /* Function: ipf_hookvndl3_in */
2350 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2351 /* Parameters: event(I) - pointer to event */
2352 /* info(I) - pointer to hook information for firewalling */
2353 /* */
2354 /* The vnd hooks are private hooks to ON. They represents a layer 2 */
2355 /* datapath generally used to implement virtual machines. The driver sends */
2356 /* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
2357 /* them is in the upper 16 bits while the remaining bits are the */
2358 /* traditional packet hook flags. */
2359 /* */
2360 /* They end up calling the appropriate traditional ip hooks. */
2361 /* ------------------------------------------------------------------------ */
2362 /*ARGSUSED*/
2363 int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
2364 {
2365 return ipf_hook4_in(token, info, arg);
2366 }
2367
2368 int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
2369 {
2370 return ipf_hook6_in(token, info, arg);
2371 }
2372
2373 /*ARGSUSED*/
2374 int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
2375 {
2376 return ipf_hook4_out(token, info, arg);
2377 }
2378
2379 int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
2380 {
2381 return ipf_hook6_out(token, info, arg);
2382 }
2383
2384 /* Static constants used by ipf_hook_ether */
2385 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2386 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2387 };
2388 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2389 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2390
2391 /* ------------------------------------------------------------------------ */
2392 /* Function: ipf_hook_ether */
2393 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2394 /* Parameters: token(I) - pointer to event */
2395 /* info(I) - pointer to hook information for firewalling */
2396 /* */
2397 /* The ipf_hook_ether hook is currently private to illumos. It represents */
2398 /* a layer 2 datapath generally used by virtual machines. Currently the */
2399 /* hook is only used by the viona driver to pass along L2 frames for */
2400 /* inspection. It requires that the L2 ethernet header is contained within */
2401 /* a single dblk_t (however layers above the L2 header have no restrctions */
2402 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */
2403 /* filtering on a MAC address or ethertype), however virtual machines do */
2404 /* not have native IP stack instances where ipf traditionally hooks in. */
2405 /* Instead this entry point is used to determine if the packet is unicast, */
2406 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */
2407 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */
2408 /* not subject to examination. */
2409 /* ------------------------------------------------------------------------ */
2410 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2411 boolean_t out)
2412 {
2413 struct ether_header *ethp;
2414 hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2415 mblk_t *mp;
2416 size_t offset, len;
2417 uint16_t etype;
2418 boolean_t v6;
2419
2420 /*
2421 * viona will only pass us mblks with the L2 header contained in a
2422 * single data block.
2423 */
2424 mp = *hpe->hpe_mp;
2425 len = MBLKL(mp);
2426
2427 VERIFY3S(len, >=, sizeof (struct ether_header));
2428
2429 ethp = (struct ether_header *)mp->b_rptr;
2430 if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2431 struct ether_vlan_header *evh =
2432 (struct ether_vlan_header *)ethp;
2433
2434 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2435
2436 etype = ntohs(evh->ether_type);
2437 offset = sizeof (*evh);
2438 } else {
2439 offset = sizeof (*ethp);
2440 }
2441
2442 /*
2443 * ipf only support filtering IPv4 and IPv6. Ignore other types.
2444 */
2445 if (etype == ETHERTYPE_IP)
2446 v6 = B_FALSE;
2447 else if (etype == ETHERTYPE_IPV6)
2448 v6 = B_TRUE;
2449 else
2450 return (0);
2451
2452 if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2453 hpe->hpe_flags |= HPE_BROADCAST;
2454 else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2455 sizeof (ipf_eth_ipv4_mcast)) == 0)
2456 hpe->hpe_flags |= HPE_MULTICAST;
2457 else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2458 sizeof (ipf_eth_ipv6_mcast)) == 0)
2459 hpe->hpe_flags |= HPE_MULTICAST;
2460
2461 /* Find the start of the IPv4 or IPv6 header */
2462 for (; offset >= len; len = MBLKL(mp)) {
2463 offset -= len;
2464 mp = mp->b_cont;
2465 if (mp == NULL) {
2466 freemsg(*hpe->hpe_mp);
2467 *hpe->hpe_mp = NULL;
2468 return (-1);
2469 }
2470 }
2471 hpe->hpe_mb = mp;
2472 hpe->hpe_hdr = mp->b_rptr + offset;
2473
2474 return (v6 ? ipf_hook6(info, out, 0, arg) :
2475 ipf_hook(info, out, 0, arg));
2476 }
2477
2478 /* ------------------------------------------------------------------------ */
2479 /* Function: ipf_hookviona_{in,out} */
2480 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2481 /* Parameters: event(I) - pointer to event */
2482 /* info(I) - pointer to hook information for firewalling */
2483 /* */
2484 /* The viona hooks are private hooks to illumos. They represents a layer 2 */
2485 /* datapath generally used to implement virtual machines. */
2486 /* along L2 packets. */
2487 /* */
2488 /* They end up calling the appropriate traditional ip hooks. */
2489 /* ------------------------------------------------------------------------ */
2490 int
2491 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2492 {
2493 return (ipf_hook_ether(token, info, arg, B_FALSE));
2494 }
2495
2496 int
2497 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2498 {
2499 return (ipf_hook_ether(token, info, arg, B_TRUE));
2500 }
2501
2502 /* ------------------------------------------------------------------------ */
2503 /* Function: ipf_hook4_loop_in */
2504 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2505 /* Parameters: event(I) - pointer to event */
2506 /* info(I) - pointer to hook information for firewalling */
2507 /* */
2508 /* Calling ipf_hook. */
2509 /* ------------------------------------------------------------------------ */
2510 /*ARGSUSED*/
2511 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2512 {
2513 return ipf_hook(info, 0, FI_NOCKSUM, arg);
2514 }
2515 /*ARGSUSED*/
2516 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2517 {
2518 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2519 }
2520
2521 /* ------------------------------------------------------------------------ */
2522 /* Function: ipf_hook */
2523 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2524 /* Parameters: info(I) - pointer to hook information for firewalling */
2525 /* out(I) - whether packet is going in or out */
2526 /* loopback(I) - whether packet is a loopback packet or not */
2527 /* */
2528 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2529 /* parameters out of the info structure and forms them up to be useful for */
2530 /* calling ipfilter. */
2531 /* ------------------------------------------------------------------------ */
2532 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2533 {
2534 hook_pkt_event_t *fw;
2535 ipf_stack_t *ifs;
2536 qpktinfo_t qpi;
2537 int rval, hlen;
2538 u_short swap;
2539 phy_if_t phy;
2540 ip_t *ip;
2541
2542 ifs = arg;
2543 fw = (hook_pkt_event_t *)info;
2544
2545 ASSERT(fw != NULL);
2546 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2547
2548 ip = fw->hpe_hdr;
2549 swap = ntohs(ip->ip_len);
2550 ip->ip_len = swap;
2551 swap = ntohs(ip->ip_off);
2552 ip->ip_off = swap;
2553 hlen = IPH_HDR_LENGTH(ip);
2554
2555 qpi.qpi_m = fw->hpe_mb;
2556 qpi.qpi_data = fw->hpe_hdr;
2557 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2558 qpi.qpi_ill = (void *)phy;
2559 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2560 if (qpi.qpi_flags)
2561 qpi.qpi_flags |= FI_MBCAST;
2562 qpi.qpi_flags |= loopback;
2563
2564 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2565 &qpi, fw->hpe_mp, ifs);
2566
2567 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2568 if (rval == 0 && *(fw->hpe_mp) == NULL)
2569 rval = 1;
2570
2571 /* Notify IP the packet mblk_t and IP header pointers. */
2572 fw->hpe_mb = qpi.qpi_m;
2573 fw->hpe_hdr = qpi.qpi_data;
2574 if (rval == 0) {
2575 ip = qpi.qpi_data;
2576 swap = ntohs(ip->ip_len);
2577 ip->ip_len = swap;
2578 swap = ntohs(ip->ip_off);
2579 ip->ip_off = swap;
2580 }
2581 return rval;
2582
2583 }
2584 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2585 {
2586 hook_pkt_event_t *fw;
2587 int rval, hlen;
2588 qpktinfo_t qpi;
2589 phy_if_t phy;
2590
2591 fw = (hook_pkt_event_t *)info;
2592
2593 ASSERT(fw != NULL);
2594 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2595
2596 hlen = sizeof (ip6_t);
2597
2598 qpi.qpi_m = fw->hpe_mb;
2599 qpi.qpi_data = fw->hpe_hdr;
2600 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2601 qpi.qpi_ill = (void *)phy;
2602 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2603 if (qpi.qpi_flags)
2604 qpi.qpi_flags |= FI_MBCAST;
2605 qpi.qpi_flags |= loopback;
2606
2607 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2608 &qpi, fw->hpe_mp, arg);
2609
2610 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2611 if (rval == 0 && *(fw->hpe_mp) == NULL)
2612 rval = 1;
2613
2614 /* Notify IP the packet mblk_t and IP header pointers. */
2615 fw->hpe_mb = qpi.qpi_m;
2616 fw->hpe_hdr = qpi.qpi_data;
2617 return rval;
2618 }
2619
2620
2621 /* ------------------------------------------------------------------------ */
2622 /* Function: ipf_nic_event_v4 */
2623 /* Returns: int - 0 == no problems encountered */
2624 /* Parameters: event(I) - pointer to event */
2625 /* info(I) - pointer to information about a NIC event */
2626 /* */
2627 /* Function to receive asynchronous NIC events from IP */
2628 /* ------------------------------------------------------------------------ */
2629 /*ARGSUSED*/
2630 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2631 {
2632 struct sockaddr_in *sin;
2633 hook_nic_event_t *hn;
2634 ipf_stack_t *ifs = arg;
2635 void *new_ifp = NULL;
2636
2637 if (ifs->ifs_fr_running <= 0)
2638 return (0);
2639
2640 hn = (hook_nic_event_t *)info;
2641
2642 switch (hn->hne_event)
2643 {
2644 case NE_PLUMB :
2645 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2646 ifs);
2647 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2648 hn->hne_data, ifs);
2649 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2650 hn->hne_data, ifs);
2651 break;
2652
2653 case NE_UNPLUMB :
2654 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2655 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2656 ifs);
2657 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2658 break;
2659
2660 case NE_ADDRESS_CHANGE :
2661 /*
2662 * We only respond to events for logical interface 0 because
2663 * IPFilter only uses the first address given to a network
2664 * interface. We check for hne_lif==1 because the netinfo
2665 * code maps adds 1 to the lif number so that it can return
2666 * 0 to indicate "no more lifs" when walking them.
2667 */
2668 if (hn->hne_lif == 1) {
2669 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2670 ifs);
2671 sin = hn->hne_data;
2672 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2673 ifs);
2674 }
2675 break;
2676
2677 #if SOLARIS2 >= 10
2678 case NE_IFINDEX_CHANGE :
2679 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2680
2681 if (hn->hne_data != NULL) {
2682 /*
2683 * The netinfo passes interface index as int (hne_data should be
2684 * handled as a pointer to int), which is always 32bit. We need to
2685 * convert it to void pointer here, since interfaces are
2686 * represented as pointers to void in IPF. The pointers are 64 bits
2687 * long on 64bit platforms. Doing something like
2688 * (void *)((int) x)
2689 * will throw warning:
2690 * "cast to pointer from integer of different size"
2691 * during 64bit compilation.
2692 *
2693 * The line below uses (size_t) to typecast int to
2694 * size_t, which might be 64bit/32bit (depending
2695 * on architecture). Once we have proper 64bit/32bit
2696 * type (size_t), we can safely convert it to void pointer.
2697 */
2698 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2699 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2700 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2701 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2702 }
2703 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2704 break;
2705 #endif
2706
2707 default :
2708 break;
2709 }
2710
2711 return 0;
2712 }
2713
2714
2715 /* ------------------------------------------------------------------------ */
2716 /* Function: ipf_nic_event_v6 */
2717 /* Returns: int - 0 == no problems encountered */
2718 /* Parameters: event(I) - pointer to event */
2719 /* info(I) - pointer to information about a NIC event */
2720 /* */
2721 /* Function to receive asynchronous NIC events from IP */
2722 /* ------------------------------------------------------------------------ */
2723 /*ARGSUSED*/
2724 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2725 {
2726 struct sockaddr_in6 *sin6;
2727 hook_nic_event_t *hn;
2728 ipf_stack_t *ifs = arg;
2729 void *new_ifp = NULL;
2730
2731 if (ifs->ifs_fr_running <= 0)
2732 return (0);
2733
2734 hn = (hook_nic_event_t *)info;
2735
2736 switch (hn->hne_event)
2737 {
2738 case NE_PLUMB :
2739 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2740 hn->hne_data, ifs);
2741 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2742 hn->hne_data, ifs);
2743 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2744 hn->hne_data, ifs);
2745 break;
2746
2747 case NE_UNPLUMB :
2748 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2749 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2750 ifs);
2751 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2752 break;
2753
2754 case NE_ADDRESS_CHANGE :
2755 if (hn->hne_lif == 1) {
2756 sin6 = hn->hne_data;
2757 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2758 ifs);
2759 }
2760 break;
2761
2762 #if SOLARIS2 >= 10
2763 case NE_IFINDEX_CHANGE :
2764 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2765 if (hn->hne_data != NULL) {
2766 /*
2767 * The netinfo passes interface index as int (hne_data should be
2768 * handled as a pointer to int), which is always 32bit. We need to
2769 * convert it to void pointer here, since interfaces are
2770 * represented as pointers to void in IPF. The pointers are 64 bits
2771 * long on 64bit platforms. Doing something like
2772 * (void *)((int) x)
2773 * will throw warning:
2774 * "cast to pointer from integer of different size"
2775 * during 64bit compilation.
2776 *
2777 * The line below uses (size_t) to typecast int to
2778 * size_t, which might be 64bit/32bit (depending
2779 * on architecture). Once we have proper 64bit/32bit
2780 * type (size_t), we can safely convert it to void pointer.
2781 */
2782 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2783 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2784 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2785 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2786 }
2787 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2788 break;
2789 #endif
2790
2791 default :
2792 break;
2793 }
2794
2795 return 0;
2796 }
2797
2798 /*
2799 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2800 * are needed in Solaris kernel only. We don't need them in
2801 * ipftest to pretend the ICMP/RST packet was sent as a response.
2802 */
2803 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2804 /* ------------------------------------------------------------------------ */
2805 /* Function: fr_make_rst */
2806 /* Returns: int - 0 on success, -1 on failure */
2807 /* Parameters: fin(I) - pointer to packet information */
2808 /* */
2809 /* We must alter the original mblks passed to IPF from IP stack via */
2810 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */
2811 /* IPF can basicaly do only these things with mblk representing the packet: */
2812 /* leave it as it is (pass the packet) */
2813 /* */
2814 /* discard it (block the packet) */
2815 /* */
2816 /* alter it (i.e. NAT) */
2817 /* */
2818 /* As you can see IPF can not simply discard the mblk and supply a new one */
2819 /* instead to IP stack via FW_HOOKS. */
2820 /* */
2821 /* The return-rst action for packets coming via NIC is handled as follows: */
2822 /* mblk with packet is discarded */
2823 /* */
2824 /* new mblk with RST response is constructed and injected to network */
2825 /* */
2826 /* IPF can't inject packets to loopback interface, this is just another */
2827 /* limitation we have to deal with here. The only option to send RST */
2828 /* response to offending TCP packet coming via loopback is to alter it. */
2829 /* */
2830 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2831 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2832 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2833 /* ------------------------------------------------------------------------ */
2834 int fr_make_rst(fin)
2835 fr_info_t *fin;
2836 {
2837 uint16_t tmp_port;
2838 int rv = -1;
2839 uint32_t old_ack;
2840 tcphdr_t *tcp = NULL;
2841 struct in_addr tmp_src;
2842 #ifdef USE_INET6
2843 struct in6_addr tmp_src6;
2844 #endif
2845
2846 ASSERT(fin->fin_p == IPPROTO_TCP);
2847
2848 /*
2849 * We do not need to adjust chksum, since it is not being checked by
2850 * Solaris IP stack for loopback clients.
2851 */
2852 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2853 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2854
2855 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2856 /* Swap IPv4 addresses. */
2857 tmp_src = fin->fin_ip->ip_src;
2858 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2859 fin->fin_ip->ip_dst = tmp_src;
2860
2861 rv = 0;
2862 }
2863 else
2864 tcp = NULL;
2865 }
2866 #ifdef USE_INET6
2867 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2868 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2869 /*
2870 * We are relying on fact the next header is TCP, which is true
2871 * for regular TCP packets coming in over loopback.
2872 */
2873 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2874 /* Swap IPv6 addresses. */
2875 tmp_src6 = fin->fin_ip6->ip6_src;
2876 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2877 fin->fin_ip6->ip6_dst = tmp_src6;
2878
2879 rv = 0;
2880 }
2881 else
2882 tcp = NULL;
2883 }
2884 #endif
2885
2886 if (tcp != NULL) {
2887 /*
2888 * Adjust TCP header:
2889 * swap ports,
2890 * set flags,
2891 * set correct ACK number
2892 */
2893 tmp_port = tcp->th_sport;
2894 tcp->th_sport = tcp->th_dport;
2895 tcp->th_dport = tmp_port;
2896 old_ack = tcp->th_ack;
2897 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2898 tcp->th_seq = old_ack;
2899 tcp->th_flags = TH_RST | TH_ACK;
2900 }
2901
2902 return (rv);
2903 }
2904
2905 /* ------------------------------------------------------------------------ */
2906 /* Function: fr_make_icmp_v4 */
2907 /* Returns: int - 0 on success, -1 on failure */
2908 /* Parameters: fin(I) - pointer to packet information */
2909 /* */
2910 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2911 /* what is going to happen here and why. Once you read the comment there, */
2912 /* continue here with next paragraph. */
2913 /* */
2914 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2915 /* happen here: */
2916 /* (1) Original mblk is copied (duplicated). */
2917 /* */
2918 /* (2) ICMP header is created. */
2919 /* */
2920 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2921 /* data ready then. */
2922 /* */
2923 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2924 /* */
2925 /* (5) The mblk containing original packet is trimmed to contain IP */
2926 /* header only and ICMP chksum is computed. */
2927 /* */
2928 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2929 /* which now contains new IP header. If original packet was spread */
2930 /* over several mblks, only the first mblk is kept. */
2931 /* ------------------------------------------------------------------------ */
2932 static int fr_make_icmp_v4(fin)
2933 fr_info_t *fin;
2934 {
2935 struct in_addr tmp_src;
2936 tcphdr_t *tcp;
2937 struct icmp *icmp;
2938 mblk_t *mblk_icmp;
2939 mblk_t *mblk_ip;
2940 size_t icmp_pld_len; /* octets to append to ICMP header */
2941 size_t orig_iphdr_len; /* length of IP header only */
2942 uint32_t sum;
2943 uint16_t *buf;
2944 int len;
2945
2946
2947 if (fin->fin_v != 4)
2948 return (-1);
2949
2950 /*
2951 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2952 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2953 */
2954 tcp = (tcphdr_t *) fin->fin_dp;
2955
2956 if ((fin->fin_p == IPPROTO_TCP) &&
2957 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2958 return (-1);
2959
2960 /*
2961 * Step (1)
2962 *
2963 * Make copy of original mblk.
2964 *
2965 * We want to copy as much data as necessary, not less, not more. The
2966 * ICMPv4 payload length for unreachable messages is:
2967 * original IP header + 8 bytes of L4 (if there are any).
2968 *
2969 * We determine if there are at least 8 bytes of L4 data following IP
2970 * header first.
2971 */
2972 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2973 ICMPERR_ICMPHLEN : fin->fin_dlen;
2974 /*
2975 * Since we don't want to copy more data than necessary, we must trim
2976 * the original mblk here. The right way (STREAMish) would be to use
2977 * adjmsg() to trim it. However we would have to calculate the length
2978 * argument for adjmsg() from pointers we already have here.
2979 *
2980 * Since we have pointers and offsets, it's faster and easier for
2981 * us to just adjust pointers by hand instead of using adjmsg().
2982 */
2983 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2984 fin->fin_m->b_wptr += icmp_pld_len;
2985 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2986
2987 /*
2988 * Also we don't want to copy any L2 stuff, which might precede IP
2989 * header, so we have have to set b_rptr to point to the start of IP
2990 * header.
2991 */
2992 fin->fin_m->b_rptr += fin->fin_ipoff;
2993 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2994 return (-1);
2995 fin->fin_m->b_rptr -= fin->fin_ipoff;
2996
2997 /*
2998 * Step (2)
2999 *
3000 * Create an ICMP header, which will be appened to original mblk later.
3001 * ICMP header is just another mblk.
3002 */
3003 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
3004 if (mblk_icmp == NULL) {
3005 FREE_MB_T(mblk_ip);
3006 return (-1);
3007 }
3008
3009 MTYPE(mblk_icmp) = M_DATA;
3010 icmp = (struct icmp *) mblk_icmp->b_wptr;
3011 icmp->icmp_type = ICMP_UNREACH;
3012 icmp->icmp_code = fin->fin_icode & 0xFF;
3013 icmp->icmp_void = 0;
3014 icmp->icmp_cksum = 0;
3015 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
3016
3017 /*
3018 * Step (3)
3019 *
3020 * Complete ICMP packet - link ICMP header with L4 data from original
3021 * IP packet.
3022 */
3023 linkb(mblk_icmp, mblk_ip);
3024
3025 /*
3026 * Step (4)
3027 *
3028 * Swap IP addresses and change IP header fields accordingly in
3029 * original IP packet.
3030 *
3031 * There is a rule option return-icmp as a dest for physical
3032 * interfaces. This option becomes useless for loopback, since IPF box
3033 * uses same address as a loopback destination. We ignore the option
3034 * here, the ICMP packet will always look like as it would have been
3035 * sent from the original destination host.
3036 */
3037 tmp_src = fin->fin_ip->ip_src;
3038 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
3039 fin->fin_ip->ip_dst = tmp_src;
3040 fin->fin_ip->ip_p = IPPROTO_ICMP;
3041 fin->fin_ip->ip_sum = 0;
3042
3043 /*
3044 * Step (5)
3045 *
3046 * We trim the orignal mblk to hold IP header only.
3047 */
3048 fin->fin_m->b_wptr = fin->fin_dp;
3049 orig_iphdr_len = fin->fin_m->b_wptr -
3050 (fin->fin_m->b_rptr + fin->fin_ipoff);
3051 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
3052 orig_iphdr_len);
3053
3054 /*
3055 * ICMP chksum calculation. The data we are calculating chksum for are
3056 * spread over two mblks, therefore we have to use two for loops.
3057 *
3058 * First for loop computes chksum part for ICMP header.
3059 */
3060 buf = (uint16_t *) icmp;
3061 len = ICMPERR_ICMPHLEN;
3062 for (sum = 0; len > 1; len -= 2)
3063 sum += *buf++;
3064
3065 /*
3066 * Here we add chksum part for ICMP payload.
3067 */
3068 len = icmp_pld_len;
3069 buf = (uint16_t *) mblk_ip->b_rptr;
3070 for (; len > 1; len -= 2)
3071 sum += *buf++;
3072
3073 /*
3074 * Chksum is done.
3075 */
3076 sum = (sum >> 16) + (sum & 0xffff);
3077 sum += (sum >> 16);
3078 icmp->icmp_cksum = ~sum;
3079
3080 /*
3081 * Step (6)
3082 *
3083 * Release all packet mblks, except the first one.
3084 */
3085 if (fin->fin_m->b_cont != NULL) {
3086 FREE_MB_T(fin->fin_m->b_cont);
3087 }
3088
3089 /*
3090 * Append ICMP payload to first mblk, which already contains new IP
3091 * header.
3092 */
3093 linkb(fin->fin_m, mblk_icmp);
3094
3095 return (0);
3096 }
3097
3098 #ifdef USE_INET6
3099 /* ------------------------------------------------------------------------ */
3100 /* Function: fr_make_icmp_v6 */
3101 /* Returns: int - 0 on success, -1 on failure */
3102 /* Parameters: fin(I) - pointer to packet information */
3103 /* */
3104 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
3105 /* what and why is going to happen here. Once you read the comment there, */
3106 /* continue here with next paragraph. */
3107 /* */
3108 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
3109 /* The algorithm is fairly simple: */
3110 /* 1) We need to get copy of complete mblk. */
3111 /* */
3112 /* 2) New ICMPv6 header is created. */
3113 /* */
3114 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
3115 /* header. */
3116 /* */
3117 /* 4) The checksum must be adjusted. */
3118 /* */
3119 /* 5) IP addresses in original mblk are swapped and IP header data */
3120 /* are adjusted (protocol number). */
3121 /* */
3122 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
3123 /* linked with the ICMPv6 data we got from (3). */
3124 /* ------------------------------------------------------------------------ */
3125 static int fr_make_icmp_v6(fin)
3126 fr_info_t *fin;
3127 {
3128 struct icmp6_hdr *icmp6;
3129 tcphdr_t *tcp;
3130 struct in6_addr tmp_src6;
3131 size_t icmp_pld_len;
3132 mblk_t *mblk_ip, *mblk_icmp;
3133
3134 if (fin->fin_v != 6)
3135 return (-1);
3136
3137 /*
3138 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3139 * IP stack. If it is not SYN/FIN, then we must drop it silently.
3140 */
3141 tcp = (tcphdr_t *) fin->fin_dp;
3142
3143 if ((fin->fin_p == IPPROTO_TCP) &&
3144 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3145 return (-1);
3146
3147 /*
3148 * Step (1)
3149 *
3150 * We need to copy complete packet in case of IPv6, no trimming is
3151 * needed (except the L2 headers).
3152 */
3153 icmp_pld_len = M_LEN(fin->fin_m);
3154 fin->fin_m->b_rptr += fin->fin_ipoff;
3155 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3156 return (-1);
3157 fin->fin_m->b_rptr -= fin->fin_ipoff;
3158
3159 /*
3160 * Step (2)
3161 *
3162 * Allocate and create ICMP header.
3163 */
3164 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3165 BPRI_HI);
3166
3167 if (mblk_icmp == NULL)
3168 return (-1);
3169
3170 MTYPE(mblk_icmp) = M_DATA;
3171 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
3172 icmp6->icmp6_type = ICMP6_DST_UNREACH;
3173 icmp6->icmp6_code = fin->fin_icode & 0xFF;
3174 icmp6->icmp6_data32[0] = 0;
3175 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3176
3177 /*
3178 * Step (3)
3179 *
3180 * Link the copy of IP packet to ICMP header.
3181 */
3182 linkb(mblk_icmp, mblk_ip);
3183
3184 /*
3185 * Step (4)
3186 *
3187 * Calculate chksum - this is much more easier task than in case of
3188 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
3189 * We are making compensation just for change of packet length.
3190 */
3191 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3192
3193 /*
3194 * Step (5)
3195 *
3196 * Swap IP addresses.
3197 */
3198 tmp_src6 = fin->fin_ip6->ip6_src;
3199 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3200 fin->fin_ip6->ip6_dst = tmp_src6;
3201
3202 /*
3203 * and adjust IP header data.
3204 */
3205 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3206 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3207
3208 /*
3209 * Step (6)
3210 *
3211 * We must release all linked mblks from original packet and keep only
3212 * the first mblk with IP header to link ICMP data.
3213 */
3214 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3215
3216 if (fin->fin_m->b_cont != NULL) {
3217 FREE_MB_T(fin->fin_m->b_cont);
3218 }
3219
3220 /*
3221 * Append ICMP payload to IP header.
3222 */
3223 linkb(fin->fin_m, mblk_icmp);
3224
3225 return (0);
3226 }
3227 #endif /* USE_INET6 */
3228
3229 /* ------------------------------------------------------------------------ */
3230 /* Function: fr_make_icmp */
3231 /* Returns: int - 0 on success, -1 on failure */
3232 /* Parameters: fin(I) - pointer to packet information */
3233 /* */
3234 /* We must alter the original mblks passed to IPF from IP stack via */
3235 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
3236 /* comment at fr_make_rst() function. */
3237 /* */
3238 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
3239 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
3240 /* protocol version. However there are some details, which are common to */
3241 /* both IP versions. The details are going to be explained here. */
3242 /* */
3243 /* The packet looks as follows: */
3244 /* xxx | IP hdr | IP payload ... | */
3245 /* ^ ^ ^ ^ */
3246 /* | | | | */
3247 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3248 /* | | | */
3249 /* | | `- fin->fin_dp (in case of IPv4 points to L4 header) */
3250 /* | | */
3251 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
3252 /* | of loopback) */
3253 /* | */
3254 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
3255 /* */
3256 /* All relevant IP headers are pulled up into the first mblk. It happened */
3257 /* well in advance before the matching rule was found (the rule, which took */
3258 /* us here, to fr_make_icmp() function). */
3259 /* */
3260 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
3261 /* packet. New packet will be represented as chain of mblks. */
3262 /* orig mblk |- b_cont ---. */
3263 /* ^ `-> ICMP hdr |- b_cont--. */
3264 /* | ^ `-> duped orig mblk */
3265 /* | | ^ */
3266 /* `- The original mblk | | */
3267 /* will be trimmed | | */
3268 /* to contain IP header | | */
3269 /* only | | */
3270 /* | | */
3271 /* `- This is newly | */
3272 /* allocated mblk to | */
3273 /* hold ICMPv6 data. | */
3274 /* | */
3275 /* | */
3276 /* | */
3277 /* This is the copy of original mblk, it will contain -' */
3278 /* original IP packet in case of ICMPv6. In case of */
3279 /* ICMPv4 it will contain up to 8 bytes of IP payload */
3280 /* (TCP/UDP/L4) data from original packet. */
3281 /* ------------------------------------------------------------------------ */
3282 int fr_make_icmp(fin)
3283 fr_info_t *fin;
3284 {
3285 int rv;
3286
3287 if (fin->fin_v == 4)
3288 rv = fr_make_icmp_v4(fin);
3289 #ifdef USE_INET6
3290 else if (fin->fin_v == 6)
3291 rv = fr_make_icmp_v6(fin);
3292 #endif
3293 else
3294 rv = -1;
3295
3296 return (rv);
3297 }
3298
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - unfolded sum of the 16-bit words in buf          */
/* Parameters:  buf(I) - start of the data to sum                           */
/*              len(I) - number of bytes to sum                             */
/*                                                                          */
/* Adds up buf as 16-bit words in memory order.  A trailing odd byte is     */
/* counted as the first byte of a word whose second byte is zero.  The      */
/* carries are not folded; the caller reduces the result to 16 bits.        */
/* buf must be suitably aligned for uint16_t access.                        */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t *words = (const uint16_t *)buf;
	unsigned int nwords = len / 2;
	unsigned int i;
	uint32_t total = 0;

	for (i = 0; i < nwords; i++)
		total += words[i];

	if ((len % 2) != 0) {
		/*
		 * One byte is left over, located right behind the last
		 * complete word.
		 */
		const unsigned char *last;

		last = (const unsigned char *)(words + nwords);
		total += htons((*last) << 8);
	}

	return (total);
}
3325
3326 /* ------------------------------------------------------------------------ */
3327 /* Function: fr_calc_chksum */
3328 /* Returns: void */
3329 /* Parameters: fin - pointer to fr_info_t instance with packet data */
3330 /* pkt - pointer to duplicated packet */
3331 /* */
3332 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
3333 /* versions. */
3334 /* ------------------------------------------------------------------------ */
3335 void fr_calc_chksum(fin, pkt)
3336 fr_info_t *fin;
3337 mb_t *pkt;
3338 {
3339 struct pseudo_hdr {
3340 union {
3341 struct in_addr in4;
3342 #ifdef USE_INET6
3343 struct in6_addr in6;
3344 #endif
3345 } src_addr;
3346 union {
3347 struct in_addr in4;
3348 #ifdef USE_INET6
3349 struct in6_addr in6;
3350 #endif
3351 } dst_addr;
3352 char zero;
3353 char proto;
3354 uint16_t len;
3355 } phdr;
3356 uint32_t sum, ip_sum;
3357 void *buf;
3358 uint16_t *l4_csum_p;
3359 tcphdr_t *tcp;
3360 udphdr_t *udp;
3361 icmphdr_t *icmp;
3362 #ifdef USE_INET6
3363 struct icmp6_hdr *icmp6;
3364 #endif
3365 ip_t *ip;
3366 unsigned int len;
3367 int pld_len;
3368
3369 /*
3370 * We need to pullup the packet to the single continuous buffer to avoid
3371 * potential misaligment of b_rptr member in mblk chain.
3372 */
3373 if (pullupmsg(pkt, -1) == 0) {
3374 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3375 " will not be computed by IPF");
3376 return;
3377 }
3378
3379 /*
3380 * It is guaranteed IP header starts right at b_rptr, because we are
3381 * working with a copy of the original packet.
3382 *
3383 * Compute pseudo header chksum for TCP and UDP.
3384 */
3385 if ((fin->fin_p == IPPROTO_UDP) ||
3386 (fin->fin_p == IPPROTO_TCP)) {
3387 bzero(&phdr, sizeof (phdr));
3388 #ifdef USE_INET6
3389 if (fin->fin_v == 6) {
3390 phdr.src_addr.in6 = fin->fin_srcip6;
3391 phdr.dst_addr.in6 = fin->fin_dstip6;
3392 } else {
3393 phdr.src_addr.in4 = fin->fin_src;
3394 phdr.dst_addr.in4 = fin->fin_dst;
3395 }
3396 #else
3397 phdr.src_addr.in4 = fin->fin_src;
3398 phdr.dst_addr.in4 = fin->fin_dst;
3399 #endif
3400 phdr.zero = (char) 0;
3401 phdr.proto = fin->fin_p;
3402 phdr.len = htons((uint16_t)fin->fin_dlen);
3403 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3404 } else {
3405 sum = 0;
3406 }
3407
3408 /*
3409 * Set pointer to the L4 chksum field in the packet, set buf pointer to
3410 * the L4 header start.
3411 */
3412 switch (fin->fin_p) {
3413 case IPPROTO_UDP:
3414 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3415 l4_csum_p = &udp->uh_sum;
3416 buf = udp;
3417 break;
3418 case IPPROTO_TCP:
3419 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3420 l4_csum_p = &tcp->th_sum;
3421 buf = tcp;
3422 break;
3423 case IPPROTO_ICMP:
3424 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3425 l4_csum_p = &icmp->icmp_cksum;
3426 buf = icmp;
3427 break;
3428 #ifdef USE_INET6
3429 case IPPROTO_ICMPV6:
3430 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3431 l4_csum_p = &icmp6->icmp6_cksum;
3432 buf = icmp6;
3433 break;
3434 #endif
3435 default:
3436 l4_csum_p = NULL;
3437 }
3438
3439 /*
3440 * Compute L4 chksum if needed.
3441 */
3442 if (l4_csum_p != NULL) {
3443 *l4_csum_p = (uint16_t)0;
3444 pld_len = fin->fin_dlen;
3445 len = pkt->b_wptr - (unsigned char *)buf;
3446 ASSERT(len == pld_len);
3447 /*
3448 * Add payload sum to pseudoheader sum.
3449 */
3450 sum += fr_buf_sum(buf, len);
3451 while (sum >> 16)
3452 sum = (sum & 0xFFFF) + (sum >> 16);
3453
3454 *l4_csum_p = ~((uint16_t)sum);
3455 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3456 }
3457
3458 /*
3459 * The IP header chksum is needed just for IPv4.
3460 */
3461 if (fin->fin_v == 4) {
3462 /*
3463 * Compute IPv4 header chksum.
3464 */
3465 ip = (ip_t *)pkt->b_rptr;
3466 ip->ip_sum = (uint16_t)0;
3467 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3468 while (ip_sum >> 16)
3469 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3470
3471 ip->ip_sum = ~((uint16_t)ip_sum);
3472 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3473 }
3474
3475 return;
3476 }
3477
3478 #endif /* _KERNEL && SOLARIS2 >= 10 */