1 /*
2 * Copyright (C) 1993-2001, 2003 by Darren Reed.
3 *
4 * See the IPFILTER.LICENCE file for details on licencing.
5 *
6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 *
8 * Copyright 2019 Joyent, Inc.
9 */
10
#ifndef lint
/*
 * Source-control identification strings for this file; they are left out
 * when building under lint.
 */
static const char sccsid[] = "@(#)ip_fil_solaris.c 1.7 07/22/06 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
#endif /* !lint */
15
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/param.h>
19 #include <sys/cpuvar.h>
20 #include <sys/open.h>
21 #include <sys/ioctl.h>
22 #include <sys/filio.h>
23 #include <sys/systm.h>
24 #include <sys/strsubr.h>
25 #include <sys/strsun.h>
26 #include <sys/cred.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/mac_provider.h>
32 #include <sys/mkdev.h>
33 #include <sys/protosw.h>
34 #include <sys/socket.h>
35 #include <sys/dditypes.h>
36 #include <sys/cmn_err.h>
37 #include <sys/zone.h>
38 #include <net/if.h>
39 #include <net/af.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
42 #include <netinet/in_systm.h>
43 #include <netinet/ip.h>
44 #include <netinet/ip_var.h>
45 #include <netinet/tcp.h>
46 #include <netinet/udp.h>
47 #include <netinet/tcpip.h>
48 #include <netinet/ip_icmp.h>
49 #include "netinet/ip_compat.h"
50 #ifdef USE_INET6
51 # include <netinet/icmp6.h>
52 #endif
53 #include "netinet/ip_fil.h"
54 #include "netinet/ip_nat.h"
55 #include "netinet/ip_frag.h"
56 #include "netinet/ip_state.h"
57 #include "netinet/ip_auth.h"
58 #include "netinet/ip_proxy.h"
59 #include "netinet/ipf_stack.h"
60 #ifdef IPFILTER_LOOKUP
61 # include "netinet/ip_lookup.h"
62 #endif
63 #include <inet/ip_ire.h>
64
65 #include <sys/md5.h>
66 #include <sys/neti.h>
67
68 static int frzerostats __P((caddr_t, ipf_stack_t *));
69 static int fr_setipfloopback __P((int, ipf_stack_t *));
70 static int fr_enableipf __P((ipf_stack_t *, int));
71 static int fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
72 static int ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
73 static int ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
74 static int ipf_hook __P((hook_data_t, int, int, void *));
75 static int ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
76 static int ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
77 static int ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
78 void *));
79 static int ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
80 static int ipf_hook4 __P((hook_data_t, int, int, void *));
81 static int ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
82 static int ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
83 static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
84 void *));
85 static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
86 void *));
87 static int ipf_hook6 __P((hook_data_t, int, int, void *));
88 static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
89 void *));
90 static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
91 void *));
92 static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
93 void *));
94 static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
95 void *));
96
97 static int ipf_hookviona_in __P((hook_event_token_t, hook_data_t, void *));
98 static int ipf_hookviona_out __P((hook_event_token_t, hook_data_t,
99 void *));
100
101 extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
102 extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
103
104 static int ipf_hook_protocol_notify __P((hook_notify_cmd_t, void *,
105 const char *, const char *, const char *));
106 static int ipf_hook_instance_notify __P((hook_notify_cmd_t, void *,
107 const char *, const char *, const char *));
108
109 #if SOLARIS2 < 10
110 #if SOLARIS2 >= 7
111 u_int *ip_ttl_ptr = NULL;
112 u_int *ip_mtudisc = NULL;
113 # if SOLARIS2 >= 8
114 int *ip_forwarding = NULL;
115 u_int *ip6_forwarding = NULL;
116 # else
117 u_int *ip_forwarding = NULL;
118 # endif
119 #else
120 u_long *ip_ttl_ptr = NULL;
121 u_long *ip_mtudisc = NULL;
122 u_long *ip_forwarding = NULL;
123 #endif
124 #endif
125
126 vmem_t *ipf_minor; /* minor number arena */
127 void *ipf_state; /* DDI state */
128
129 /*
130 * GZ-controlled and per-zone stacks:
131 *
132 * For each non-global zone, we create two ipf stacks: the per-zone stack and
133 * the GZ-controlled stack. The per-zone stack can be controlled and observed
134 * from inside the zone or from the global zone. The GZ-controlled stack can
135 * only be controlled and observed from the global zone (though the rules
136 * still only affect that non-global zone).
137 *
138 * The two hooks are always arranged so that the GZ-controlled stack is always
139 * "outermost" with respect to the zone. The traffic flow then looks like
140 * this:
141 *
142 * Inbound:
143 *
144 * nic ---> [ GZ-controlled rules ] ---> [ per-zone rules ] ---> zone
145 *
146 * Outbound:
147 *
148 * nic <--- [ GZ-controlled rules ] <--- [ per-zone rules ] <--- zone
149 */
150
/*
 * Names under which the hooks are registered.  Each hook has a per-zone
 * name and a "_gz" name for the GZ-controlled stack.
 */

/* IPv4 hook names */
char	*hook4_nicevents =	"ipfilter_hook4_nicevents";
char	*hook4_nicevents_gz =	"ipfilter_hook4_nicevents_gz";
char	*hook4_in =		"ipfilter_hook4_in";
char	*hook4_in_gz =		"ipfilter_hook4_in_gz";
char	*hook4_out =		"ipfilter_hook4_out";
char	*hook4_out_gz =		"ipfilter_hook4_out_gz";
char	*hook4_loop_in =	"ipfilter_hook4_loop_in";
char	*hook4_loop_in_gz =	"ipfilter_hook4_loop_in_gz";
char	*hook4_loop_out =	"ipfilter_hook4_loop_out";
char	*hook4_loop_out_gz =	"ipfilter_hook4_loop_out_gz";

/* IPv6 hook names */
char	*hook6_nicevents =	"ipfilter_hook6_nicevents";
char	*hook6_nicevents_gz =	"ipfilter_hook6_nicevents_gz";
char	*hook6_in =		"ipfilter_hook6_in";
char	*hook6_in_gz =		"ipfilter_hook6_in_gz";
char	*hook6_out =		"ipfilter_hook6_out";
char	*hook6_out_gz =		"ipfilter_hook6_out_gz";
char	*hook6_loop_in =	"ipfilter_hook6_loop_in";
char	*hook6_loop_in_gz =	"ipfilter_hook6_loop_in_gz";
char	*hook6_loop_out =	"ipfilter_hook6_loop_out";
char	*hook6_loop_out_gz =	"ipfilter_hook6_loop_out_gz";

/* vnd IPv4/v6 hook names */
char	*hook4_vnd_in =		"ipfilter_hookvndl3v4_in";
char	*hook4_vnd_in_gz =	"ipfilter_hookvndl3v4_in_gz";
char	*hook6_vnd_in =		"ipfilter_hookvndl3v6_in";
char	*hook6_vnd_in_gz =	"ipfilter_hookvndl3v6_in_gz";
char	*hook4_vnd_out =	"ipfilter_hookvndl3v4_out";
char	*hook4_vnd_out_gz =	"ipfilter_hookvndl3v4_out_gz";
char	*hook6_vnd_out =	"ipfilter_hookvndl3v6_out";
char	*hook6_vnd_out_gz =	"ipfilter_hookvndl3v6_out_gz";

/* viona hook names */
char	*hook_viona_in =	"ipfilter_hookviona_in";
char	*hook_viona_in_gz =	"ipfilter_hookviona_in_gz";
char	*hook_viona_out =	"ipfilter_hookviona_out";
char	*hook_viona_out_gz =	"ipfilter_hookviona_out_gz";
190
191 /* ------------------------------------------------------------------------ */
192 /* Function: ipldetach */
193 /* Returns: int - 0 == success, else error. */
194 /* Parameters: Nil */
195 /* */
196 /* This function is responsible for undoing anything that might have been */
197 /* done in a call to iplattach(). It must be able to clean up from a call */
198 /* to iplattach() that did not succeed. Why might that happen? Someone */
199 /* configures a table to be so large that we cannot allocate enough memory */
200 /* for it. */
201 /* ------------------------------------------------------------------------ */
202 int ipldetach(ifs)
203 ipf_stack_t *ifs;
204 {
205
206 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
207
208 #if SOLARIS2 < 10
209
210 if (ifs->ifs_fr_control_forwarding & 2) {
211 if (ip_forwarding != NULL)
212 *ip_forwarding = 0;
213 #if SOLARIS2 >= 8
214 if (ip6_forwarding != NULL)
215 *ip6_forwarding = 0;
216 #endif
217 }
218 #endif
219
220 /*
221 * This lock needs to be dropped around the net_hook_unregister calls
222 * because we can deadlock here with:
223 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
224 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
225 */
226 RWLOCK_EXIT(&ifs->ifs_ipf_global);
227
228 #define UNDO_HOOK(_f, _b, _e, _h) \
229 do { \
230 if (ifs->_f != NULL) { \
231 if (ifs->_b) { \
232 int tmp = net_hook_unregister(ifs->_f, \
233 _e, ifs->_h); \
234 ifs->_b = (tmp != 0 && tmp != ENXIO); \
235 if (!ifs->_b && ifs->_h != NULL) { \
236 hook_free(ifs->_h); \
237 ifs->_h = NULL; \
238 } \
239 } else if (ifs->_h != NULL) { \
240 hook_free(ifs->_h); \
241 ifs->_h = NULL; \
242 } \
243 } \
244 _NOTE(CONSTCOND) \
245 } while (0)
246
247 /*
248 * Remove IPv6 Hooks
249 */
250 if (ifs->ifs_ipf_ipv6 != NULL) {
251 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
252 NH_PHYSICAL_IN, ifs_ipfhook6_in);
253 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
254 NH_PHYSICAL_OUT, ifs_ipfhook6_out);
255 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
256 NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
257 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
258 NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
259 UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
260 NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
261
262 if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
263 goto detach_failed;
264 ifs->ifs_ipf_ipv6 = NULL;
265 }
266
267 /*
268 * Remove IPv4 Hooks
269 */
270 if (ifs->ifs_ipf_ipv4 != NULL) {
271 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
272 NH_PHYSICAL_IN, ifs_ipfhook4_in);
273 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
274 NH_PHYSICAL_OUT, ifs_ipfhook4_out);
275 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
276 NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
277 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
278 NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
279 UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
280 NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
281
282 if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
283 goto detach_failed;
284 ifs->ifs_ipf_ipv4 = NULL;
285 }
286
287 /*
288 * Remove VND hooks
289 */
290 if (ifs->ifs_ipf_vndl3v4 != NULL) {
291 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
292 NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
293 UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
294 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
295
296 if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
297 goto detach_failed;
298 ifs->ifs_ipf_vndl3v4 = NULL;
299 }
300
301 if (ifs->ifs_ipf_vndl3v6 != NULL) {
302 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
303 NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
304 UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
305 NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
306
307 if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
308 goto detach_failed;
309 ifs->ifs_ipf_vndl3v6 = NULL;
310 }
311
312 /*
313 * Remove notification of viona hooks
314 */
315 net_instance_notify_unregister(ifs->ifs_netid,
316 ipf_hook_instance_notify);
317
318 #undef UNDO_HOOK
319
320 /*
321 * Normally, viona will unregister itself before ipldetach() is called,
322 * so these will be no-ops, but out of caution, we try to make sure
323 * we've removed any of our references.
324 */
325 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
326 NH_PHYSICAL_IN);
327 (void) ipf_hook_protocol_notify(HN_UNREGISTER, ifs, Hn_VIONA, NULL,
328 NH_PHYSICAL_OUT);
329
330 {
331 char netidstr[12]; /* Large enough for INT_MAX + NUL */
332 (void) snprintf(netidstr, sizeof (netidstr), "%d",
333 ifs->ifs_netid);
334
335 /*
336 * The notify callbacks expect the netid value passed as a
337 * string in the third argument. To prevent confusion if
338 * traced, we pass the same value the nethook framework would
339 * pass, even though the callback does not currently use the
340 * value.
341 */
342 (void) ipf_hook_instance_notify(HN_UNREGISTER, ifs, netidstr,
343 NULL, Hn_VIONA);
344 }
345
346 #ifdef IPFDEBUG
347 cmn_err(CE_CONT, "ipldetach()\n");
348 #endif
349
350 WRITE_ENTER(&ifs->ifs_ipf_global);
351 fr_deinitialise(ifs);
352
353 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
354 (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
355
356 if (ifs->ifs_ipf_locks_done == 1) {
357 MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
358 MUTEX_DESTROY(&ifs->ifs_ipf_rw);
359 RW_DESTROY(&ifs->ifs_ipf_tokens);
360 RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
361 ifs->ifs_ipf_locks_done = 0;
362 }
363
364 if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
365 ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
366 ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
367 ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
368 ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
369 return -1;
370
371 return 0;
372
373 detach_failed:
374 WRITE_ENTER(&ifs->ifs_ipf_global);
375 return -1;
376 }
377
378 int iplattach(ifs)
379 ipf_stack_t *ifs;
380 {
381 #if SOLARIS2 < 10
382 int i;
383 #endif
384 netid_t id = ifs->ifs_netid;
385
386 #ifdef IPFDEBUG
387 cmn_err(CE_CONT, "iplattach()\n");
388 #endif
389
390 ASSERT(RW_WRITE_HELD(&ifs->ifs_ipf_global.ipf_lk));
391 ifs->ifs_fr_flags = IPF_LOGGING;
392 #ifdef _KERNEL
393 ifs->ifs_fr_update_ipid = 0;
394 #else
395 ifs->ifs_fr_update_ipid = 1;
396 #endif
397 ifs->ifs_fr_minttl = 4;
398 ifs->ifs_fr_icmpminfragmtu = 68;
399 #if defined(IPFILTER_DEFAULT_BLOCK)
400 ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
401 #else
402 ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
403 #endif
404
405 bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
406 MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
407 MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
408 RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
409 RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
410 ifs->ifs_ipf_locks_done = 1;
411
412 if (fr_initialise(ifs) < 0)
413 return -1;
414
415 /*
416 * For incoming packets, we want the GZ-controlled hooks to run before
417 * the per-zone hooks, regardless of what order they're are installed.
418 * See the "GZ-controlled and per-zone stacks" comment block at the top
419 * of this file.
420 */
421 #define HOOK_INIT_GZ_BEFORE(x, fn, n, gzn, a) \
422 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
423 (x)->h_hint = ifs->ifs_gz_controlled ? HH_BEFORE : HH_AFTER; \
424 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
425
426 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
427 hook4_nicevents, hook4_nicevents_gz, ifs);
428 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_in, ipf_hook4_in,
429 hook4_in, hook4_in_gz, ifs);
430 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
431 hook4_loop_in, hook4_loop_in_gz, ifs);
432
433 /*
434 * For outgoing packets, we want the GZ-controlled hooks to run after
435 * the per-zone hooks, regardless of what order they're are installed.
436 * See the "GZ-controlled and per-zone stacks" comment block at the top
437 * of this file.
438 */
439 #define HOOK_INIT_GZ_AFTER(x, fn, n, gzn, a) \
440 HOOK_INIT(x, fn, ifs->ifs_gz_controlled ? gzn : n, ifs); \
441 (x)->h_hint = ifs->ifs_gz_controlled ? HH_AFTER : HH_BEFORE; \
442 (x)->h_hintvalue = (uintptr_t) (ifs->ifs_gz_controlled ? n : gzn);
443
444 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_out, ipf_hook4_out,
445 hook4_out, hook4_out_gz, ifs);
446 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
447 hook4_loop_out, hook4_loop_out_gz, ifs);
448
449 /*
450 * If we hold this lock over all of the net_hook_register calls, we
451 * can cause a deadlock to occur with the following lock ordering:
452 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
453 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
454 */
455 RWLOCK_EXIT(&ifs->ifs_ipf_global);
456
457 /*
458 * Add IPv4 hooks
459 */
460 ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
461 if (ifs->ifs_ipf_ipv4 == NULL)
462 goto hookup_failed;
463
464 ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
465 NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
466 if (!ifs->ifs_hook4_nic_events)
467 goto hookup_failed;
468
469 ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
470 NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
471 if (!ifs->ifs_hook4_physical_in)
472 goto hookup_failed;
473
474 ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
475 NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
476 if (!ifs->ifs_hook4_physical_out)
477 goto hookup_failed;
478
479 if (ifs->ifs_ipf_loopback) {
480 ifs->ifs_hook4_loopback_in = (net_hook_register(
481 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
482 ifs->ifs_ipfhook4_loop_in) == 0);
483 if (!ifs->ifs_hook4_loopback_in)
484 goto hookup_failed;
485
486 ifs->ifs_hook4_loopback_out = (net_hook_register(
487 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
488 ifs->ifs_ipfhook4_loop_out) == 0);
489 if (!ifs->ifs_hook4_loopback_out)
490 goto hookup_failed;
491 }
492
493 /*
494 * Add IPv6 hooks
495 */
496 ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
497 if (ifs->ifs_ipf_ipv6 == NULL)
498 goto hookup_failed;
499
500 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
501 hook6_nicevents, hook6_nicevents_gz, ifs);
502 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_in, ipf_hook6_in,
503 hook6_in, hook6_in_gz, ifs);
504 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
505 hook6_loop_in, hook6_loop_in_gz, ifs);
506 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_out, ipf_hook6_out,
507 hook6_out, hook6_out_gz, ifs);
508 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
509 hook6_loop_out, hook6_loop_out_gz, ifs);
510
511 ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
512 NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
513 if (!ifs->ifs_hook6_nic_events)
514 goto hookup_failed;
515
516 ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
517 NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
518 if (!ifs->ifs_hook6_physical_in)
519 goto hookup_failed;
520
521 ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
522 NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
523 if (!ifs->ifs_hook6_physical_out)
524 goto hookup_failed;
525
526 if (ifs->ifs_ipf_loopback) {
527 ifs->ifs_hook6_loopback_in = (net_hook_register(
528 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
529 ifs->ifs_ipfhook6_loop_in) == 0);
530 if (!ifs->ifs_hook6_loopback_in)
531 goto hookup_failed;
532
533 ifs->ifs_hook6_loopback_out = (net_hook_register(
534 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
535 ifs->ifs_ipfhook6_loop_out) == 0);
536 if (!ifs->ifs_hook6_loopback_out)
537 goto hookup_failed;
538 }
539
540 /*
541 * Add VND INET hooks
542 */
543 ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
544 if (ifs->ifs_ipf_vndl3v4 == NULL)
545 goto hookup_failed;
546
547 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
548 hook4_vnd_in, hook4_vnd_in_gz, ifs);
549 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
550 hook4_vnd_out, hook4_vnd_out_gz, ifs);
551 ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
552 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
553 if (!ifs->ifs_hookvndl3v4_physical_in)
554 goto hookup_failed;
555
556 ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
557 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
558 if (!ifs->ifs_hookvndl3v4_physical_out)
559 goto hookup_failed;
560
561
562 /*
563 * VND INET6 hooks
564 */
565 ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
566 if (ifs->ifs_ipf_vndl3v6 == NULL)
567 goto hookup_failed;
568
569 HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
570 hook6_vnd_in, hook6_vnd_in_gz, ifs);
571 HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
572 hook6_vnd_out, hook6_vnd_out_gz, ifs);
573 ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
574 NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
575 if (!ifs->ifs_hookvndl3v6_physical_in)
576 goto hookup_failed;
577
578 ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
579 NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
580 if (!ifs->ifs_hookvndl3v6_physical_out)
581 goto hookup_failed;
582
583 /*
584 * VIONA INET hooks. While the nethook framework allows us to register
585 * hooks for events that haven't been registered yet, we instead
586 * register and unregister our hooks in response to notifications
587 * about the viona hooks from the nethook framework. This prevents
588 * problems when the viona module gets unloaded while the ipf module
589 * does not. If we do not unregister our hooks after the viona module
590 * is unloaded, the viona module cannot later re-register them if it
591 * gets reloaded. As the ip, vnd, and ipf modules are rarely unloaded
592 * even on DEBUG kernels, they do not experience this issue.
593 */
594 if (net_instance_notify_register(id, ipf_hook_instance_notify,
595 ifs) != 0)
596 goto hookup_failed;
597
598 /*
599 * Reacquire ipf_global, now it is safe.
600 */
601 WRITE_ENTER(&ifs->ifs_ipf_global);
602
603 /* Do not use private interface ip_params_arr[] in Solaris 10 */
604 #if SOLARIS2 < 10
605
606 #if SOLARIS2 >= 8
607 ip_forwarding = &ip_g_forward;
608 #endif
609 /*
610 * XXX - There is no terminator for this array, so it is not possible
611 * to tell if what we are looking for is missing and go off the end
612 * of the array.
613 */
614
615 #if SOLARIS2 <= 8
616 for (i = 0; ; i++) {
617 if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
618 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
619 } else if (!strcmp(ip_param_arr[i].ip_param_name,
620 "ip_path_mtu_discovery")) {
621 ip_mtudisc = &ip_param_arr[i].ip_param_value;
622 }
623 #if SOLARIS2 < 8
624 else if (!strcmp(ip_param_arr[i].ip_param_name,
625 "ip_forwarding")) {
626 ip_forwarding = &ip_param_arr[i].ip_param_value;
627 }
628 #else
629 else if (!strcmp(ip_param_arr[i].ip_param_name,
630 "ip6_forwarding")) {
631 ip6_forwarding = &ip_param_arr[i].ip_param_value;
632 }
633 #endif
634
635 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
636 #if SOLARIS2 >= 8
637 ip6_forwarding != NULL &&
638 #endif
639 ip_forwarding != NULL)
640 break;
641 }
642 #endif
643
644 if (ifs->ifs_fr_control_forwarding & 1) {
645 if (ip_forwarding != NULL)
646 *ip_forwarding = 1;
647 #if SOLARIS2 >= 8
648 if (ip6_forwarding != NULL)
649 *ip6_forwarding = 1;
650 #endif
651 }
652
653 #endif
654
655 return 0;
656 hookup_failed:
657 WRITE_ENTER(&ifs->ifs_ipf_global);
658 return -1;
659 }
660
661 /* ------------------------------------------------------------------------ */
662 /*
663 * Called whenever a nethook protocol is registered or unregistered. Currently
664 * only used to add or remove the hooks for viona.
665 *
666 * While the function signature requires returning int, nothing
667 * in usr/src/uts/common/io/hook.c that invokes the callbacks
668 * captures the return value (nor is there currently any documentation
669 * on what return values should be). For now at least, we'll return 0
670 * on success (or 'not applicable') or an error value. Even if the
671 * nethook framework doesn't use the return address, it can be observed via
672 * dtrace if needed.
673 */
674 static int
675 ipf_hook_protocol_notify(hook_notify_cmd_t command, void *arg,
676 const char *name, const char *dummy __unused, const char *he_name)
677 {
678 ipf_stack_t *ifs = arg;
679 hook_t **hookpp;
680 char *hook_name, *hint_name;
681 hook_func_t hookfn;
682 boolean_t *hookedp;
683 hook_hint_t hint;
684 boolean_t out;
685 int ret = 0;
686
687 const boolean_t gz = ifs->ifs_gz_controlled;
688
689 /* We currently only care about viona hooks notifications */
690 if (strcmp(name, Hn_VIONA) != 0)
691 return (0);
692
693 if (strcmp(he_name, NH_PHYSICAL_IN) == 0) {
694 out = B_FALSE;
695 } else if (strcmp(he_name, NH_PHYSICAL_OUT) == 0) {
696 out = B_TRUE;
697 } else {
698 /*
699 * If we've added more hook events to viona, we must add
700 * the corresponding handling here (even if it's just to
701 * ignore it) to prevent the firewall from not working as
702 * intended.
703 */
704 cmn_err(CE_PANIC, "%s: unhandled hook event %s", __func__,
705 he_name);
706
707 return (0);
708 }
709
710 if (out) {
711 hookpp = &ifs->ifs_ipfhookviona_out;
712 hookfn = ipf_hookviona_out;
713 hookedp = &ifs->ifs_hookviona_physical_out;
714 name = gz ? hook_viona_out_gz : hook_viona_out;
715 hint = gz ? HH_AFTER : HH_BEFORE;
716 hint_name = gz ? hook_viona_out : hook_viona_out_gz;
717 } else {
718 hookpp = &ifs->ifs_ipfhookviona_in;
719 hookfn = ipf_hookviona_in;
720 hookedp = &ifs->ifs_hookviona_physical_in;
721 name = gz ? hook_viona_in_gz : hook_viona_in;
722 hint = gz ? HH_BEFORE : HH_AFTER;
723 hint_name = gz ? hook_viona_in : hook_viona_in_gz;
724 }
725
726 switch (command) {
727 default:
728 case HN_NONE:
729 break;
730 case HN_REGISTER:
731 HOOK_INIT(*hookpp, hookfn, (char *)name, ifs);
732 (*hookpp)->h_hint = hint;
733 (*hookpp)->h_hintvalue = (uintptr_t)hint_name;
734 ret = net_hook_register(ifs->ifs_ipf_viona,
735 (char *)he_name, *hookpp);
736 if (ret != 0) {
737 cmn_err(CE_NOTE, "%s: could not register hook "
738 "(hook family=%s hook=%s) err=%d", __func__,
739 name, he_name, ret);
740 *hookedp = B_FALSE;
741 return (ret);
742 }
743 *hookedp = B_TRUE;
744 break;
745 case HN_UNREGISTER:
746 if (ifs->ifs_ipf_viona == NULL)
747 break;
748
749 ret = *hookedp ? net_hook_unregister(ifs->ifs_ipf_viona,
750 (char *)he_name, *hookpp) : 0;
751 if ((ret == 0 || ret == ENXIO)) {
752 if (*hookpp != NULL) {
753 hook_free(*hookpp);
754 *hookpp = NULL;
755 }
756 *hookedp = B_FALSE;
757 }
758 break;
759 }
760
761 return (ret);
762 }
763
764 /*
765 * Called whenever a new nethook instance is created. Currently only used
766 * with the Hn_VIONA nethooks. Similar to ipf_hook_protocol_notify, the out
767 * function signature must return an int, though the result is never used.
768 * We elect to return 0 on success (or not applicable) or a non-zero value
769 * on error.
770 */
771 static int
772 ipf_hook_instance_notify(hook_notify_cmd_t command, void *arg,
773 const char *netid, const char *dummy __unused, const char *instance)
774 {
775 ipf_stack_t *ifs = arg;
776 int ret = 0;
777
778 /* We currently only care about viona hooks */
779 if (strcmp(instance, Hn_VIONA) != 0)
780 return (0);
781
782 switch (command) {
783 case HN_NONE:
784 default:
785 return (0);
786 case HN_REGISTER:
787 ifs->ifs_ipf_viona = net_protocol_lookup(ifs->ifs_netid,
788 NHF_VIONA);
789
790 if (ifs->ifs_ipf_viona == NULL)
791 return (EPROTONOSUPPORT);
792
793 ret = net_protocol_notify_register(ifs->ifs_ipf_viona,
794 ipf_hook_protocol_notify, ifs);
795 VERIFY(ret == 0 || ret == ESHUTDOWN);
796 break;
797 case HN_UNREGISTER:
798 if (ifs->ifs_ipf_viona == NULL)
799 break;
800 VERIFY0(net_protocol_notify_unregister(ifs->ifs_ipf_viona,
801 ipf_hook_protocol_notify));
802 VERIFY0(net_protocol_release(ifs->ifs_ipf_viona));
803 ifs->ifs_ipf_viona = NULL;
804 break;
805 }
806
807 return (ret);
808 }
809
810 static int fr_setipfloopback(set, ifs)
811 int set;
812 ipf_stack_t *ifs;
813 {
814 if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
815 return EFAULT;
816
817 if (set && !ifs->ifs_ipf_loopback) {
818 ifs->ifs_ipf_loopback = 1;
819
820 ifs->ifs_hook4_loopback_in = (net_hook_register(
821 ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
822 ifs->ifs_ipfhook4_loop_in) == 0);
823 if (!ifs->ifs_hook4_loopback_in)
824 return EINVAL;
825
826 ifs->ifs_hook4_loopback_out = (net_hook_register(
827 ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
828 ifs->ifs_ipfhook4_loop_out) == 0);
829 if (!ifs->ifs_hook4_loopback_out)
830 return EINVAL;
831
832 ifs->ifs_hook6_loopback_in = (net_hook_register(
833 ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
834 ifs->ifs_ipfhook6_loop_in) == 0);
835 if (!ifs->ifs_hook6_loopback_in)
836 return EINVAL;
837
838 ifs->ifs_hook6_loopback_out = (net_hook_register(
839 ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
840 ifs->ifs_ipfhook6_loop_out) == 0);
841 if (!ifs->ifs_hook6_loopback_out)
842 return EINVAL;
843
844 } else if (!set && ifs->ifs_ipf_loopback) {
845 ifs->ifs_ipf_loopback = 0;
846
847 ifs->ifs_hook4_loopback_in =
848 (net_hook_unregister(ifs->ifs_ipf_ipv4,
849 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
850 if (ifs->ifs_hook4_loopback_in)
851 return EBUSY;
852
853 ifs->ifs_hook4_loopback_out =
854 (net_hook_unregister(ifs->ifs_ipf_ipv4,
855 NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
856 if (ifs->ifs_hook4_loopback_out)
857 return EBUSY;
858
859 ifs->ifs_hook6_loopback_in =
860 (net_hook_unregister(ifs->ifs_ipf_ipv6,
861 NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
862 if (ifs->ifs_hook6_loopback_in)
863 return EBUSY;
864
865 ifs->ifs_hook6_loopback_out =
866 (net_hook_unregister(ifs->ifs_ipf_ipv6,
867 NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
868 if (ifs->ifs_hook6_loopback_out)
869 return EBUSY;
870 }
871 return 0;
872 }
873
874
875 /*
876 * Filter ioctl interface.
877 */
878 /*ARGSUSED*/
879 int iplioctl(dev, cmd, data, mode, cp, rp)
880 dev_t dev;
881 int cmd;
882 #if SOLARIS2 >= 7
883 intptr_t data;
884 #else
885 int *data;
886 #endif
887 int mode;
888 cred_t *cp;
889 int *rp;
890 {
891 int error = 0, tmp;
892 friostat_t fio;
893 minor_t unit;
894 u_int enable;
895 ipf_stack_t *ifs;
896 zoneid_t zid;
897 ipf_devstate_t *isp;
898
899 #ifdef IPFDEBUG
900 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
901 dev, cmd, data, mode, cp, rp);
902 #endif
903 unit = getminor(dev);
904
905 isp = ddi_get_soft_state(ipf_state, unit);
906 if (isp == NULL)
907 return ENXIO;
908 unit = isp->ipfs_minor;
909
910 if (unit == IPL_LOGEV)
911 return (ipf_cfwlog_ioctl(dev, cmd, data, mode, cp, rp));
912
913 zid = crgetzoneid(cp);
914 if (cmd == SIOCIPFZONESET) {
915 if (zid == GLOBAL_ZONEID)
916 return fr_setzoneid(isp, (caddr_t) data);
917 return EACCES;
918 }
919
920 /*
921 * ipf_find_stack returns with a read lock on ifs_ipf_global
922 */
923 ifs = ipf_find_stack(zid, isp);
924 if (ifs == NULL)
925 return ENXIO;
926
927 if (ifs->ifs_fr_running <= 0) {
928 if (unit != IPL_LOGIPF) {
929 RWLOCK_EXIT(&ifs->ifs_ipf_global);
930 return EIO;
931 }
932 if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
933 cmd != SIOCIPFSET && cmd != SIOCFRENB &&
934 cmd != SIOCGETFS && cmd != SIOCGETFF) {
935 RWLOCK_EXIT(&ifs->ifs_ipf_global);
936 return EIO;
937 }
938 }
939
940 if (ifs->ifs_fr_enable_active != 0) {
941 RWLOCK_EXIT(&ifs->ifs_ipf_global);
942 return EBUSY;
943 }
944
945 error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
946 curproc, ifs);
947 if (error != -1) {
948 RWLOCK_EXIT(&ifs->ifs_ipf_global);
949 return error;
950 }
951 error = 0;
952
953 switch (cmd)
954 {
955 case SIOCFRENB :
956 if (!(mode & FWRITE))
957 error = EPERM;
958 else {
959 error = COPYIN((caddr_t)data, (caddr_t)&enable,
960 sizeof(enable));
961 if (error != 0) {
962 error = EFAULT;
963 break;
964 }
965
966 RWLOCK_EXIT(&ifs->ifs_ipf_global);
967 WRITE_ENTER(&ifs->ifs_ipf_global);
968
969 /*
970 * We must recheck fr_enable_active here, since we've
971 * dropped ifs_ipf_global from R in order to get it
972 * exclusively.
973 */
974 if (ifs->ifs_fr_enable_active == 0) {
975 ifs->ifs_fr_enable_active = 1;
976 error = fr_enableipf(ifs, enable);
977 ifs->ifs_fr_enable_active = 0;
978 }
979 }
980 break;
981 case SIOCIPFSET :
982 if (!(mode & FWRITE)) {
983 error = EPERM;
984 break;
985 }
986 /* FALLTHRU */
987 case SIOCIPFGETNEXT :
988 case SIOCIPFGET :
989 error = fr_ipftune(cmd, (void *)data, ifs);
990 break;
991 case SIOCSETFF :
992 if (!(mode & FWRITE))
993 error = EPERM;
994 else {
995 error = COPYIN((caddr_t)data,
996 (caddr_t)&ifs->ifs_fr_flags,
997 sizeof(ifs->ifs_fr_flags));
998 if (error != 0)
999 error = EFAULT;
1000 }
1001 break;
1002 case SIOCIPFLP :
1003 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1004 sizeof(tmp));
1005 if (error != 0)
1006 error = EFAULT;
1007 else
1008 error = fr_setipfloopback(tmp, ifs);
1009 break;
1010 case SIOCGETFF :
1011 error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
1012 sizeof(ifs->ifs_fr_flags));
1013 if (error != 0)
1014 error = EFAULT;
1015 break;
1016 case SIOCFUNCL :
1017 error = fr_resolvefunc((void *)data);
1018 break;
1019 case SIOCINAFR :
1020 case SIOCRMAFR :
1021 case SIOCADAFR :
1022 case SIOCZRLST :
1023 if (!(mode & FWRITE))
1024 error = EPERM;
1025 else
1026 error = frrequest(unit, cmd, (caddr_t)data,
1027 ifs->ifs_fr_active, 1, ifs);
1028 break;
1029 case SIOCINIFR :
1030 case SIOCRMIFR :
1031 case SIOCADIFR :
1032 if (!(mode & FWRITE))
1033 error = EPERM;
1034 else
1035 error = frrequest(unit, cmd, (caddr_t)data,
1036 1 - ifs->ifs_fr_active, 1, ifs);
1037 break;
1038 case SIOCSWAPA :
1039 if (!(mode & FWRITE))
1040 error = EPERM;
1041 else {
1042 WRITE_ENTER(&ifs->ifs_ipf_mutex);
1043 bzero((char *)ifs->ifs_frcache,
1044 sizeof (ifs->ifs_frcache));
1045 error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
1046 (caddr_t)data,
1047 sizeof(ifs->ifs_fr_active));
1048 if (error != 0)
1049 error = EFAULT;
1050 else
1051 ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
1052 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
1053 }
1054 break;
1055 case SIOCGETFS :
1056 fr_getstat(&fio, ifs);
1057 error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
1058 break;
1059 case SIOCFRZST :
1060 if (!(mode & FWRITE))
1061 error = EPERM;
1062 else
1063 error = fr_zerostats((caddr_t)data, ifs);
1064 break;
1065 case SIOCIPFFL :
1066 if (!(mode & FWRITE))
1067 error = EPERM;
1068 else {
1069 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1070 sizeof(tmp));
1071 if (!error) {
1072 tmp = frflush(unit, 4, tmp, ifs);
1073 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1074 sizeof(tmp));
1075 if (error != 0)
1076 error = EFAULT;
1077 } else
1078 error = EFAULT;
1079 }
1080 break;
1081 #ifdef USE_INET6
1082 case SIOCIPFL6 :
1083 if (!(mode & FWRITE))
1084 error = EPERM;
1085 else {
1086 error = COPYIN((caddr_t)data, (caddr_t)&tmp,
1087 sizeof(tmp));
1088 if (!error) {
1089 tmp = frflush(unit, 6, tmp, ifs);
1090 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1091 sizeof(tmp));
1092 if (error != 0)
1093 error = EFAULT;
1094 } else
1095 error = EFAULT;
1096 }
1097 break;
1098 #endif
1099 case SIOCSTLCK :
1100 error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1101 if (error == 0) {
1102 ifs->ifs_fr_state_lock = tmp;
1103 ifs->ifs_fr_nat_lock = tmp;
1104 ifs->ifs_fr_frag_lock = tmp;
1105 ifs->ifs_fr_auth_lock = tmp;
1106 } else
1107 error = EFAULT;
1108 break;
1109 #ifdef IPFILTER_LOG
1110 case SIOCIPFFB :
1111 if (!(mode & FWRITE))
1112 error = EPERM;
1113 else {
1114 tmp = ipflog_clear(unit, ifs);
1115 error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
1116 sizeof(tmp));
1117 if (error)
1118 error = EFAULT;
1119 }
1120 break;
1121 #endif /* IPFILTER_LOG */
1122 case SIOCFRSYN :
1123 if (!(mode & FWRITE))
1124 error = EPERM;
1125 else {
1126 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1127 WRITE_ENTER(&ifs->ifs_ipf_global);
1128
1129 frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1130 fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1131 fr_nataddrsync(0, NULL, NULL, ifs);
1132 fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
1133 error = 0;
1134 }
1135 break;
1136 case SIOCGFRST :
1137 error = fr_outobj((void *)data, fr_fragstats(ifs),
1138 IPFOBJ_FRAGSTAT);
1139 break;
1140 case FIONREAD :
1141 #ifdef IPFILTER_LOG
1142 tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
1143
1144 error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
1145 if (error != 0)
1146 error = EFAULT;
1147 #endif
1148 break;
1149 case SIOCIPFITER :
1150 error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
1151 curproc, ifs);
1152 break;
1153
1154 case SIOCGENITER :
1155 error = ipf_genericiter((caddr_t)data, crgetuid(cp),
1156 curproc, ifs);
1157 break;
1158
1159 case SIOCIPFDELTOK :
1160 error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
1161 if (error != 0) {
1162 error = EFAULT;
1163 } else {
1164 error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
1165 }
1166 break;
1167
1168 default :
1169 #ifdef IPFDEBUG
1170 cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
1171 cmd, (void *)data);
1172 #endif
1173 error = EINVAL;
1174 break;
1175 }
1176 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1177 return error;
1178 }
1179
1180
1181 static int fr_enableipf(ifs, enable)
1182 ipf_stack_t *ifs;
1183 int enable;
1184 {
1185 int error;
1186
1187 if (!enable) {
1188 error = ipldetach(ifs);
1189 if (error == 0)
1190 ifs->ifs_fr_running = -1;
1191 return error;
1192 }
1193
1194 if (ifs->ifs_fr_running > 0)
1195 return 0;
1196
1197 error = iplattach(ifs);
1198 if (error == 0) {
1199 if (ifs->ifs_fr_timer_id == NULL) {
1200 int hz = drv_usectohz(500000);
1201
1202 ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
1203 (void *)ifs,
1204 hz);
1205 }
1206 ifs->ifs_fr_running = 1;
1207 } else {
1208 (void) ipldetach(ifs);
1209 }
1210 return error;
1211 }
1212
1213
1214 phy_if_t get_unit(name, v, ifs)
1215 char *name;
1216 int v;
1217 ipf_stack_t *ifs;
1218 {
1219 net_handle_t nif;
1220
1221 if (v == 4)
1222 nif = ifs->ifs_ipf_ipv4;
1223 else if (v == 6)
1224 nif = ifs->ifs_ipf_ipv6;
1225 else
1226 return 0;
1227
1228 return (net_phylookup(nif, name));
1229 }
1230
1231 /*
1232 * routines below for saving IP headers to buffer
1233 */
1234 /*ARGSUSED*/
1235 int iplopen(devp, flags, otype, cred)
1236 dev_t *devp;
1237 int flags, otype;
1238 cred_t *cred;
1239 {
1240 ipf_devstate_t *isp;
1241 minor_t min = getminor(*devp);
1242 minor_t minor;
1243
1244 #ifdef IPFDEBUG
1245 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
1246 #endif
1247 if (!(otype & OTYP_CHR))
1248 return ENXIO;
1249
1250 if (IPL_LOGMAX < min)
1251 return ENXIO;
1252
1253 /* Special-case ipfev: global-zone-open only. */
1254 if (min == IPL_LOGEV) {
1255 if (crgetzoneid(cred) != GLOBAL_ZONEID)
1256 return (ENXIO);
1257 /*
1258 * Else enable the CFW logging of events.
1259 * NOTE: For now, we only allow one open at a time.
1260 * Use atomic_add to confirm/deny. And also for now,
1261 * assume sizeof (boolean_t) == sizeof (int).
1262 */
1263 if (atomic_inc_uint_nv(&ipf_cfwlog_enabled) > 1) {
1264 atomic_dec_uint(&ipf_cfwlog_enabled);
1265 return (EBUSY);
1266 }
1267 }
1268
1269 minor = (minor_t)(uintptr_t)vmem_alloc(ipf_minor, 1,
1270 VM_BESTFIT | VM_SLEEP);
1271
1272 if (ddi_soft_state_zalloc(ipf_state, minor) != 0) {
1273 vmem_free(ipf_minor, (void *)(uintptr_t)minor, 1);
1274 if (min == IPL_LOGEV)
1275 atomic_dec_uint(&ipf_cfwlog_enabled);
1276 return ENXIO;
1277 }
1278
1279 *devp = makedevice(getmajor(*devp), minor);
1280 isp = ddi_get_soft_state(ipf_state, minor);
1281 VERIFY(isp != NULL);
1282
1283 isp->ipfs_minor = min;
1284 isp->ipfs_zoneid = IPFS_ZONE_UNSET;
1285
1286 return 0;
1287 }
1288
1289
1290 /*ARGSUSED*/
1291 int iplclose(dev, flags, otype, cred)
1292 dev_t dev;
1293 int flags, otype;
1294 cred_t *cred;
1295 {
1296 minor_t min = getminor(dev);
1297 ipf_devstate_t *isp;
1298
1299 #ifdef IPFDEBUG
1300 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
1301 #endif
1302
1303 if (IPL_LOGMAX < min)
1304 return ENXIO;
1305
1306 isp = ddi_get_soft_state(ipf_state, min);
1307 if (isp != NULL && isp->ipfs_minor == IPL_LOGEV) {
1308 /* Disable CFW logging. */
1309 membar_exit();
1310 atomic_dec_uint(&ipf_cfwlog_enabled);
1311 }
1312
1313 ddi_soft_state_free(ipf_state, min);
1314 vmem_free(ipf_minor, (void *)(uintptr_t)min, 1);
1315
1316 return 0;
1317 }
1318
1319 #ifdef IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
 */
1326 /*ARGSUSED*/
1327 int iplread(dev, uio, cp)
1328 dev_t dev;
1329 register struct uio *uio;
1330 cred_t *cp;
1331 {
1332 ipf_stack_t *ifs;
1333 int ret;
1334 minor_t unit;
1335 ipf_devstate_t *isp;
1336
1337 unit = getminor(dev);
1338 isp = ddi_get_soft_state(ipf_state, unit);
1339 if (isp == NULL)
1340 return ENXIO;
1341 unit = isp->ipfs_minor;
1342
1343 if (unit == IPL_LOGEV)
1344 return (ipf_cfwlog_read(dev, uio, cp));
1345
1346 /*
1347 * ipf_find_stack returns with a read lock on ifs_ipf_global
1348 */
1349 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1350 if (ifs == NULL)
1351 return ENXIO;
1352
1353 # ifdef IPFDEBUG
1354 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
1355 # endif
1356
1357 if (ifs->ifs_fr_running < 1) {
1358 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1359 return EIO;
1360 }
1361
1362 # ifdef IPFILTER_SYNC
1363 if (unit == IPL_LOGSYNC) {
1364 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1365 return ipfsync_read(uio);
1366 }
1367 # endif
1368
1369 ret = ipflog_read(unit, uio, ifs);
1370 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1371 return ret;
1372 }
1373 #endif /* IPFILTER_LOG */
1374
1375
1376 /*
1377 * iplread/ipllog
1378 * both of these must operate with at least splnet() lest they be
1379 * called during packet processing and cause an inconsistancy to appear in
1380 * the filter lists.
1381 */
1382 int iplwrite(dev, uio, cp)
1383 dev_t dev;
1384 register struct uio *uio;
1385 cred_t *cp;
1386 {
1387 ipf_stack_t *ifs;
1388 minor_t unit;
1389 ipf_devstate_t *isp;
1390
1391 unit = getminor(dev);
1392 isp = ddi_get_soft_state(ipf_state, unit);
1393 if (isp == NULL)
1394 return ENXIO;
1395 unit = isp->ipfs_minor;
1396
1397 if (unit == IPL_LOGEV)
1398 return (EIO); /* ipfev doesn't support write yet. */
1399
1400 /*
1401 * ipf_find_stack returns with a read lock on ifs_ipf_global
1402 */
1403 ifs = ipf_find_stack(crgetzoneid(cp), isp);
1404 if (ifs == NULL)
1405 return ENXIO;
1406
1407 #ifdef IPFDEBUG
1408 cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
1409 #endif
1410
1411 if (ifs->ifs_fr_running < 1) {
1412 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1413 return EIO;
1414 }
1415
1416 #ifdef IPFILTER_SYNC
1417 if (getminor(dev) == IPL_LOGSYNC) {
1418 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1419 return ipfsync_write(uio);
1420 }
1421 #endif /* IPFILTER_SYNC */
1422 dev = dev; /* LINT */
1423 uio = uio; /* LINT */
1424 cp = cp; /* LINT */
1425 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1426 return ENXIO;
1427 }
1428
1429
1430 /*
1431 * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
1432 * requires a large amount of setting up and isn't any more efficient.
1433 */
1434 int fr_send_reset(fin)
1435 fr_info_t *fin;
1436 {
1437 tcphdr_t *tcp, *tcp2;
1438 int tlen, hlen;
1439 mblk_t *m;
1440 #ifdef USE_INET6
1441 ip6_t *ip6;
1442 #endif
1443 ip_t *ip;
1444
1445 tcp = fin->fin_dp;
1446 if (tcp->th_flags & TH_RST)
1447 return -1;
1448
1449 #ifndef IPFILTER_CKSUM
1450 if (fr_checkl4sum(fin) == -1)
1451 return -1;
1452 #endif
1453
1454 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
1455 #ifdef USE_INET6
1456 if (fin->fin_v == 6)
1457 hlen = sizeof(ip6_t);
1458 else
1459 #endif
1460 hlen = sizeof(ip_t);
1461 hlen += sizeof(*tcp2);
1462 if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
1463 return -1;
1464
1465 m->b_rptr += 64;
1466 MTYPE(m) = M_DATA;
1467 m->b_wptr = m->b_rptr + hlen;
1468 ip = (ip_t *)m->b_rptr;
1469 bzero((char *)ip, hlen);
1470 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1471 tcp2->th_dport = tcp->th_sport;
1472 tcp2->th_sport = tcp->th_dport;
1473 if (tcp->th_flags & TH_ACK) {
1474 tcp2->th_seq = tcp->th_ack;
1475 tcp2->th_flags = TH_RST;
1476 } else {
1477 tcp2->th_ack = ntohl(tcp->th_seq);
1478 tcp2->th_ack += tlen;
1479 tcp2->th_ack = htonl(tcp2->th_ack);
1480 tcp2->th_flags = TH_RST|TH_ACK;
1481 }
1482 tcp2->th_off = sizeof(struct tcphdr) >> 2;
1483
1484 ip->ip_v = fin->fin_v;
1485 #ifdef USE_INET6
1486 if (fin->fin_v == 6) {
1487 ip6 = (ip6_t *)m->b_rptr;
1488 ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1489 ip6->ip6_src = fin->fin_dst6.in6;
1490 ip6->ip6_dst = fin->fin_src6.in6;
1491 ip6->ip6_plen = htons(sizeof(*tcp));
1492 ip6->ip6_nxt = IPPROTO_TCP;
1493 tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1494 } else
1495 #endif
1496 {
1497 ip->ip_src.s_addr = fin->fin_daddr;
1498 ip->ip_dst.s_addr = fin->fin_saddr;
1499 ip->ip_id = fr_nextipid(fin);
1500 ip->ip_hl = sizeof(*ip) >> 2;
1501 ip->ip_p = IPPROTO_TCP;
1502 ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1503 ip->ip_tos = fin->fin_ip->ip_tos;
1504 tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1505 }
1506 return fr_send_ip(fin, m, &m);
1507 }
1508
/*
 * Function:	fr_send_ip
 * Returns:	the result of fr_fastroute(), documented there as
 *		 0: success
 *		-1: failed
 * Parameters:
 *	fin: packet information for the packet that triggered this send
 *	m:   the message block where the new packet's ip head starts
 *	mpp: address of the pointer to that message; handed on to
 *	     fr_fastroute() and stored in the new packet information
 *
 * Send a new packet through the IP stack.
 *
 * For IPv4 packets, ip_len must be in host byte order on entry.  This
 * function fills in ip_ttl and ip_sum, and sets ip_off (on SOLARIS2 >= 10
 * only when path MTU discovery is reported as enabled; otherwise the
 * caller's value is kept).  ip_v is not written here; the callers in this
 * file set it before calling.
 *
 * For IPv6 packets, ip6_vfc and ip6_hlim are filled in by this function;
 * the callers in this file set ip6_flow themselves beforehand.
 *
 * All other portions of the packet must be in on-the-wire format.
 */
/*ARGSUSED*/
static int fr_send_ip(fin, m, mpp)
fr_info_t *fin;
mblk_t *m, **mpp;
{
	qpktinfo_t qpi, *qpip;
	fr_info_t fnew;
	ip_t *ip;
	int i, hlen;
	ipf_stack_t *ifs = fin->fin_ifs;

	ip = (ip_t *)m->b_rptr;
	/* fnew describes the new packet; start from an all-zero state. */
	bzero((char *)&fnew, sizeof(fnew));

#ifdef USE_INET6
	if (fin->fin_v == 6) {
		ip6_t *ip6;

		ip6 = (ip6_t *)ip;
		ip6->ip6_vfc = 0x60;
		ip6->ip6_hlim = 127;
		fnew.fin_v = 6;
		hlen = sizeof(*ip6);
		/* ip6_plen excludes the fixed header, fin_plen includes it. */
		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
	} else
#endif
	{
		fnew.fin_v = 4;
#if SOLARIS2 >= 10
		ip->ip_ttl = 255;
		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
			ip->ip_off = htons(IP_DF);
#else
		if (ip_ttl_ptr != NULL)
			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
		else
			ip->ip_ttl = 63;
		if (ip_mtudisc != NULL)
			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
		else
			ip->ip_off = htons(IP_DF);
#endif
		/*
		 * The dance with byte order and ip_len/ip_off is because in
		 * fr_fastroute, it expects them to be in host byte order but
		 * ipf_cksum expects them to be in network byte order.
		 */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);
		hlen = sizeof(*ip);
		fnew.fin_plen = ip->ip_len;
	}

	/*
	 * Build a private qpktinfo_t for the new packet, borrowing only the
	 * interface (qpi_ill) from the triggering packet.
	 */
	qpip = fin->fin_qpi;
	qpi.qpi_off = 0;
	qpi.qpi_ill = qpip->qpi_ill;
	qpi.qpi_m = m;
	qpi.qpi_data = ip;
	fnew.fin_qpi = &qpi;
	fnew.fin_ifp = fin->fin_ifp;
	fnew.fin_flx = FI_NOCKSUM;
	fnew.fin_m = m;
	fnew.fin_qfm = m;
	fnew.fin_ip = ip;
	fnew.fin_mp = mpp;
	fnew.fin_hlen = hlen;
	fnew.fin_dp = (char *)ip + hlen;
	fnew.fin_ifs = fin->fin_ifs;
	/* Result deliberately ignored. */
	(void) fr_makefrip(hlen, ip, &fnew);

	/* A NULL destination: fr_fastroute() picks it from the packet. */
	i = fr_fastroute(m, mpp, &fnew, NULL);
	return i;
}
1603
1604
/*
 * fr_send_icmp_err - build an ICMP (or ICMPv6) error in response to the
 * packet described by fin and send it with fr_send_ip().
 *
 * type: ICMP type to send; for IPv6 it is translated through
 *       icmptoicmp6types[].
 * fin:  packet information of the offending packet; fin_icode supplies the
 *       ICMP code.
 * dst:  zero to use an address of the receiving interface (looked up with
 *       fr_ifpaddr()) as the source of the error, non-zero to use the
 *       offending packet's destination address.
 *
 * Returns -1 on a bad type/code, a failed fr_checkl4sum() (only compiled in
 * when IPFILTER_CKSUM is not defined), an allocation failure or a failed
 * address lookup; 0 without sending anything when the offending IPv4 packet
 * is an ICMP message other than the four request types listed below;
 * otherwise the result of fr_send_ip().
 */
int fr_send_icmp_err(type, fin, dst)
int type;
fr_info_t *fin;
int dst;
{
	struct in_addr dst4;
	struct icmp *icmp;
	qpktinfo_t *qpi;
	int hlen, code;
	phy_if_t phy;
	u_short sz;
#ifdef USE_INET6
	mblk_t *mb;
#endif
	mblk_t *m;
#ifdef USE_INET6
	ip6_t *ip6;
#endif
	ip_t *ip;
	ipf_stack_t *ifs = fin->fin_ifs;

	if ((type < 0) || (type > ICMP_MAXTYPE))
		return -1;

	code = fin->fin_icode;
#ifdef USE_INET6
	/* code is later used as an index into icmptoicmp6unreach[]. */
	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
		return -1;
#endif

#ifndef IPFILTER_CKSUM
	if (fr_checkl4sum(fin) == -1)
		return -1;
#endif

	qpi = fin->fin_qpi;

#ifdef USE_INET6
	mb = fin->fin_qfm;

	if (fin->fin_v == 6) {
		/*
		 * IPv6: quote as much of the first message block of the
		 * offending packet as is present, capped at 512 bytes.
		 */
		sz = sizeof(ip6_t);
		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
		hlen = sizeof(ip6_t);
		type = icmptoicmp6types[type];
		if (type == ICMP6_DST_UNREACH)
			code = icmptoicmp6unreach[code];
	} else
#endif
	{
		/*
		 * For an offending ICMP packet, only the request types below
		 * get an error back; anything else is silently left alone.
		 * NOTE(review): the switch looks at the upper byte of
		 * fin_data[0], presumably the ICMP type - confirm.
		 */
		if ((fin->fin_p == IPPROTO_ICMP) &&
		    !(fin->fin_flx & FI_SHORT))
			switch (ntohs(fin->fin_data[0]) >> 8)
			{
			case ICMP_ECHO :
			case ICMP_TSTAMP :
			case ICMP_IREQ :
			case ICMP_MASKREQ :
				break;
			default :
				return 0;
			}

		/* New IP header + quoted IP header + 8 bytes of payload. */
		sz = sizeof(ip_t) * 2;
		sz += 8;		/* 64 bits of data */
		hlen = sizeof(ip_t);
	}

	/* Add the ICMP header that precedes the quoted packet. */
	sz += offsetof(struct icmp, icmp_ip);
	/* 64 spare bytes are left in front of the IP header. */
	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
		return -1;
	MTYPE(m) = M_DATA;
	m->b_rptr += 64;
	m->b_wptr = m->b_rptr + sz;
	bzero((char *)m->b_rptr, (size_t)sz);
	ip = (ip_t *)m->b_rptr;
	ip->ip_v = fin->fin_v;
	icmp = (struct icmp *)(m->b_rptr + hlen);
	icmp->icmp_type = type & 0xff;
	icmp->icmp_code = code & 0xff;
	phy = (phy_if_t)qpi->qpi_ill;
	/* "Fragmentation needed" errors also carry the interface MTU. */
	if (type == ICMP_UNREACH && (phy != 0) &&
	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );

#ifdef USE_INET6
	if (fin->fin_v == 6) {
		struct in6_addr dst6;
		int csz;

		if (dst == 0) {
			/* Shadows the function-level ifs; same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
			    (void *)&dst6, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else
			dst6 = fin->fin_dst6.in6;

		/* csz keeps the full size; sz becomes the IPv6 payload. */
		csz = sz;
		sz -= sizeof(ip6_t);
		ip6 = (ip6_t *)m->b_rptr;
		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
		ip6->ip6_plen = htons((u_short)sz);
		ip6->ip6_nxt = IPPROTO_ICMPV6;
		ip6->ip6_src = dst6;
		ip6->ip6_dst = fin->fin_src6.in6;
		/* sz is now just the number of quoted bytes to copy. */
		sz -= offsetof(struct icmp, icmp_ip);
		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
		/*
		 * The checksum field is loaded with the ICMPv6 length rather
		 * than a finished checksum.
		 * NOTE(review): presumably completed further down the output
		 * path - confirm.
		 */
		icmp->icmp_cksum = csz - sizeof(ip6_t);
	} else
#endif
	{
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_p = IPPROTO_ICMP;
		ip->ip_id = fin->fin_ip->ip_id;
		ip->ip_tos = fin->fin_ip->ip_tos;
		/* Host byte order, as fr_send_ip() expects. */
		ip->ip_len = (u_short)sz;
		if (dst == 0) {
			/* Shadows the function-level ifs; same value. */
			ipf_stack_t *ifs = fin->fin_ifs;

			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
			    (void *)&dst4, NULL, ifs) == -1) {
				FREE_MB_T(m);
				return -1;
			}
		} else {
			dst4 = fin->fin_dst;
		}
		ip->ip_src = dst4;
		ip->ip_dst = fin->fin_src;
		/* Quote the offending IP header (without options) ... */
		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
		    sizeof(*fin->fin_ip));
		/* ... followed by the first 8 bytes after its full header. */
		bcopy((char *)fin->fin_ip + fin->fin_hlen,
		    (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
		/* The quoted header goes out in network byte order. */
		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
		    sz - sizeof(ip_t));
	}

	/*
	 * Need to exit out of these so we don't recursively call rw_enter
	 * from fr_qout.
	 */
	return fr_send_ip(fin, m, &m);
}
1754
1755 #include <sys/time.h>
1756 #include <sys/varargs.h>
1757
1758 #ifndef _KERNEL
1759 #include <stdio.h>
1760 #endif
1761
1762 /*
1763 * Return the first IP Address associated with an interface
1764 * For IPv6, we walk through the list of logical interfaces and return
1765 * the address of the first one that isn't a link-local interface.
1766 * We can't assume that it is :1 because another link-local address
1767 * may have been assigned there.
1768 */
1769 /*ARGSUSED*/
1770 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1771 int v, atype;
1772 void *ifptr;
1773 struct in_addr *inp, *inpmask;
1774 ipf_stack_t *ifs;
1775 {
1776 struct sockaddr_in6 v6addr[2];
1777 struct sockaddr_in v4addr[2];
1778 net_ifaddr_t type[2];
1779 net_handle_t net_data;
1780 phy_if_t phyif;
1781 void *array;
1782
1783 switch (v)
1784 {
1785 case 4:
1786 net_data = ifs->ifs_ipf_ipv4;
1787 array = v4addr;
1788 break;
1789 case 6:
1790 net_data = ifs->ifs_ipf_ipv6;
1791 array = v6addr;
1792 break;
1793 default:
1794 net_data = NULL;
1795 break;
1796 }
1797
1798 if (net_data == NULL)
1799 return -1;
1800
1801 phyif = (phy_if_t)ifptr;
1802
1803 switch (atype)
1804 {
1805 case FRI_PEERADDR :
1806 type[0] = NA_PEER;
1807 break;
1808
1809 case FRI_BROADCAST :
1810 type[0] = NA_BROADCAST;
1811 break;
1812
1813 default :
1814 type[0] = NA_ADDRESS;
1815 break;
1816 }
1817
1818 type[1] = NA_NETMASK;
1819
1820 if (v == 6) {
1821 lif_if_t idx = 0;
1822
1823 do {
1824 idx = net_lifgetnext(net_data, phyif, idx);
1825 if (net_getlifaddr(net_data, phyif, idx, 2, type,
1826 array) < 0)
1827 return -1;
1828 if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1829 !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1830 break;
1831 } while (idx != 0);
1832
1833 if (idx == 0)
1834 return -1;
1835
1836 return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1837 inp, inpmask);
1838 }
1839
1840 if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1841 return -1;
1842
1843 return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1844 }
1845
1846
1847 u_32_t fr_newisn(fin)
1848 fr_info_t *fin;
1849 {
1850 static int iss_seq_off = 0;
1851 u_char hash[16];
1852 u_32_t newiss;
1853 MD5_CTX ctx;
1854 ipf_stack_t *ifs = fin->fin_ifs;
1855
1856 /*
1857 * Compute the base value of the ISS. It is a hash
1858 * of (saddr, sport, daddr, dport, secret).
1859 */
1860 MD5Init(&ctx);
1861
1862 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1863 sizeof(fin->fin_fi.fi_src));
1864 MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1865 sizeof(fin->fin_fi.fi_dst));
1866 MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1867
1868 MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1869
1870 MD5Final(hash, &ctx);
1871
1872 bcopy(hash, &newiss, sizeof(newiss));
1873
1874 /*
1875 * Now increment our "timer", and add it in to
1876 * the computed value.
1877 *
1878 * XXX Use `addin'?
1879 * XXX TCP_ISSINCR too large to use?
1880 */
1881 iss_seq_off += 0x00010000;
1882 newiss += iss_seq_off;
1883 return newiss;
1884 }
1885
1886
1887 /* ------------------------------------------------------------------------ */
1888 /* Function: fr_nextipid */
1889 /* Returns: int - 0 == success, -1 == error (packet should be droppped) */
1890 /* Parameters: fin(I) - pointer to packet information */
1891 /* */
1892 /* Returns the next IPv4 ID to use for this packet. */
1893 /* ------------------------------------------------------------------------ */
1894 u_short fr_nextipid(fin)
1895 fr_info_t *fin;
1896 {
1897 static u_short ipid = 0;
1898 u_short id;
1899 ipf_stack_t *ifs = fin->fin_ifs;
1900
1901 MUTEX_ENTER(&ifs->ifs_ipf_rw);
1902 if (fin->fin_pktnum != 0) {
1903 id = fin->fin_pktnum & 0xffff;
1904 } else {
1905 id = ipid++;
1906 }
1907 MUTEX_EXIT(&ifs->ifs_ipf_rw);
1908
1909 return id;
1910 }
1911
1912
1913 #ifndef IPFILTER_CKSUM
1914 /* ARGSUSED */
1915 #endif
1916 INLINE void fr_checkv4sum(fin)
1917 fr_info_t *fin;
1918 {
1919 #ifdef IPFILTER_CKSUM
1920 if (fr_checkl4sum(fin) == -1)
1921 fin->fin_flx |= FI_BAD;
1922 #endif
1923 }
1924
1925
#ifdef	USE_INET6
/*
 * fr_checkv6sum - IPv6 counterpart of fr_checkv4sum: when IPFILTER_CKSUM is
 * defined, set FI_BAD in fin_flx if fr_checkl4sum() rejects the packet.
 * Without IPFILTER_CKSUM this function does nothing.
 */
# ifndef	IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef	IPFILTER_CKSUM
	if (fr_checkl4sum(fin) != -1)
		return;
	fin->fin_flx |= FI_BAD;
# endif
}
#endif	/* USE_INET6 */
1939
1940
1941 #if (SOLARIS2 < 7)
1942 void fr_slowtimer()
1943 #else
1944 /*ARGSUSED*/
1945 void fr_slowtimer __P((void *arg))
1946 #endif
1947 {
1948 ipf_stack_t *ifs = arg;
1949
1950 READ_ENTER(&ifs->ifs_ipf_global);
1951 if (ifs->ifs_fr_running != 1) {
1952 ifs->ifs_fr_timer_id = NULL;
1953 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1954 return;
1955 }
1956 ipf_expiretokens(ifs);
1957 fr_fragexpire(ifs);
1958 fr_timeoutstate(ifs);
1959 fr_natexpire(ifs);
1960 fr_authexpire(ifs);
1961 ifs->ifs_fr_ticks++;
1962 if (ifs->ifs_fr_running == 1)
1963 ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1964 drv_usectohz(500000));
1965 else
1966 ifs->ifs_fr_timer_id = NULL;
1967 RWLOCK_EXIT(&ifs->ifs_ipf_global);
1968 }
1969
1970
/* ------------------------------------------------------------------------ */
/* Function:    fr_pullup                                                   */
/* Returns:     NULL == pullup failed, else pointer to protocol header      */
/* Parameters:  m(I)   - pointer to buffer where data packet starts         */
/*              fin(I) - pointer to packet information                      */
/*              len(I) - number of bytes to pullup                          */
/*                                                                          */
/* Attempt to move at least len bytes (from the start of the buffer) into a */
/* single buffer for ease of access.  Operating system native functions are */
/* used to manage buffers - if necessary.  If the entire packet ends up in  */
/* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
/* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
/* and ONLY if the pullup succeeds.                                         */
/*                                                                          */
/* If the pullup fails the whole chain at *fin->fin_mp is freed and the     */
/* packet pointers in fin (fin_m, fin_ip, fin_dp, *fin_mp) and qpi_data are */
/* set to NULL.                                                             */
/*                                                                          */
/* We assume that 'min' is a pointer to a buffer that is part of the chain  */
/* of buffers that starts at *fin->fin_mp.                                  */
/* ------------------------------------------------------------------------ */
void *fr_pullup(min, fin, len)
mb_t *min;
fr_info_t *fin;
int len;
{
	qpktinfo_t *qpi = fin->fin_qpi;
	int out = fin->fin_out, dpoff, ipoff;
	mb_t *m = min, *m1, *m2;
	char *ip;
	uint32_t start, stuff, end, value, flags;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (m == NULL)
		return NULL;

	/* Already marked as coalesced: nothing to do. */
	ip = (char *)fin->fin_ip;
	if ((fin->fin_flx & FI_COALESCE) != 0)
		return ip;

	/*
	 * Remember where fin_dp sits relative to the IP header so it can be
	 * re-derived after the data may have moved.
	 */
	ipoff = fin->fin_ipoff;
	if (fin->fin_dp != NULL)
		dpoff = (char *)fin->fin_dp - (char *)ip;
	else
		dpoff = 0;

	/* Only pull up when this buffer holds fewer bytes than needed. */
	if (M_LEN(m) < len + ipoff) {

		/*
		 * If the IP header offset is not a multiple of 4, move
		 * b_rptr back by the number of bytes that would make it so,
		 * provided that stays within the data block; those bytes
		 * are added to the pullup length and b_rptr is moved
		 * forward again afterwards.
		 * NOTE(review): presumably this keeps the IP header 32bit
		 * aligned after pullupmsg(); the original note said
		 * pfil_precheck guarantees alignment - confirm.
		 */
		int inc = 0;

		if (ipoff > 0) {
			if ((ipoff & 3) != 0) {
				inc = 4 - (ipoff & 3);
				if (m->b_rptr - inc >= m->b_datap->db_base)
					m->b_rptr -= inc;
				else
					inc = 0;
			}
		}

		/*
		 * XXX This is here as a work around for a bug with DEBUG
		 * XXX Solaris kernels.  The problem is b_prev is used by IP
		 * XXX code as a way to stash the phyint_index for a packet,
		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
		 * XXX for both of these to be NULL.  See 6442390.
		 */
		m1 = m;
		/* Saved here and put back on the head after the pullup. */
		m2 = m->b_prev;

		do {
			m1->b_next = NULL;
			m1->b_prev = NULL;
			m1 = m1->b_cont;
		} while (m1);

		/*
		 * Need to preserve checksum information by copying them
		 * to newmp which heads the pulluped message.
		 */
		mac_hcksum_get(m, &start, &stuff, &end, &value, &flags);

		if (pullupmsg(m, len + ipoff + inc) == 0) {
			/* fr_pull[1] counts failed pullups. */
			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
			FREE_MB_T(*fin->fin_mp);
			*fin->fin_mp = NULL;
			fin->fin_m = NULL;
			fin->fin_ip = NULL;
			fin->fin_dp = NULL;
			qpi->qpi_data = NULL;
			return NULL;
		}

		mac_hcksum_set(m, start, stuff, end, value, flags);

		m->b_prev = m2;
		m->b_rptr += inc;
		fin->fin_m = m;
		ip = MTOD(m, char *) + ipoff;
		qpi->qpi_data = ip;
	}

	/* fr_pull[0] counts successful calls, with or without a pullup. */
	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
	fin->fin_ip = (ip_t *)ip;
	if (fin->fin_dp != NULL)
		fin->fin_dp = (char *)fin->fin_ip + dpoff;

	if (len == fin->fin_plen)
		fin->fin_flx |= FI_COALESCE;
	return ip;
}
2083
2084
2085 /*
2086 * Function: fr_verifysrc
2087 * Returns: int (really boolean)
2088 * Parameters: fin - packet information
2089 *
2090 * Check whether the packet has a valid source address for the interface on
2091 * which the packet arrived, implementing the "fr_chksrc" feature.
2092 * Returns true iff the packet's source address is valid.
2093 */
2094 int fr_verifysrc(fin)
2095 fr_info_t *fin;
2096 {
2097 net_handle_t net_data_p;
2098 phy_if_t phy_ifdata_routeto;
2099 struct sockaddr sin;
2100 ipf_stack_t *ifs = fin->fin_ifs;
2101
2102 if (fin->fin_v == 4) {
2103 net_data_p = ifs->ifs_ipf_ipv4;
2104 } else if (fin->fin_v == 6) {
2105 net_data_p = ifs->ifs_ipf_ipv6;
2106 } else {
2107 return (0);
2108 }
2109
2110 /* Get the index corresponding to the if name */
2111 sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2112 bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
2113 phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
2114
2115 return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
2116 }
2117
2118 /*
2119 * Return true only if forwarding is enabled on the interface.
2120 */
2121 static int
2122 fr_forwarding_enabled(phy_if_t phyif, net_handle_t ndp)
2123 {
2124 lif_if_t lif;
2125
2126 for (lif = net_lifgetnext(ndp, phyif, 0); lif > 0;
2127 lif = net_lifgetnext(ndp, phyif, lif)) {
2128 int res;
2129 uint64_t flags;
2130
2131 res = net_getlifflags(ndp, phyif, lif, &flags);
2132 if (res != 0)
2133 return (0);
2134 if (flags & IFF_ROUTER)
2135 return (1);
2136 }
2137
2138 return (0);
2139 }
2140
2141 /*
2142 * Function: fr_fastroute
2143 * Returns: 0: success;
2144 * -1: failed
2145 * Parameters:
2146 * mb: the message block where ip head starts
 *    mpp: the pointer to the pointer of the original
2148 * packet message
2149 * fin: packet information
2150 * fdp: destination interface information
2151 * if it is NULL, no interface information provided.
2152 *
2153 * This function is for fastroute/to/dup-to rules. It calls
2154 * pfil_make_lay2_packet to search route, make lay-2 header
2155 * ,and identify output queue for the IP packet.
2156 * The destination address depends on the following conditions:
2157 * 1: for fastroute rule, fdp is passed in as NULL, so the
2158 * destination address is the IP Packet's destination address
2159 * 2: for to/dup-to rule, if an ip address is specified after
2160 * the interface name, this address is the as destination
2161 * address. Otherwise IP Packet's destination address is used
2162 */
2163 int fr_fastroute(mb, mpp, fin, fdp)
2164 mblk_t *mb, **mpp;
2165 fr_info_t *fin;
2166 frdest_t *fdp;
2167 {
2168 net_handle_t net_data_p;
2169 net_inject_t *inj;
2170 mblk_t *mp = NULL;
2171 frentry_t *fr = fin->fin_fr;
2172 qpktinfo_t *qpi;
2173 ip_t *ip;
2174
2175 struct sockaddr_in *sin;
2176 struct sockaddr_in6 *sin6;
2177 struct sockaddr *sinp;
2178 ipf_stack_t *ifs = fin->fin_ifs;
2179 #ifndef sparc
2180 u_short __iplen, __ipoff;
2181 #endif
2182
2183 if (fin->fin_v == 4) {
2184 net_data_p = ifs->ifs_ipf_ipv4;
2185 } else if (fin->fin_v == 6) {
2186 net_data_p = ifs->ifs_ipf_ipv6;
2187 } else {
2188 return (-1);
2189 }
2190
2191 /*
2192 * If we're forwarding (vs. injecting), check the src here, fin_ifp is
2193 * the src interface.
2194 */
2195 if (fdp != NULL &&
2196 !fr_forwarding_enabled((phy_if_t)fin->fin_ifp, net_data_p))
2197 return (-1);
2198
2199 inj = net_inject_alloc(NETINFO_VERSION);
2200 if (inj == NULL)
2201 return -1;
2202
2203 ip = fin->fin_ip;
2204 qpi = fin->fin_qpi;
2205
2206 /*
2207 * If this is a duplicate mblk then we want ip to point at that
2208 * data, not the original, if and only if it is already pointing at
2209 * the current mblk data.
2210 *
2211 * Otherwise, if it's not a duplicate, and we're not already pointing
2212 * at the current mblk data, then we want to ensure that the data
2213 * points at ip.
2214 */
2215
2216 if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
2217 ip = (ip_t *)mb->b_rptr;
2218 } else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
2219 qpi->qpi_m->b_rptr = (uchar_t *)ip;
2220 qpi->qpi_off = 0;
2221 }
2222
2223 /*
2224 * If there is another M_PROTO, we don't want it
2225 */
2226 if (*mpp != mb) {
2227 mp = unlinkb(*mpp);
2228 freeb(*mpp);
2229 *mpp = mp;
2230 }
2231
2232 sinp = (struct sockaddr *)&inj->ni_addr;
2233 sin = (struct sockaddr_in *)sinp;
2234 sin6 = (struct sockaddr_in6 *)sinp;
2235 bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
2236 inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
2237 inj->ni_packet = mb;
2238
2239 /*
2240 * In case we're here due to "to <if>" being used with
2241 * "keep state", check that we're going in the correct
2242 * direction.
2243 */
2244 if (fdp != NULL) {
2245 if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
2246 (fin->fin_rev != 0) && (fdp == &fr->fr_tif))
2247 goto bad_fastroute;
2248 inj->ni_physical = (phy_if_t)fdp->fd_ifp;
2249 if (fin->fin_v == 4) {
2250 sin->sin_addr = fdp->fd_ip;
2251 } else {
2252 sin6->sin6_addr = fdp->fd_ip6.in6;
2253 }
2254 } else {
2255 if (fin->fin_v == 4) {
2256 sin->sin_addr = ip->ip_dst;
2257 } else {
2258 sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
2259 }
2260 inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
2261 }
2262
2263 /* If we're forwarding (vs. injecting), check the destinatation here. */
2264 if (fdp != NULL && !fr_forwarding_enabled(inj->ni_physical, net_data_p))
2265 goto bad_fastroute;
2266
2267 /*
2268 * Clear the hardware checksum flags from packets that we are doing
2269 * input processing on as leaving them set will cause the outgoing
2270 * NIC (if it supports hardware checksum) to calculate them anew,
2271 * using the old (correct) checksums as the pseudo value to start
2272 * from.
2273 */
2274 if (fin->fin_out == 0) {
2275 DB_CKSUMFLAGS(mb) = 0;
2276 }
2277
2278 *mpp = mb;
2279
2280 if (fin->fin_out == 0) {
2281 void *saveifp;
2282 u_32_t pass;
2283
2284 saveifp = fin->fin_ifp;
2285 fin->fin_ifp = (void *)inj->ni_physical;
2286 fin->fin_flx &= ~FI_STATE;
2287 fin->fin_out = 1;
2288 (void) fr_acctpkt(fin, &pass);
2289 fin->fin_fr = NULL;
2290 if (!fr || !(fr->fr_flags & FR_RETMASK))
2291 (void) fr_checkstate(fin, &pass);
2292 if (fr_checknatout(fin, NULL) == -1)
2293 goto bad_fastroute;
2294 fin->fin_out = 0;
2295 fin->fin_ifp = saveifp;
2296 }
2297 #ifndef sparc
2298 if (fin->fin_v == 4) {
2299 __iplen = (u_short)ip->ip_len,
2300 __ipoff = (u_short)ip->ip_off;
2301
2302 ip->ip_len = htons(__iplen);
2303 ip->ip_off = htons(__ipoff);
2304 }
2305 #endif
2306
2307 if (net_data_p) {
2308 if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
2309 net_inject_free(inj);
2310 return (-1);
2311 }
2312 }
2313
2314 ifs->ifs_fr_frouteok[0]++;
2315 net_inject_free(inj);
2316 return 0;
2317 bad_fastroute:
2318 net_inject_free(inj);
2319 freemsg(mb);
2320 ifs->ifs_fr_frouteok[1]++;
2321 return -1;
2322 }
2323
2324
2325 /* ------------------------------------------------------------------------ */
2326 /* Function: ipf_hook4_out */
2327 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2328 /* Parameters: event(I) - pointer to event */
2329 /* info(I) - pointer to hook information for firewalling */
2330 /* */
2331 /* Calling ipf_hook. */
2332 /* ------------------------------------------------------------------------ */
2333 /*ARGSUSED*/
2334 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
2335 {
2336 return ipf_hook(info, 1, 0, arg);
2337 }
2338 /*ARGSUSED*/
2339 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
2340 {
2341 return ipf_hook6(info, 1, 0, arg);
2342 }
2343
2344 /* ------------------------------------------------------------------------ */
2345 /* Function: ipf_hook4_in */
2346 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2347 /* Parameters: event(I) - pointer to event */
2348 /* info(I) - pointer to hook information for firewalling */
2349 /* */
2350 /* Calling ipf_hook. */
2351 /* ------------------------------------------------------------------------ */
2352 /*ARGSUSED*/
2353 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
2354 {
2355 return ipf_hook(info, 0, 0, arg);
2356 }
2357 /*ARGSUSED*/
2358 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
2359 {
2360 return ipf_hook6(info, 0, 0, arg);
2361 }
2362
2363
2364 /* ------------------------------------------------------------------------ */
2365 /* Function: ipf_hook4_loop_out */
2366 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2367 /* Parameters: event(I) - pointer to event */
2368 /* info(I) - pointer to hook information for firewalling */
2369 /* */
2370 /* Calling ipf_hook. */
2371 /* ------------------------------------------------------------------------ */
2372 /*ARGSUSED*/
2373 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2374 {
2375 return ipf_hook(info, 1, FI_NOCKSUM, arg);
2376 }
2377 /*ARGSUSED*/
2378 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
2379 {
2380 return ipf_hook6(info, 1, FI_NOCKSUM, arg);
2381 }
2382
2383 /* ------------------------------------------------------------------------ */
2384 /* Function: ipf_hookvndl3_in */
2385 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2386 /* Parameters: event(I) - pointer to event */
2387 /* info(I) - pointer to hook information for firewalling */
2388 /* */
2389 /* The vnd hooks are private hooks to ON. They represents a layer 2 */
2390 /* datapath generally used to implement virtual machines. The driver sends */
2391 /* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
2392 /* them is in the upper 16 bits while the remaining bits are the */
2393 /* traditional packet hook flags. */
2394 /* */
2395 /* They end up calling the appropriate traditional ip hooks. */
2396 /* ------------------------------------------------------------------------ */
2397 /*ARGSUSED*/
2398 int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
2399 {
2400 return ipf_hook4_in(token, info, arg);
2401 }
2402
2403 int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
2404 {
2405 return ipf_hook6_in(token, info, arg);
2406 }
2407
2408 /*ARGSUSED*/
2409 int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
2410 {
2411 return ipf_hook4_out(token, info, arg);
2412 }
2413
2414 int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
2415 {
2416 return ipf_hook6_out(token, info, arg);
2417 }
2418
2419 /* Static constants used by ipf_hook_ether */
2420 static uint8_t ipf_eth_bcast_addr[ETHERADDRL] = {
2421 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
2422 };
2423 static uint8_t ipf_eth_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
2424 static uint8_t ipf_eth_ipv6_mcast[2] = { 0x33, 0x33 };
2425
2426 /* ------------------------------------------------------------------------ */
2427 /* Function: ipf_hook_ether */
2428 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2429 /* Parameters: token(I) - pointer to event */
2430 /* info(I) - pointer to hook information for firewalling */
2431 /* */
2432 /* The ipf_hook_ether hook is currently private to illumos. It represents */
2433 /* a layer 2 datapath generally used by virtual machines. Currently the */
2434 /* hook is only used by the viona driver to pass along L2 frames for */
2435 /* inspection. It requires that the L2 ethernet header is contained within */
2436 /* a single dblk_t (however layers above the L2 header have no restrctions */
2437 /* in ipf). ipf does not currently support filtering on L2 fields (e.g. */
2438 /* filtering on a MAC address or ethertype), however virtual machines do */
2439 /* not have native IP stack instances where ipf traditionally hooks in. */
2440 /* Instead this entry point is used to determine if the packet is unicast, */
2441 /* broadcast, or multicast. The IPv4 or IPv6 packet is then passed to the */
2442 /* traditional ip hooks for filtering. Non IPv4 or non IPv6 packets are */
2443 /* not subject to examination. */
2444 /* ------------------------------------------------------------------------ */
2445 int ipf_hook_ether(hook_event_token_t token, hook_data_t info, void *arg,
2446 boolean_t out)
2447 {
2448 struct ether_header *ethp;
2449 hook_pkt_event_t *hpe = (hook_pkt_event_t *)info;
2450 mblk_t *mp;
2451 size_t offset, len;
2452 uint16_t etype;
2453 boolean_t v6;
2454
2455 /*
2456 * viona will only pass us mblks with the L2 header contained in a
2457 * single data block.
2458 */
2459 mp = *hpe->hpe_mp;
2460 len = MBLKL(mp);
2461
2462 VERIFY3S(len, >=, sizeof (struct ether_header));
2463
2464 ethp = (struct ether_header *)mp->b_rptr;
2465 if ((etype = ntohs(ethp->ether_type)) == ETHERTYPE_VLAN) {
2466 struct ether_vlan_header *evh =
2467 (struct ether_vlan_header *)ethp;
2468
2469 VERIFY3S(len, >=, sizeof (struct ether_vlan_header));
2470
2471 etype = ntohs(evh->ether_type);
2472 offset = sizeof (*evh);
2473 } else {
2474 offset = sizeof (*ethp);
2475 }
2476
2477 /*
2478 * ipf only support filtering IPv4 and IPv6. Ignore other types.
2479 */
2480 if (etype == ETHERTYPE_IP)
2481 v6 = B_FALSE;
2482 else if (etype == ETHERTYPE_IPV6)
2483 v6 = B_TRUE;
2484 else
2485 return (0);
2486
2487 if (bcmp(ipf_eth_bcast_addr, ethp, ETHERADDRL) == 0)
2488 hpe->hpe_flags |= HPE_BROADCAST;
2489 else if (bcmp(ipf_eth_ipv4_mcast, ethp,
2490 sizeof (ipf_eth_ipv4_mcast)) == 0)
2491 hpe->hpe_flags |= HPE_MULTICAST;
2492 else if (bcmp(ipf_eth_ipv6_mcast, ethp,
2493 sizeof (ipf_eth_ipv6_mcast)) == 0)
2494 hpe->hpe_flags |= HPE_MULTICAST;
2495
2496 /* Find the start of the IPv4 or IPv6 header */
2497 for (; offset >= len; len = MBLKL(mp)) {
2498 offset -= len;
2499 mp = mp->b_cont;
2500 if (mp == NULL) {
2501 freemsg(*hpe->hpe_mp);
2502 *hpe->hpe_mp = NULL;
2503 return (-1);
2504 }
2505 }
2506 hpe->hpe_mb = mp;
2507 hpe->hpe_hdr = mp->b_rptr + offset;
2508
2509 return (v6 ? ipf_hook6(info, out, 0, arg) :
2510 ipf_hook(info, out, 0, arg));
2511 }
2512
2513 /* ------------------------------------------------------------------------ */
2514 /* Function: ipf_hookviona_{in,out} */
2515 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2516 /* Parameters: event(I) - pointer to event */
2517 /* info(I) - pointer to hook information for firewalling */
2518 /* */
2519 /* The viona hooks are private hooks to illumos. They represents a layer 2 */
2520 /* datapath generally used to implement virtual machines. */
2521 /* along L2 packets. */
2522 /* */
2523 /* They end up calling the appropriate traditional ip hooks. */
2524 /* ------------------------------------------------------------------------ */
2525 int
2526 ipf_hookviona_in(hook_event_token_t token, hook_data_t info, void *arg)
2527 {
2528 return (ipf_hook_ether(token, info, arg, B_FALSE));
2529 }
2530
2531 int
2532 ipf_hookviona_out(hook_event_token_t token, hook_data_t info, void *arg)
2533 {
2534 return (ipf_hook_ether(token, info, arg, B_TRUE));
2535 }
2536
2537 /* ------------------------------------------------------------------------ */
2538 /* Function: ipf_hook4_loop_in */
2539 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2540 /* Parameters: event(I) - pointer to event */
2541 /* info(I) - pointer to hook information for firewalling */
2542 /* */
2543 /* Calling ipf_hook. */
2544 /* ------------------------------------------------------------------------ */
2545 /*ARGSUSED*/
2546 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2547 {
2548 return ipf_hook(info, 0, FI_NOCKSUM, arg);
2549 }
2550 /*ARGSUSED*/
2551 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
2552 {
2553 return ipf_hook6(info, 0, FI_NOCKSUM, arg);
2554 }
2555
2556 /* ------------------------------------------------------------------------ */
2557 /* Function: ipf_hook */
2558 /* Returns: int - 0 == packet ok, else problem, free packet if not done */
2559 /* Parameters: info(I) - pointer to hook information for firewalling */
2560 /* out(I) - whether packet is going in or out */
2561 /* loopback(I) - whether packet is a loopback packet or not */
2562 /* */
2563 /* Stepping stone function between the IP mainline and IPFilter. Extracts */
2564 /* parameters out of the info structure and forms them up to be useful for */
2565 /* calling ipfilter. */
2566 /* ------------------------------------------------------------------------ */
2567 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
2568 {
2569 hook_pkt_event_t *fw;
2570 ipf_stack_t *ifs;
2571 qpktinfo_t qpi;
2572 int rval, hlen;
2573 u_short swap;
2574 phy_if_t phy;
2575 ip_t *ip;
2576
2577 ifs = arg;
2578 fw = (hook_pkt_event_t *)info;
2579
2580 ASSERT(fw != NULL);
2581 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2582
2583 ip = fw->hpe_hdr;
2584 swap = ntohs(ip->ip_len);
2585 ip->ip_len = swap;
2586 swap = ntohs(ip->ip_off);
2587 ip->ip_off = swap;
2588 hlen = IPH_HDR_LENGTH(ip);
2589
2590 qpi.qpi_m = fw->hpe_mb;
2591 qpi.qpi_data = fw->hpe_hdr;
2592 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2593 qpi.qpi_ill = (void *)phy;
2594 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2595 if (qpi.qpi_flags)
2596 qpi.qpi_flags |= FI_MBCAST;
2597 qpi.qpi_flags |= loopback;
2598
2599 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2600 &qpi, fw->hpe_mp, ifs);
2601
2602 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2603 if (rval == 0 && *(fw->hpe_mp) == NULL)
2604 rval = 1;
2605
2606 /* Notify IP the packet mblk_t and IP header pointers. */
2607 fw->hpe_mb = qpi.qpi_m;
2608 fw->hpe_hdr = qpi.qpi_data;
2609 if (rval == 0) {
2610 ip = qpi.qpi_data;
2611 swap = ntohs(ip->ip_len);
2612 ip->ip_len = swap;
2613 swap = ntohs(ip->ip_off);
2614 ip->ip_off = swap;
2615 }
2616 return rval;
2617
2618 }
2619 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2620 {
2621 hook_pkt_event_t *fw;
2622 int rval, hlen;
2623 qpktinfo_t qpi;
2624 phy_if_t phy;
2625
2626 fw = (hook_pkt_event_t *)info;
2627
2628 ASSERT(fw != NULL);
2629 phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2630
2631 hlen = sizeof (ip6_t);
2632
2633 qpi.qpi_m = fw->hpe_mb;
2634 qpi.qpi_data = fw->hpe_hdr;
2635 qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2636 qpi.qpi_ill = (void *)phy;
2637 qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2638 if (qpi.qpi_flags)
2639 qpi.qpi_flags |= FI_MBCAST;
2640 qpi.qpi_flags |= loopback;
2641
2642 rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2643 &qpi, fw->hpe_mp, arg);
2644
2645 /* For fastroute cases, fr_check returns 0 with mp set to NULL */
2646 if (rval == 0 && *(fw->hpe_mp) == NULL)
2647 rval = 1;
2648
2649 /* Notify IP the packet mblk_t and IP header pointers. */
2650 fw->hpe_mb = qpi.qpi_m;
2651 fw->hpe_hdr = qpi.qpi_data;
2652 return rval;
2653 }
2654
2655
2656 /* ------------------------------------------------------------------------ */
2657 /* Function: ipf_nic_event_v4 */
2658 /* Returns: int - 0 == no problems encountered */
2659 /* Parameters: event(I) - pointer to event */
2660 /* info(I) - pointer to information about a NIC event */
2661 /* */
2662 /* Function to receive asynchronous NIC events from IP */
2663 /* ------------------------------------------------------------------------ */
2664 /*ARGSUSED*/
2665 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2666 {
2667 struct sockaddr_in *sin;
2668 hook_nic_event_t *hn;
2669 ipf_stack_t *ifs = arg;
2670 void *new_ifp = NULL;
2671
2672 if (ifs->ifs_fr_running <= 0)
2673 return (0);
2674
2675 hn = (hook_nic_event_t *)info;
2676
2677 switch (hn->hne_event)
2678 {
2679 case NE_PLUMB :
2680 frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2681 ifs);
2682 fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2683 hn->hne_data, ifs);
2684 fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2685 hn->hne_data, ifs);
2686 break;
2687
2688 case NE_UNPLUMB :
2689 frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2690 fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2691 ifs);
2692 fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2693 break;
2694
2695 case NE_ADDRESS_CHANGE :
2696 /*
2697 * We only respond to events for logical interface 0 because
2698 * IPFilter only uses the first address given to a network
2699 * interface. We check for hne_lif==1 because the netinfo
2700 * code maps adds 1 to the lif number so that it can return
2701 * 0 to indicate "no more lifs" when walking them.
2702 */
2703 if (hn->hne_lif == 1) {
2704 frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2705 ifs);
2706 sin = hn->hne_data;
2707 fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2708 ifs);
2709 }
2710 break;
2711
2712 #if SOLARIS2 >= 10
2713 case NE_IFINDEX_CHANGE :
2714 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2715
2716 if (hn->hne_data != NULL) {
2717 /*
2718 * The netinfo passes interface index as int (hne_data should be
2719 * handled as a pointer to int), which is always 32bit. We need to
2720 * convert it to void pointer here, since interfaces are
2721 * represented as pointers to void in IPF. The pointers are 64 bits
2722 * long on 64bit platforms. Doing something like
2723 * (void *)((int) x)
2724 * will throw warning:
2725 * "cast to pointer from integer of different size"
2726 * during 64bit compilation.
2727 *
2728 * The line below uses (size_t) to typecast int to
2729 * size_t, which might be 64bit/32bit (depending
2730 * on architecture). Once we have proper 64bit/32bit
2731 * type (size_t), we can safely convert it to void pointer.
2732 */
2733 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2734 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2735 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2736 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2737 }
2738 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2739 break;
2740 #endif
2741
2742 default :
2743 break;
2744 }
2745
2746 return 0;
2747 }
2748
2749
2750 /* ------------------------------------------------------------------------ */
2751 /* Function: ipf_nic_event_v6 */
2752 /* Returns: int - 0 == no problems encountered */
2753 /* Parameters: event(I) - pointer to event */
2754 /* info(I) - pointer to information about a NIC event */
2755 /* */
2756 /* Function to receive asynchronous NIC events from IP */
2757 /* ------------------------------------------------------------------------ */
2758 /*ARGSUSED*/
2759 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2760 {
2761 struct sockaddr_in6 *sin6;
2762 hook_nic_event_t *hn;
2763 ipf_stack_t *ifs = arg;
2764 void *new_ifp = NULL;
2765
2766 if (ifs->ifs_fr_running <= 0)
2767 return (0);
2768
2769 hn = (hook_nic_event_t *)info;
2770
2771 switch (hn->hne_event)
2772 {
2773 case NE_PLUMB :
2774 frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2775 hn->hne_data, ifs);
2776 fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2777 hn->hne_data, ifs);
2778 fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2779 hn->hne_data, ifs);
2780 break;
2781
2782 case NE_UNPLUMB :
2783 frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2784 fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2785 ifs);
2786 fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2787 break;
2788
2789 case NE_ADDRESS_CHANGE :
2790 if (hn->hne_lif == 1) {
2791 sin6 = hn->hne_data;
2792 fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2793 ifs);
2794 }
2795 break;
2796
2797 #if SOLARIS2 >= 10
2798 case NE_IFINDEX_CHANGE :
2799 WRITE_ENTER(&ifs->ifs_ipf_mutex);
2800 if (hn->hne_data != NULL) {
2801 /*
2802 * The netinfo passes interface index as int (hne_data should be
2803 * handled as a pointer to int), which is always 32bit. We need to
2804 * convert it to void pointer here, since interfaces are
2805 * represented as pointers to void in IPF. The pointers are 64 bits
2806 * long on 64bit platforms. Doing something like
2807 * (void *)((int) x)
2808 * will throw warning:
2809 * "cast to pointer from integer of different size"
2810 * during 64bit compilation.
2811 *
2812 * The line below uses (size_t) to typecast int to
2813 * size_t, which might be 64bit/32bit (depending
2814 * on architecture). Once we have proper 64bit/32bit
2815 * type (size_t), we can safely convert it to void pointer.
2816 */
2817 new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2818 fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2819 fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2820 fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2821 }
2822 RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2823 break;
2824 #endif
2825
2826 default :
2827 break;
2828 }
2829
2830 return 0;
2831 }
2832
2833 /*
2834 * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2835 * are needed in Solaris kernel only. We don't need them in
2836 * ipftest to pretend the ICMP/RST packet was sent as a response.
2837 */
2838 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2839 /* ------------------------------------------------------------------------ */
2840 /* Function: fr_make_rst */
2841 /* Returns: int - 0 on success, -1 on failure */
2842 /* Parameters: fin(I) - pointer to packet information */
2843 /* */
2844 /* We must alter the original mblks passed to IPF from IP stack via */
2845 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations. */
2846 /* IPF can basicaly do only these things with mblk representing the packet: */
2847 /* leave it as it is (pass the packet) */
2848 /* */
2849 /* discard it (block the packet) */
2850 /* */
2851 /* alter it (i.e. NAT) */
2852 /* */
2853 /* As you can see IPF can not simply discard the mblk and supply a new one */
2854 /* instead to IP stack via FW_HOOKS. */
2855 /* */
2856 /* The return-rst action for packets coming via NIC is handled as follows: */
2857 /* mblk with packet is discarded */
2858 /* */
2859 /* new mblk with RST response is constructed and injected to network */
2860 /* */
2861 /* IPF can't inject packets to loopback interface, this is just another */
2862 /* limitation we have to deal with here. The only option to send RST */
2863 /* response to offending TCP packet coming via loopback is to alter it. */
2864 /* */
2865 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on */
2866 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to */
2867 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers. */
2868 /* ------------------------------------------------------------------------ */
2869 int fr_make_rst(fin)
2870 fr_info_t *fin;
2871 {
2872 uint16_t tmp_port;
2873 int rv = -1;
2874 uint32_t old_ack;
2875 tcphdr_t *tcp = NULL;
2876 struct in_addr tmp_src;
2877 #ifdef USE_INET6
2878 struct in6_addr tmp_src6;
2879 #endif
2880
2881 ASSERT(fin->fin_p == IPPROTO_TCP);
2882
2883 /*
2884 * We do not need to adjust chksum, since it is not being checked by
2885 * Solaris IP stack for loopback clients.
2886 */
2887 if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2888 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2889
2890 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2891 /* Swap IPv4 addresses. */
2892 tmp_src = fin->fin_ip->ip_src;
2893 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2894 fin->fin_ip->ip_dst = tmp_src;
2895
2896 rv = 0;
2897 }
2898 else
2899 tcp = NULL;
2900 }
2901 #ifdef USE_INET6
2902 else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2903 ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2904 /*
2905 * We are relying on fact the next header is TCP, which is true
2906 * for regular TCP packets coming in over loopback.
2907 */
2908 if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2909 /* Swap IPv6 addresses. */
2910 tmp_src6 = fin->fin_ip6->ip6_src;
2911 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2912 fin->fin_ip6->ip6_dst = tmp_src6;
2913
2914 rv = 0;
2915 }
2916 else
2917 tcp = NULL;
2918 }
2919 #endif
2920
2921 if (tcp != NULL) {
2922 /*
2923 * Adjust TCP header:
2924 * swap ports,
2925 * set flags,
2926 * set correct ACK number
2927 */
2928 tmp_port = tcp->th_sport;
2929 tcp->th_sport = tcp->th_dport;
2930 tcp->th_dport = tmp_port;
2931 old_ack = tcp->th_ack;
2932 tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2933 tcp->th_seq = old_ack;
2934 tcp->th_flags = TH_RST | TH_ACK;
2935 }
2936
2937 return (rv);
2938 }
2939
2940 /* ------------------------------------------------------------------------ */
2941 /* Function: fr_make_icmp_v4 */
2942 /* Returns: int - 0 on success, -1 on failure */
2943 /* Parameters: fin(I) - pointer to packet information */
2944 /* */
2945 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
2946 /* what is going to happen here and why. Once you read the comment there, */
2947 /* continue here with next paragraph. */
2948 /* */
2949 /* To turn IPv4 packet into ICMPv4 response packet, these things must */
2950 /* happen here: */
2951 /* (1) Original mblk is copied (duplicated). */
2952 /* */
2953 /* (2) ICMP header is created. */
2954 /* */
2955 /* (3) Link ICMP header with copy of original mblk, we have ICMPv4 */
2956 /* data ready then. */
2957 /* */
2958 /* (4) Swap IP addresses in original mblk and adjust IP header data. */
2959 /* */
2960 /* (5) The mblk containing original packet is trimmed to contain IP */
2961 /* header only and ICMP chksum is computed. */
2962 /* */
2963 /* (6) The ICMP header we have from (3) is linked to original mblk, */
2964 /* which now contains new IP header. If original packet was spread */
2965 /* over several mblks, only the first mblk is kept. */
2966 /* ------------------------------------------------------------------------ */
2967 static int fr_make_icmp_v4(fin)
2968 fr_info_t *fin;
2969 {
2970 struct in_addr tmp_src;
2971 tcphdr_t *tcp;
2972 struct icmp *icmp;
2973 mblk_t *mblk_icmp;
2974 mblk_t *mblk_ip;
2975 size_t icmp_pld_len; /* octets to append to ICMP header */
2976 size_t orig_iphdr_len; /* length of IP header only */
2977 uint32_t sum;
2978 uint16_t *buf;
2979 int len;
2980
2981
2982 if (fin->fin_v != 4)
2983 return (-1);
2984
2985 /*
2986 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2987 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2988 */
2989 tcp = (tcphdr_t *) fin->fin_dp;
2990
2991 if ((fin->fin_p == IPPROTO_TCP) &&
2992 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2993 return (-1);
2994
2995 /*
2996 * Step (1)
2997 *
2998 * Make copy of original mblk.
2999 *
3000 * We want to copy as much data as necessary, not less, not more. The
3001 * ICMPv4 payload length for unreachable messages is:
3002 * original IP header + 8 bytes of L4 (if there are any).
3003 *
3004 * We determine if there are at least 8 bytes of L4 data following IP
3005 * header first.
3006 */
3007 icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
3008 ICMPERR_ICMPHLEN : fin->fin_dlen;
3009 /*
3010 * Since we don't want to copy more data than necessary, we must trim
3011 * the original mblk here. The right way (STREAMish) would be to use
3012 * adjmsg() to trim it. However we would have to calculate the length
3013 * argument for adjmsg() from pointers we already have here.
3014 *
3015 * Since we have pointers and offsets, it's faster and easier for
3016 * us to just adjust pointers by hand instead of using adjmsg().
3017 */
3018 fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
3019 fin->fin_m->b_wptr += icmp_pld_len;
3020 icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
3021
3022 /*
3023 * Also we don't want to copy any L2 stuff, which might precede IP
3024 * header, so we have have to set b_rptr to point to the start of IP
3025 * header.
3026 */
3027 fin->fin_m->b_rptr += fin->fin_ipoff;
3028 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3029 return (-1);
3030 fin->fin_m->b_rptr -= fin->fin_ipoff;
3031
3032 /*
3033 * Step (2)
3034 *
3035 * Create an ICMP header, which will be appened to original mblk later.
3036 * ICMP header is just another mblk.
3037 */
3038 mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
3039 if (mblk_icmp == NULL) {
3040 FREE_MB_T(mblk_ip);
3041 return (-1);
3042 }
3043
3044 MTYPE(mblk_icmp) = M_DATA;
3045 icmp = (struct icmp *) mblk_icmp->b_wptr;
3046 icmp->icmp_type = ICMP_UNREACH;
3047 icmp->icmp_code = fin->fin_icode & 0xFF;
3048 icmp->icmp_void = 0;
3049 icmp->icmp_cksum = 0;
3050 mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
3051
3052 /*
3053 * Step (3)
3054 *
3055 * Complete ICMP packet - link ICMP header with L4 data from original
3056 * IP packet.
3057 */
3058 linkb(mblk_icmp, mblk_ip);
3059
3060 /*
3061 * Step (4)
3062 *
3063 * Swap IP addresses and change IP header fields accordingly in
3064 * original IP packet.
3065 *
3066 * There is a rule option return-icmp as a dest for physical
3067 * interfaces. This option becomes useless for loopback, since IPF box
3068 * uses same address as a loopback destination. We ignore the option
3069 * here, the ICMP packet will always look like as it would have been
3070 * sent from the original destination host.
3071 */
3072 tmp_src = fin->fin_ip->ip_src;
3073 fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
3074 fin->fin_ip->ip_dst = tmp_src;
3075 fin->fin_ip->ip_p = IPPROTO_ICMP;
3076 fin->fin_ip->ip_sum = 0;
3077
3078 /*
3079 * Step (5)
3080 *
3081 * We trim the orignal mblk to hold IP header only.
3082 */
3083 fin->fin_m->b_wptr = fin->fin_dp;
3084 orig_iphdr_len = fin->fin_m->b_wptr -
3085 (fin->fin_m->b_rptr + fin->fin_ipoff);
3086 fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
3087 orig_iphdr_len);
3088
3089 /*
3090 * ICMP chksum calculation. The data we are calculating chksum for are
3091 * spread over two mblks, therefore we have to use two for loops.
3092 *
3093 * First for loop computes chksum part for ICMP header.
3094 */
3095 buf = (uint16_t *) icmp;
3096 len = ICMPERR_ICMPHLEN;
3097 for (sum = 0; len > 1; len -= 2)
3098 sum += *buf++;
3099
3100 /*
3101 * Here we add chksum part for ICMP payload.
3102 */
3103 len = icmp_pld_len;
3104 buf = (uint16_t *) mblk_ip->b_rptr;
3105 for (; len > 1; len -= 2)
3106 sum += *buf++;
3107
3108 /*
3109 * Chksum is done.
3110 */
3111 sum = (sum >> 16) + (sum & 0xffff);
3112 sum += (sum >> 16);
3113 icmp->icmp_cksum = ~sum;
3114
3115 /*
3116 * Step (6)
3117 *
3118 * Release all packet mblks, except the first one.
3119 */
3120 if (fin->fin_m->b_cont != NULL) {
3121 FREE_MB_T(fin->fin_m->b_cont);
3122 }
3123
3124 /*
3125 * Append ICMP payload to first mblk, which already contains new IP
3126 * header.
3127 */
3128 linkb(fin->fin_m, mblk_icmp);
3129
3130 return (0);
3131 }
3132
3133 #ifdef USE_INET6
3134 /* ------------------------------------------------------------------------ */
3135 /* Function: fr_make_icmp_v6 */
3136 /* Returns: int - 0 on success, -1 on failure */
3137 /* Parameters: fin(I) - pointer to packet information */
3138 /* */
3139 /* Please read comment at fr_make_icmp() wrapper function to get an idea */
3140 /* what and why is going to happen here. Once you read the comment there, */
3141 /* continue here with next paragraph. */
3142 /* */
3143 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response. */
3144 /* The algorithm is fairly simple: */
3145 /* 1) We need to get copy of complete mblk. */
3146 /* */
3147 /* 2) New ICMPv6 header is created. */
3148 /* */
3149 /* 3) The copy of original mblk with packet is linked to ICMPv6 */
3150 /* header. */
3151 /* */
3152 /* 4) The checksum must be adjusted. */
3153 /* */
3154 /* 5) IP addresses in original mblk are swapped and IP header data */
3155 /* are adjusted (protocol number). */
3156 /* */
3157 /* 6) Original mblk is trimmed to hold IPv6 header only, then it is */
3158 /* linked with the ICMPv6 data we got from (3). */
3159 /* ------------------------------------------------------------------------ */
3160 static int fr_make_icmp_v6(fin)
3161 fr_info_t *fin;
3162 {
3163 struct icmp6_hdr *icmp6;
3164 tcphdr_t *tcp;
3165 struct in6_addr tmp_src6;
3166 size_t icmp_pld_len;
3167 mblk_t *mblk_ip, *mblk_icmp;
3168
3169 if (fin->fin_v != 6)
3170 return (-1);
3171
3172 /*
3173 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
3174 * IP stack. If it is not SYN/FIN, then we must drop it silently.
3175 */
3176 tcp = (tcphdr_t *) fin->fin_dp;
3177
3178 if ((fin->fin_p == IPPROTO_TCP) &&
3179 ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
3180 return (-1);
3181
3182 /*
3183 * Step (1)
3184 *
3185 * We need to copy complete packet in case of IPv6, no trimming is
3186 * needed (except the L2 headers).
3187 */
3188 icmp_pld_len = M_LEN(fin->fin_m);
3189 fin->fin_m->b_rptr += fin->fin_ipoff;
3190 if ((mblk_ip = copyb(fin->fin_m)) == NULL)
3191 return (-1);
3192 fin->fin_m->b_rptr -= fin->fin_ipoff;
3193
3194 /*
3195 * Step (2)
3196 *
3197 * Allocate and create ICMP header.
3198 */
3199 mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
3200 BPRI_HI);
3201
3202 if (mblk_icmp == NULL)
3203 return (-1);
3204
3205 MTYPE(mblk_icmp) = M_DATA;
3206 icmp6 = (struct icmp6_hdr *) mblk_icmp->b_wptr;
3207 icmp6->icmp6_type = ICMP6_DST_UNREACH;
3208 icmp6->icmp6_code = fin->fin_icode & 0xFF;
3209 icmp6->icmp6_data32[0] = 0;
3210 mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
3211
3212 /*
3213 * Step (3)
3214 *
3215 * Link the copy of IP packet to ICMP header.
3216 */
3217 linkb(mblk_icmp, mblk_ip);
3218
3219 /*
3220 * Step (4)
3221 *
3222 * Calculate chksum - this is much more easier task than in case of
3223 * IPv4 - ICMPv6 chksum only covers IP addresses, and payload length.
3224 * We are making compensation just for change of packet length.
3225 */
3226 icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
3227
3228 /*
3229 * Step (5)
3230 *
3231 * Swap IP addresses.
3232 */
3233 tmp_src6 = fin->fin_ip6->ip6_src;
3234 fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
3235 fin->fin_ip6->ip6_dst = tmp_src6;
3236
3237 /*
3238 * and adjust IP header data.
3239 */
3240 fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
3241 fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
3242
3243 /*
3244 * Step (6)
3245 *
3246 * We must release all linked mblks from original packet and keep only
3247 * the first mblk with IP header to link ICMP data.
3248 */
3249 fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
3250
3251 if (fin->fin_m->b_cont != NULL) {
3252 FREE_MB_T(fin->fin_m->b_cont);
3253 }
3254
3255 /*
3256 * Append ICMP payload to IP header.
3257 */
3258 linkb(fin->fin_m, mblk_icmp);
3259
3260 return (0);
3261 }
3262 #endif /* USE_INET6 */
3263
3264 /* ------------------------------------------------------------------------ */
3265 /* Function: fr_make_icmp */
3266 /* Returns: int - 0 on success, -1 on failure */
3267 /* Parameters: fin(I) - pointer to packet information */
3268 /* */
3269 /* We must alter the original mblks passed to IPF from IP stack via */
3270 /* FW_HOOKS. The reasons why we must alter packet are discussed within */
3271 /* comment at fr_make_rst() function. */
3272 /* */
3273 /* The fr_make_icmp() function acts as a wrapper, which passes the code */
3274 /* execution to fr_make_icmp_v4() or fr_make_icmp_v6() depending on */
3275 /* protocol version. However there are some details, which are common to */
3276 /* both IP versions. The details are going to be explained here. */
3277 /* */
3278 /* The packet looks as follows: */
3279 /* xxx | IP hdr | IP payload ... | */
3280 /* ^ ^ ^ ^ */
3281 /* | | | | */
3282 /* | | | fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
3283 /* | | | */
3284 /* | | `- fin_m->fin_dp (in case of IPv4 points to L4 header) */
3285 /* | | */
3286 /* | `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case */
3287 /* | of loopback) */
3288 /* | */
3289 /* `- fin_m->b_rptr - points to L2 header in case of physical NIC */
3290 /* */
3291 /* All relevant IP headers are pulled up into the first mblk. It happened */
3292 /* well in advance before the matching rule was found (the rule, which took */
3293 /* us here, to fr_make_icmp() function). */
3294 /* */
3295 /* Both functions will turn packet passed in fin->fin_m mblk into a new */
3296 /* packet. New packet will be represented as chain of mblks. */
3297 /* orig mblk |- b_cont ---. */
3298 /* ^ `-> ICMP hdr |- b_cont--. */
3299 /* | ^ `-> duped orig mblk */
3300 /* | | ^ */
3301 /* `- The original mblk | | */
3302 /* will be trimmed to | | */
3303 /* to contain IP header | | */
3304 /* only | | */
3305 /* | | */
3306 /* `- This is newly | */
3307 /* allocated mblk to | */
3308 /* hold ICMPv6 data. | */
3309 /* | */
3310 /* | */
3311 /* | */
3312 /* This is the copy of original mblk, it will contain -' */
3313 /* orignal IP packet in case of ICMPv6. In case of */
3314 /* ICMPv4 it will contain up to 8 bytes of IP payload */
3315 /* (TCP/UDP/L4) data from original packet. */
3316 /* ------------------------------------------------------------------------ */
3317 int fr_make_icmp(fin)
3318 fr_info_t *fin;
3319 {
3320 int rv;
3321
3322 if (fin->fin_v == 4)
3323 rv = fr_make_icmp_v4(fin);
3324 #ifdef USE_INET6
3325 else if (fin->fin_v == 6)
3326 rv = fr_make_icmp_v6(fin);
3327 #endif
3328 else
3329 rv = -1;
3330
3331 return (rv);
3332 }
3333
/* ------------------------------------------------------------------------ */
/* Function:    fr_buf_sum                                                  */
/* Returns:     uint32_t - unfolded sum of the 16-bit words in buf          */
/* Parameters:  buf - pointer to the buffer to sum up                       */
/*              len - length of buf in bytes                                */
/*                                                                          */
/* Adds up buf as a sequence of 16-bit words read in memory order; a        */
/* trailing odd byte is counted as if it was followed by a zero byte.       */
/* Carries are not folded here; the caller folds the result when it         */
/* builds the checksum. The buf argument must be aligned for uint16_t       */
/* access.                                                                  */
/* ------------------------------------------------------------------------ */
static uint32_t fr_buf_sum(buf, len)
const void *buf;
unsigned int len;
{
	const uint16_t *words = (const uint16_t *)buf;
	unsigned int nwords = len / 2;
	unsigned int i;
	uint32_t total = 0;

	for (i = 0; i < nwords; i++)
		total += words[i];

	if ((len & 1) != 0) {
		/*
		 * The odd byte is the first byte of the last, incomplete word;
		 * htons() puts it where a 16-bit load of "byte, 0" would.
		 */
		const unsigned char *tail;

		tail = (const unsigned char *)&words[nwords];
		total += htons(*tail << 8);
	}

	return (total);
}
3360
3361 /* ------------------------------------------------------------------------ */
3362 /* Function: fr_calc_chksum */
3363 /* Returns: void */
3364 /* Parameters: fin - pointer to fr_info_t instance with packet data */
3365 /* pkt - pointer to duplicated packet */
3366 /* */
3367 /* Calculates all chksums (L3, L4) for packet pkt. Works for both IP */
3368 /* versions. */
3369 /* ------------------------------------------------------------------------ */
3370 void fr_calc_chksum(fin, pkt)
3371 fr_info_t *fin;
3372 mb_t *pkt;
3373 {
3374 struct pseudo_hdr {
3375 union {
3376 struct in_addr in4;
3377 #ifdef USE_INET6
3378 struct in6_addr in6;
3379 #endif
3380 } src_addr;
3381 union {
3382 struct in_addr in4;
3383 #ifdef USE_INET6
3384 struct in6_addr in6;
3385 #endif
3386 } dst_addr;
3387 char zero;
3388 char proto;
3389 uint16_t len;
3390 } phdr;
3391 uint32_t sum, ip_sum;
3392 void *buf;
3393 uint16_t *l4_csum_p;
3394 tcphdr_t *tcp;
3395 udphdr_t *udp;
3396 icmphdr_t *icmp;
3397 #ifdef USE_INET6
3398 struct icmp6_hdr *icmp6;
3399 #endif
3400 ip_t *ip;
3401 unsigned int len;
3402 int pld_len;
3403
3404 /*
3405 * We need to pullup the packet to the single continuous buffer to avoid
3406 * potential misaligment of b_rptr member in mblk chain.
3407 */
3408 if (pullupmsg(pkt, -1) == 0) {
3409 cmn_err(CE_WARN, "Failed to pullup loopback pkt -> chksum"
3410 " will not be computed by IPF");
3411 return;
3412 }
3413
3414 /*
3415 * It is guaranteed IP header starts right at b_rptr, because we are
3416 * working with a copy of the original packet.
3417 *
3418 * Compute pseudo header chksum for TCP and UDP.
3419 */
3420 if ((fin->fin_p == IPPROTO_UDP) ||
3421 (fin->fin_p == IPPROTO_TCP)) {
3422 bzero(&phdr, sizeof (phdr));
3423 #ifdef USE_INET6
3424 if (fin->fin_v == 6) {
3425 phdr.src_addr.in6 = fin->fin_srcip6;
3426 phdr.dst_addr.in6 = fin->fin_dstip6;
3427 } else {
3428 phdr.src_addr.in4 = fin->fin_src;
3429 phdr.dst_addr.in4 = fin->fin_dst;
3430 }
3431 #else
3432 phdr.src_addr.in4 = fin->fin_src;
3433 phdr.dst_addr.in4 = fin->fin_dst;
3434 #endif
3435 phdr.zero = (char) 0;
3436 phdr.proto = fin->fin_p;
3437 phdr.len = htons((uint16_t)fin->fin_dlen);
3438 sum = fr_buf_sum(&phdr, (unsigned int)sizeof (phdr));
3439 } else {
3440 sum = 0;
3441 }
3442
3443 /*
3444 * Set pointer to the L4 chksum field in the packet, set buf pointer to
3445 * the L4 header start.
3446 */
3447 switch (fin->fin_p) {
3448 case IPPROTO_UDP:
3449 udp = (udphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3450 l4_csum_p = &udp->uh_sum;
3451 buf = udp;
3452 break;
3453 case IPPROTO_TCP:
3454 tcp = (tcphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3455 l4_csum_p = &tcp->th_sum;
3456 buf = tcp;
3457 break;
3458 case IPPROTO_ICMP:
3459 icmp = (icmphdr_t *)(pkt->b_rptr + fin->fin_hlen);
3460 l4_csum_p = &icmp->icmp_cksum;
3461 buf = icmp;
3462 break;
3463 #ifdef USE_INET6
3464 case IPPROTO_ICMPV6:
3465 icmp6 = (struct icmp6_hdr *)(pkt->b_rptr + fin->fin_hlen);
3466 l4_csum_p = &icmp6->icmp6_cksum;
3467 buf = icmp6;
3468 break;
3469 #endif
3470 default:
3471 l4_csum_p = NULL;
3472 }
3473
3474 /*
3475 * Compute L4 chksum if needed.
3476 */
3477 if (l4_csum_p != NULL) {
3478 *l4_csum_p = (uint16_t)0;
3479 pld_len = fin->fin_dlen;
3480 len = pkt->b_wptr - (unsigned char *)buf;
3481 ASSERT(len == pld_len);
3482 /*
3483 * Add payload sum to pseudoheader sum.
3484 */
3485 sum += fr_buf_sum(buf, len);
3486 while (sum >> 16)
3487 sum = (sum & 0xFFFF) + (sum >> 16);
3488
3489 *l4_csum_p = ~((uint16_t)sum);
3490 DTRACE_PROBE1(l4_sum, uint16_t, *l4_csum_p);
3491 }
3492
3493 /*
3494 * The IP header chksum is needed just for IPv4.
3495 */
3496 if (fin->fin_v == 4) {
3497 /*
3498 * Compute IPv4 header chksum.
3499 */
3500 ip = (ip_t *)pkt->b_rptr;
3501 ip->ip_sum = (uint16_t)0;
3502 ip_sum = fr_buf_sum(ip, (unsigned int)fin->fin_hlen);
3503 while (ip_sum >> 16)
3504 ip_sum = (ip_sum & 0xFFFF) + (ip_sum >> 16);
3505
3506 ip->ip_sum = ~((uint16_t)ip_sum);
3507 DTRACE_PROBE1(l3_sum, uint16_t, ip->ip_sum);
3508 }
3509
3510 return;
3511 }
3512
3513 #endif /* _KERNEL && SOLARIS2 >= 10 */