Print this page
10409 ipf sometimes freezes RFC 1323 transfers
Reviewed by: Jason King <jbk@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/ipf/ip_state.c
+++ new/usr/src/uts/common/inet/ipf/ip_state.c
1 1 /*
2 2 * Copyright (C) 1995-2003 by Darren Reed.
3 3 *
4 4 * See the IPFILTER.LICENCE file for details on licencing.
5 5 *
6 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
7 7 *
8 8 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
9 9 */
10 10
11 11 #if defined(KERNEL) || defined(_KERNEL)
12 12 # undef KERNEL
13 13 # undef _KERNEL
14 14 # define KERNEL 1
15 15 # define _KERNEL 1
16 16 #endif
17 17 #include <sys/errno.h>
18 18 #include <sys/types.h>
19 19 #include <sys/param.h>
20 20 #include <sys/file.h>
21 21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22 22 defined(_KERNEL)
23 23 # include "opt_ipfilter_log.h"
24 24 #endif
25 25 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
26 26 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
27 27 #include "opt_inet6.h"
28 28 #endif
29 29 #if !defined(_KERNEL) && !defined(__KERNEL__)
30 30 # include <stdio.h>
31 31 # include <stdlib.h>
32 32 # include <string.h>
33 33 # define _KERNEL
34 34 # ifdef __OpenBSD__
35 35 struct file;
36 36 # endif
37 37 # include <sys/uio.h>
38 38 # undef _KERNEL
39 39 #endif
40 40 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
41 41 # include <sys/filio.h>
42 42 # include <sys/fcntl.h>
43 43 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
44 44 # include "opt_ipfilter.h"
45 45 # endif
46 46 #else
47 47 # include <sys/ioctl.h>
48 48 #endif
49 49 #include <sys/time.h>
50 50 #if !defined(linux)
51 51 # include <sys/protosw.h>
52 52 #endif
53 53 #include <sys/socket.h>
54 54 #if defined(_KERNEL)
55 55 # include <sys/systm.h>
56 56 # if !defined(__SVR4) && !defined(__svr4__)
57 57 # include <sys/mbuf.h>
58 58 # endif
59 59 #endif
60 60 #if defined(__SVR4) || defined(__svr4__)
61 61 # include <sys/filio.h>
62 62 # include <sys/byteorder.h>
63 63 # ifdef _KERNEL
64 64 # include <sys/dditypes.h>
65 65 # endif
66 66 # include <sys/stream.h>
67 67 # include <sys/kmem.h>
68 68 #endif
69 69
70 70 #include <net/if.h>
71 71 #ifdef sun
72 72 # include <net/af.h>
73 73 #endif
74 74 #include <net/route.h>
75 75 #include <netinet/in.h>
76 76 #include <netinet/in_systm.h>
77 77 #include <netinet/ip.h>
78 78 #include <netinet/tcp.h>
79 79 #if !defined(linux)
80 80 # include <netinet/ip_var.h>
81 81 #endif
82 82 #if !defined(__hpux) && !defined(linux)
83 83 # include <netinet/tcp_fsm.h>
84 84 #endif
85 85 #include <netinet/udp.h>
86 86 #include <netinet/ip_icmp.h>
87 87 #include "netinet/ip_compat.h"
88 88 #include <netinet/tcpip.h>
89 89 #include "netinet/ip_fil.h"
90 90 #include "netinet/ip_nat.h"
91 91 #include "netinet/ip_frag.h"
92 92 #include "netinet/ip_state.h"
93 93 #include "netinet/ip_proxy.h"
94 94 #include "netinet/ipf_stack.h"
95 95 #ifdef IPFILTER_SYNC
96 96 #include "netinet/ip_sync.h"
97 97 #endif
98 98 #ifdef IPFILTER_SCAN
99 99 #include "netinet/ip_scan.h"
100 100 #endif
101 101 #ifdef USE_INET6
102 102 #include <netinet/icmp6.h>
103 103 #endif
104 104 #if (__FreeBSD_version >= 300000)
105 105 # include <sys/malloc.h>
106 106 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
107 107 # include <sys/libkern.h>
108 108 # include <sys/systm.h>
109 109 # endif
110 110 #endif
111 111 /* END OF INCLUDES */
112 112
113 113
114 114 #if !defined(lint)
115 115 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed";
116 116 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $";
117 117 #endif
118 118
119 119 #ifdef USE_INET6
120 120 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *));
121 121 #endif
122 122 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *,
123 123 i6addr_t *, tcphdr_t *, u_32_t));
124 124 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *));
125 125 static int fr_state_flush __P((int, int, ipf_stack_t *));
126 126 static ips_stat_t *fr_statetstats __P((ipf_stack_t *));
127 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *));
128 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *));
129 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *));
130 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *));
131 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *));
132 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *));
133 133 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *));
134 134 static void fr_checknewisn __P((fr_info_t *, ipstate_t *));
135 135 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
136 136
137 137 int fr_stputent __P((caddr_t, ipf_stack_t *));
138 138 int fr_stgetent __P((caddr_t, ipf_stack_t *));
139 139
/*
 * Time constants for the state code.  ONE_DAY is 86400 seconds passed
 * through IPF_TTLVAL(); FIVE_DAYS is five times that value.
 */
#define	ONE_DAY		IPF_TTLVAL(1 * 86400)	/* 1 day */
#define	FIVE_DAYS	(5 * ONE_DAY)

/*
 * DOUBLE_HASH(x, ifs): add the seed stored at index (x % table size) to x
 * and reduce the sum modulo the table size again.
 *
 * Both parameters are parenthesised so that any expression (for example
 * "&stack") may be passed for ifs.  Note that x and ifs are each evaluated
 * more than once, so arguments must be free of side effects.
 */
#define	DOUBLE_HASH(x, ifs)	\
	(((x) + (ifs)->ifs_ips_seed[(x) % (ifs)->ifs_fr_statesize]) % \
	    (ifs)->ifs_fr_statesize)
144 144
145 145
146 146 /* ------------------------------------------------------------------------ */
147 147 /* Function: fr_stateinit */
148 148 /* Returns: int - 0 == success, -1 or -2 == memory allocation failure */
149 149 /* Parameters: ifs - ipf stack instance */
150 150 /* */
151 151 /* Initialise all the global variables used within the state code. */
152 152 /* This action also includes initialising locks. */
/* */
/* -1 is returned when the state table or the bucket length array cannot be */
/* allocated, -2 when the seed array cannot be allocated. Memory obtained */
/* before a failure is not released here; fr_stateunload() frees whichever */
/* of these tables are non-NULL. */
153 153 /* ------------------------------------------------------------------------ */
154 154 int fr_stateinit(ifs)
155 155 ipf_stack_t *ifs;
156 156 {
157 157 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
158 158 struct timeval tv;
159 159 #endif
160 160 int i;
161 161
/* Hash table of state entry pointers: ifs_fr_statesize zeroed slots. */
162 162 KMALLOCS(ifs->ifs_ips_table, ipstate_t **,
163 163 ifs->ifs_fr_statesize * sizeof(ipstate_t *));
164 164 if (ifs->ifs_ips_table == NULL)
165 165 return -1;
166 166 bzero((char *)ifs->ifs_ips_table,
167 167 ifs->ifs_fr_statesize * sizeof(ipstate_t *));
168 168
/* One seed per table slot; this is the array read by DOUBLE_HASH(). */
169 169 KMALLOCS(ifs->ifs_ips_seed, u_long *,
170 170 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
171 171 if (ifs->ifs_ips_seed == NULL)
172 172 return -2;
173 173 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL)
/* The current time is only needed when seeds are computed locally. */
174 174 tv.tv_sec = 0;
175 175 GETKTIME(&tv);
176 176 #endif
177 177 for (i = 0; i < ifs->ifs_fr_statesize; i++) {
178 178 /*
179 179 * XXX - ips_seed[X] should be a random number of sorts.
180 180 */
181 181 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
182 182 ifs->ifs_ips_seed[i] = ipf_random();
183 183 #else
/*
 * Fallback: mix the seed array's address, the slot index, the table
 * sizes and tv.tv_sec into a per-slot value.
 */
184 184 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) *
185 185 ifs->ifs_fr_statesize;
186 186 ifs->ifs_ips_seed[i] += tv.tv_sec;
187 187 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed;
188 188 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5;
189 189 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax;
190 190 #endif
191 191 }
192 192
/* Every ICMP type defaults to -1; only the query types below get a reply type. */
193 193 /* fill icmp reply type table */
194 194 for (i = 0; i <= ICMP_MAXTYPE; i++)
195 195 icmpreplytype4[i] = -1;
196 196 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
197 197 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
198 198 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
199 199 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
200 200 #ifdef USE_INET6
201 201 /* fill icmp reply type table */
202 202 for (i = 0; i <= ICMP6_MAXTYPE; i++)
203 203 icmpreplytype6[i] = -1;
204 204 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY;
205 205 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT;
206 206 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY;
207 207 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT;
208 208 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT;
209 209 #endif
210 210
/* Per-bucket entry counters, one u_long per table slot, zeroed. */
211 211 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *,
212 212 ifs->ifs_fr_statesize * sizeof(u_long));
213 213 if (ifs->ifs_ips_stats.iss_bucketlen == NULL)
214 214 return -1;
215 215 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen,
216 216 ifs->ifs_fr_statesize * sizeof(u_long));
217 217
/*
 * If no bucket limit has been set, default it to twice the number of
 * right shifts needed to reduce ifs_fr_statesize to zero.
 */
218 218 if (ifs->ifs_fr_state_maxbucket == 0) {
219 219 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1)
220 220 ifs->ifs_fr_state_maxbucket++;
221 221 ifs->ifs_fr_state_maxbucket *= 2;
222 222 }
223 223
/*
 * Set up the timeout queues and link them through ifq_next in the order
 * tqtqb[] -> udp -> udpack -> icmp -> icmpack -> ip -> delete. Each queue
 * starts empty (head NULL, tail pointing at its own head) with ifq_ref 1.
 */
224 224 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs);
225 225 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq;
226 226 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout;
227 227 ifs->ifs_ips_udptq.ifq_ref = 1;
228 228 ifs->ifs_ips_udptq.ifq_head = NULL;
229 229 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head;
230 230 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab");
231 231 ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq;
232 232 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout;
233 233 ifs->ifs_ips_udpacktq.ifq_ref = 1;
234 234 ifs->ifs_ips_udpacktq.ifq_head = NULL;
235 235 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head;
236 236 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab");
237 237 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq;
238 238 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout;
239 239 ifs->ifs_ips_icmptq.ifq_ref = 1;
240 240 ifs->ifs_ips_icmptq.ifq_head = NULL;
241 241 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head;
242 242 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab");
243 243 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq;
244 244 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout;
245 245 ifs->ifs_ips_icmpacktq.ifq_ref = 1;
246 246 ifs->ifs_ips_icmpacktq.ifq_head = NULL;
247 247 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head;
248 248 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab");
249 249 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq;
250 250 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout;
251 251 ifs->ifs_ips_iptq.ifq_ref = 1;
252 252 ifs->ifs_ips_iptq.ifq_head = NULL;
253 253 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head;
254 254 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab");
255 255 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq;
256 256 /* entry's ttl in deletetq is just 1 tick */
257 257 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1;
258 258 ifs->ifs_ips_deletetq.ifq_ref = 1;
259 259 ifs->ifs_ips_deletetq.ifq_head = NULL;
260 260 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head;
261 261 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue");
262 262 ifs->ifs_ips_deletetq.ifq_next = NULL;
263 263
/*
 * ifs_fr_state_init is the flag fr_stateunload() checks before destroying
 * the locks initialised in this function.
 */
264 264 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock");
265 265 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex");
266 266 ifs->ifs_fr_state_init = 1;
267 267
268 268 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;
269 269 return 0;
270 270 }
271 271
272 272
273 273 /* ------------------------------------------------------------------------ */
274 274 /* Function: fr_stateunload */
275 275 /* Returns: Nil */
276 276 /* Parameters: ifs - ipf stack instance */
277 277 /* */
278 278 /* Release and destroy any resources acquired or initialised so that */
279 279 /* IPFilter can be unloaded or re-initialised. */
280 280 /* ------------------------------------------------------------------------ */
281 281 void fr_stateunload(ifs)
282 282 ipf_stack_t *ifs;
283 283 {
284 284 ipftq_t *ifq, *ifqnext;
285 285 ipstate_t *is;
286 286
287 287 while ((is = ifs->ifs_ips_list) != NULL)
288 288 (void) fr_delstate(is, 0, ifs);
289 289
290 290 /*
291 291 * Proxy timeout queues are not cleaned here because although they
292 292 * exist on the state list, appr_unload is called after fr_stateunload
293 293 * and the proxies actually are responsible for them being created.
294 294 * Should the proxy timeouts have their own list? There's no real
295 295 * justification as this is the only complicationA
296 296 */
297 297 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
298 298 ifqnext = ifq->ifq_next;
299 299 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
300 300 (fr_deletetimeoutqueue(ifq) == 0))
301 301 fr_freetimeoutqueue(ifq, ifs);
302 302 }
303 303
304 304 ifs->ifs_ips_stats.iss_inuse = 0;
305 305 ifs->ifs_ips_num = 0;
306 306
307 307 if (ifs->ifs_fr_state_init == 1) {
308 308 fr_sttab_destroy(ifs->ifs_ips_tqtqb);
309 309 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock);
310 310 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock);
311 311 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock);
312 312 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock);
313 313 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock);
314 314 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock);
315 315 }
316 316
317 317 if (ifs->ifs_ips_table != NULL) {
318 318 KFREES(ifs->ifs_ips_table,
319 319 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table));
320 320 ifs->ifs_ips_table = NULL;
321 321 }
322 322
323 323 if (ifs->ifs_ips_seed != NULL) {
324 324 KFREES(ifs->ifs_ips_seed,
325 325 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed));
326 326 ifs->ifs_ips_seed = NULL;
327 327 }
328 328
329 329 if (ifs->ifs_ips_stats.iss_bucketlen != NULL) {
330 330 KFREES(ifs->ifs_ips_stats.iss_bucketlen,
331 331 ifs->ifs_fr_statesize * sizeof(u_long));
332 332 ifs->ifs_ips_stats.iss_bucketlen = NULL;
333 333 }
334 334
335 335 if (ifs->ifs_fr_state_maxbucket_reset == 1)
336 336 ifs->ifs_fr_state_maxbucket = 0;
337 337
338 338 if (ifs->ifs_fr_state_init == 1) {
339 339 ifs->ifs_fr_state_init = 0;
340 340 RW_DESTROY(&ifs->ifs_ipf_state);
341 341 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert);
342 342 }
343 343 }
344 344
345 345
346 346 /* ------------------------------------------------------------------------ */
347 347 /* Function: fr_statetstats */
348 348 /* Returns: ips_state_t* - pointer to state stats structure */
349 349 /* Parameters: Nil */
350 350 /* */
351 351 /* Put all the current numbers and pointers into a single struct and return */
352 352 /* a pointer to it. */
353 353 /* ------------------------------------------------------------------------ */
354 354 static ips_stat_t *fr_statetstats(ifs)
355 355 ipf_stack_t *ifs;
356 356 {
357 357 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num;
358 358 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize;
359 359 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax;
360 360 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table;
361 361 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list;
362 362 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks;
363 363 return &ifs->ifs_ips_stats;
364 364 }
365 365
366 366 /* ------------------------------------------------------------------------ */
367 367 /* Function: fr_state_remove */
368 368 /* Returns: int - 0 == success, != 0 == failure */
369 369 /* Parameters: data(I) - pointer to state structure to delete from table */
370 370 /* ifs - ipf stack instance */
371 371 /* */
372 372 /* Search for a state structure that matches the one passed, according to */
373 373 /* the IP addresses and other protocol specific information. */
374 374 /* ------------------------------------------------------------------------ */
375 375 static int fr_state_remove(data, ifs)
376 376 caddr_t data;
377 377 ipf_stack_t *ifs;
378 378 {
379 379 ipstate_t *sp, st;
380 380 int error;
381 381
382 382 sp = &st;
383 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE);
384 384 if (error)
385 385 return EFAULT;
386 386
387 387 WRITE_ENTER(&ifs->ifs_ipf_state);
388 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next)
389 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
390 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src,
391 391 sizeof(st.is_src)) &&
392 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst,
393 393 sizeof(st.is_dst)) &&
394 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps,
395 395 sizeof(st.is_ps))) {
396 396 (void) fr_delstate(sp, ISL_REMOVE, ifs);
397 397 RWLOCK_EXIT(&ifs->ifs_ipf_state);
398 398 return 0;
399 399 }
400 400 RWLOCK_EXIT(&ifs->ifs_ipf_state);
401 401 return ESRCH;
402 402 }
403 403
404 404
405 405 /* ------------------------------------------------------------------------ */
406 406 /* Function: fr_state_ioctl */
407 407 /* Returns: int - 0 == success, != 0 == failure */
408 408 /* Parameters: data(I) - pointer to ioctl data */
409 409 /* cmd(I) - ioctl command integer */
410 410 /* mode(I) - file mode bits used with open */
411 411 /* uid(I) - uid of caller */
412 412 /* ctx(I) - pointer to give the uid context */
413 413 /* ifs - ipf stack instance */
414 414 /* */
415 415 /* Processes an ioctl call made to operate on the IP Filter state device. */
/* Unrecognised commands return EINVAL. uid and ctx are only used by the */
/* token based commands (SIOCGENITER and SIOCIPFDELTOK). */
416 416 /* ------------------------------------------------------------------------ */
417 417 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs)
418 418 caddr_t data;
419 419 ioctlcmd_t cmd;
420 420 int mode, uid;
421 421 void *ctx;
422 422 ipf_stack_t *ifs;
423 423 {
424 424 int arg, ret, error = 0;
425 425
426 426 switch (cmd)
427 427 {
428 428 /*
429 429 * Delete an entry from the state table.
430 430 */
431 431 case SIOCDELST :
432 432 error = fr_state_remove(data, ifs);
433 433 break;
434 434 /*
435 435 * Flush the state table
436 436 */
437 437 case SIOCIPFFL :
438 438 error = BCOPYIN(data, (char *)&arg, sizeof(arg));
439 439 if (error != 0) {
440 440 error = EFAULT;
441 441 } else {
442 442 if (VALID_TABLE_FLUSH_OPT(arg)) {
/* Flush IPv4 entries (second argument 4) under the state write lock. */
443 443 WRITE_ENTER(&ifs->ifs_ipf_state);
444 444 ret = fr_state_flush(arg, 4, ifs);
445 445 RWLOCK_EXIT(&ifs->ifs_ipf_state);
/* The value returned by fr_state_flush() is copied back to the caller. */
446 446 error = BCOPYOUT((char *)&ret, data,
447 447 sizeof(ret));
448 448 if (error != 0)
449 449 return EFAULT;
450 450 } else {
451 451 error = EINVAL;
452 452 }
453 453 }
454 454 break;
455 455
456 456 #ifdef USE_INET6
/*
 * Flush the state table, IPv6 entries (second argument 6); otherwise
 * identical to SIOCIPFFL above.
 */
457 457 case SIOCIPFL6 :
458 458 error = BCOPYIN(data, (char *)&arg, sizeof(arg));
459 459 if (error != 0) {
460 460 error = EFAULT;
461 461 } else {
462 462 if (VALID_TABLE_FLUSH_OPT(arg)) {
463 463 WRITE_ENTER(&ifs->ifs_ipf_state);
464 464 ret = fr_state_flush(arg, 6, ifs);
465 465 RWLOCK_EXIT(&ifs->ifs_ipf_state);
466 466 error = BCOPYOUT((char *)&ret, data,
467 467 sizeof(ret));
468 468 if (error != 0)
469 469 return EFAULT;
470 470 } else {
471 471 error = EINVAL;
472 472 }
473 473 }
474 474 break;
475 475 #endif
476 476 #ifdef IPFILTER_LOG
477 477 /*
478 478 * Flush the state log.
479 479 */
480 480 case SIOCIPFFB :
481 481 if (!(mode & FWRITE))
482 482 error = EPERM;
483 483 else {
484 484 int tmp;
485 485
/* The result of ipflog_clear() is handed back to the caller. */
486 486 tmp = ipflog_clear(IPL_LOGSTATE, ifs);
487 487 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp));
488 488 if (error != 0)
489 489 error = EFAULT;
490 490 }
491 491 break;
492 492 /*
493 493 * Turn logging of state information on/off.
494 494 */
495 495 case SIOCSETLG :
496 496 if (!(mode & FWRITE)) {
497 497 error = EPERM;
498 498 } else {
/* The caller's value is copied directly into the live logging flag. */
499 499 error = BCOPYIN((char *)data,
500 500 (char *)&ifs->ifs_ipstate_logging,
501 501 sizeof(ifs->ifs_ipstate_logging));
502 502 if (error != 0)
503 503 error = EFAULT;
504 504 }
505 505 break;
506 506 /*
507 507 * Return the current state of logging.
508 508 */
509 509 case SIOCGETLG :
510 510 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging,
511 511 (char *)data,
512 512 sizeof(ifs->ifs_ipstate_logging));
513 513 if (error != 0)
514 514 error = EFAULT;
515 515 break;
516 516 /*
517 517 * Return the number of bytes currently waiting to be read.
518 518 */
519 519 case FIONREAD :
520 520 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */
521 521 error = BCOPYOUT((char *)&arg, data, sizeof(arg));
522 522 if (error != 0)
523 523 error = EFAULT;
524 524 break;
525 525 #endif
526 526 /*
527 527 * Get the current state statistics.
528 528 */
529 529 case SIOCGETFS :
530 530 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT);
531 531 break;
532 532 /*
533 533 * Lock/Unlock the state table. (Locking prevents any changes, which
534 534 * means no packets match).
535 535 */
536 536 case SIOCSTLCK :
537 537 if (!(mode & FWRITE)) {
538 538 error = EPERM;
539 539 } else {
540 540 error = fr_lock(data, &ifs->ifs_fr_state_lock);
541 541 }
542 542 break;
543 543 /*
544 544 * Add an entry to the current state table.
545 545 */
546 546 case SIOCSTPUT :
/* Requires both the state table lock flag to be set and write access. */
547 547 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) {
548 548 error = EACCES;
549 549 break;
550 550 }
551 551 error = fr_stputent(data, ifs);
552 552 break;
553 553 /*
554 554 * Get a state table entry.
555 555 */
556 556 case SIOCSTGET :
/* Only permitted while the state table lock flag is set. */
557 557 if (!ifs->ifs_fr_state_lock) {
558 558 error = EACCES;
559 559 break;
560 560 }
561 561 error = fr_stgetent(data, ifs);
562 562 break;
563 563
/*
 * Iterate over the state table using a per-caller token.
 */
564 564 case SIOCGENITER :
565 565 {
566 566 ipftoken_t *token;
567 567 ipfgeniter_t iter;
568 568
569 569 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
570 570 if (error != 0)
571 571 break;
572 572
573 573 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs);
574 574 if (token != NULL)
575 575 error = fr_stateiter(token, &iter, ifs);
576 576 else
577 577 error = ESRCH;
/*
 * NOTE(review): ifs_ipf_tokens is released here but is not acquired
 * anywhere in this function; presumably ipf_findtoken() returns with
 * it held - confirm against ipf_findtoken().
 */
578 578 RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
579 579 break;
580 580 }
581 581
/*
 * Delete the iterator token identified by the integer copied in.
 */
582 582 case SIOCIPFDELTOK :
583 583 error = BCOPYIN(data, (char *)&arg, sizeof(arg));
584 584 if (error != 0) {
585 585 error = EFAULT;
586 586 } else {
587 587 error = ipf_deltoken(arg, uid, ctx, ifs);
588 588 }
589 589 break;
590 590
591 591 default :
592 592 error = EINVAL;
593 593 break;
594 594 }
595 595 return error;
596 596 }
597 597
598 598
599 599 /* ------------------------------------------------------------------------ */
600 600 /* Function: fr_stgetent */
601 601 /* Returns: int - 0 == success, != 0 == failure */
602 602 /* Parameters: data(I) - pointer to state structure to retrieve from table */
603 603 /* */
604 604 /* Copy out state information from the kernel to a user space process. If */
605 605 /* there is a filter rule associated with the state entry, copy that out */
606 606 /* as well. The entry to copy out is taken from the value of "ips_next" in */
607 607 /* the struct passed in and if not null and not found in the list of current*/
608 608 /* state entries, the retrieval fails. */
609 609 /* ------------------------------------------------------------------------ */
610 610 int fr_stgetent(data, ifs)
611 611 caddr_t data;
612 612 ipf_stack_t *ifs;
613 613 {
614 614 ipstate_t *is, *isn;
615 615 ipstate_save_t ips;
616 616 int error;
617 617
618 618 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
619 619 if (error)
620 620 return EFAULT;
621 621
622 622 isn = ips.ips_next;
623 623 if (isn == NULL) {
624 624 isn = ifs->ifs_ips_list;
625 625 if (isn == NULL) {
626 626 if (ips.ips_next == NULL)
627 627 return ENOENT;
628 628 return 0;
629 629 }
630 630 } else {
631 631 /*
632 632 * Make sure the pointer we're copying from exists in the
633 633 * current list of entries. Security precaution to prevent
634 634 * copying of random kernel data.
635 635 */
636 636 for (is = ifs->ifs_ips_list; is; is = is->is_next)
637 637 if (is == isn)
638 638 break;
639 639 if (!is)
640 640 return ESRCH;
641 641 }
642 642 ips.ips_next = isn->is_next;
643 643 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
644 644 ips.ips_rule = isn->is_rule;
645 645 if (isn->is_rule != NULL)
646 646 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
647 647 sizeof(ips.ips_fr));
648 648 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
649 649 if (error)
650 650 return EFAULT;
651 651 return 0;
652 652 }
653 653
654 654
655 655 /* ------------------------------------------------------------------------ */
656 656 /* Function: fr_stputent */
657 657 /* Returns: int - 0 == success, != 0 == failure */
658 658 /* Parameters: data(I) - pointer to state information struct */
659 659 /* ifs - ipf stack instance */
660 660 /* */
661 661 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
662 662 /* the state table. If the state info. includes a pointer to a filter rule */
663 663 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
664 664 /* output). */
/* */
/* Errors: EFAULT if copying the request in (or the result out) fails, */
/* ENOMEM if the table is full or an allocation fails, */
/* ESRCH if an existing rule is referenced but no current state */
/* entry points at it. */
665 665 /* ------------------------------------------------------------------------ */
666 666 int fr_stputent(data, ifs)
667 667 caddr_t data;
668 668 ipf_stack_t *ifs;
669 669 {
670 670 ipstate_t *is, *isn;
671 671 ipstate_save_t ips;
672 672 int error, i;
673 673 frentry_t *fr;
674 674 char *name;
675 675
676 676 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE);
677 677 if (error)
678 678 return EFAULT;
679 679
680 680 /*
681 681 * Trigger automatic call to fr_state_flush() if the
682 682 * table has reached capacity specified by hi watermark.
683 683 */
684 684 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi)
685 685 ifs->ifs_fr_state_doflush = 1;
686 686
687 687 /*
688 688 * If automatic flushing did not do its job, and the table
689 689 * has filled up, don't try to create a new entry.
690 690 */
691 691 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) {
692 692 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max);
693 693 return ENOMEM;
694 694 }
695 695
696 696 KMALLOC(isn, ipstate_t *);
697 697 if (isn == NULL)
698 698 return ENOMEM;
699 699
/*
 * Start from the caller's copy, then wipe every field that precedes
 * is_pkts and reset the timeout queue entry, so that none of those
 * caller supplied values survive into the new entry.
 */
700 700 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
701 701 bzero((char *)isn, offsetof(struct ipstate, is_pkts));
702 702 isn->is_sti.tqe_pnext = NULL;
703 703 isn->is_sti.tqe_next = NULL;
704 704 isn->is_sti.tqe_ifq = NULL;
705 705 isn->is_sti.tqe_parent = isn;
706 706 isn->is_ifp[0] = NULL;
707 707 isn->is_ifp[1] = NULL;
708 708 isn->is_ifp[2] = NULL;
709 709 isn->is_ifp[3] = NULL;
710 710 isn->is_sync = NULL;
711 711 fr = ips.ips_rule;
712 712
/* Case 1: no rule attached - insert the bare state entry. */
713 713 if (fr == NULL) {
714 714 READ_ENTER(&ifs->ifs_ipf_state);
715 715 fr_stinsert(isn, 0, ifs);
/* fr_stinsert() returns with is_lock held; release it here. */
716 716 MUTEX_EXIT(&isn->is_lock);
717 717 RWLOCK_EXIT(&ifs->ifs_ipf_state);
718 718 return 0;
719 719 }
720 720
/* Case 2: SI_NEWFR set - build a private copy of the supplied rule. */
721 721 if (isn->is_flags & SI_NEWFR) {
722 722 KMALLOC(fr, frentry_t *);
723 723 if (fr == NULL) {
724 724 KFREE(isn);
725 725 return ENOMEM;
726 726 }
727 727 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
728 728 isn->is_rule = fr;
/* Also recorded in ips so the copy-out below reports the new rule pointer. */
729 729 ips.ips_is.is_rule = fr;
730 730 MUTEX_NUKE(&fr->fr_lock);
731 731 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
732 732
733 733 /*
734 734 * Look up all the interface names in the rule.
735 735 */
736 736 for (i = 0; i < 4; i++) {
737 737 name = fr->fr_ifnames[i];
738 738 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs);
739 739 name = isn->is_ifname[i];
740 740 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs);
741 741 }
742 742
/* Reset the reference count, data and type fields copied from the caller. */
743 743 fr->fr_ref = 0;
744 744 fr->fr_dsize = 0;
745 745 fr->fr_data = NULL;
746 746 fr->fr_type = FR_T_NONE;
747 747
748 748 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs);
749 749 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs);
750 750 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs);
751 751
752 752 /*
753 753 * send a copy back to userland of what we ended up
754 754 * to allow for verification.
755 755 */
756 756 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE);
757 757 if (error) {
/* Nothing has been inserted yet, so both allocations are undone. */
758 758 KFREE(isn);
759 759 MUTEX_DESTROY(&fr->fr_lock);
760 760 KFREE(fr);
761 761 return EFAULT;
762 762 }
763 763 READ_ENTER(&ifs->ifs_ipf_state);
764 764 fr_stinsert(isn, 0, ifs);
765 765 MUTEX_EXIT(&isn->is_lock);
766 766 RWLOCK_EXIT(&ifs->ifs_ipf_state);
767 767
768 768 } else {
/*
 * Case 3: the caller names an existing rule. Accept it only if some
 * current state entry already points at that rule.
 * NOTE(review): isn->is_rule is not assigned in this branch; if is_rule
 * lies before is_pkts in struct ipstate it was cleared by the bzero()
 * above, so the new entry would carry no rule - confirm this is intended.
 */
769 769 READ_ENTER(&ifs->ifs_ipf_state);
770 770 for (is = ifs->ifs_ips_list; is; is = is->is_next)
771 771 if (is->is_rule == fr) {
772 772 fr_stinsert(isn, 0, ifs);
773 773 MUTEX_EXIT(&isn->is_lock);
774 774 break;
775 775 }
776 776
/* No entry referenced the rule: discard the new state entry. */
777 777 if (is == NULL) {
778 778 KFREE(isn);
779 779 isn = NULL;
780 780 }
781 781 RWLOCK_EXIT(&ifs->ifs_ipf_state);
782 782
783 783 return (isn == NULL) ? ESRCH : 0;
784 784 }
785 785
786 786 return 0;
787 787 }
788 788
789 789
790 790 /* ------------------------------------------------------------------------ */
791 791 /* Function: fr_stinsert */
792 792 /* Returns: Nil */
793 793 /* Parameters: is(I) - pointer to state structure */
794 794 /* rev(I) - flag indicating forward/reverse direction of packet */
/* ifs - ipf stack instance */
795 795 /* */
796 796 /* Inserts a state structure into the hash table (for lookups) and the list */
797 797 /* of state entries (for enumeration). Resolves all of the interface names */
798 798 /* to pointers and adjusts running stats for the hash table as appropriate. */
799 799 /* */
800 800 /* Locking: it is assumed that some kind of lock on ipf_state is held. */
801 801 /* Exits with is_lock initialised and held. */
802 802 /* ------------------------------------------------------------------------ */
803 803 void fr_stinsert(is, rev, ifs)
804 804 ipstate_t *is;
805 805 int rev;
806 806 ipf_stack_t *ifs;
807 807 {
808 808 frentry_t *fr;
809 809 u_int hv;
810 810 int i;
811 811
812 812 MUTEX_INIT(&is->is_lock, "ipf state entry");
813 813
/*
 * If the entry has a rule, bump both the rule's reference count and its
 * state count under the rule's own lock.
 */
814 814 fr = is->is_rule;
815 815 if (fr != NULL) {
816 816 MUTEX_ENTER(&fr->fr_lock);
817 817 fr->fr_ref++;
818 818 fr->fr_statecnt++;
819 819 MUTEX_EXIT(&fr->fr_lock);
820 820 }
821 821
822 822 /*
823 823 * Look up all the interface names in the state entry.
824 824 */
825 825 for (i = 0; i < 4; i++) {
/* Interface pointers that are already set are left alone. */
826 826 if (is->is_ifp[i] != NULL)
827 827 continue;
828 828 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs);
829 829 }
830 830
831 831 /*
832 832 * If we could trust is_hv, then the modulous would not be needed, but
833 833 * when running with IPFILTER_SYNC, this stops bad values.
834 834 */
835 835 hv = is->is_hv % ifs->ifs_fr_statesize;
836 836 is->is_hv = hv;
837 837
838 838 /*
839 839 * We need to get both of these locks...the first because it is
840 840 * possible that once the insert is complete another packet might
841 841 * come along, match the entry and want to update it.
842 842 */
843 843 MUTEX_ENTER(&is->is_lock);
844 844 MUTEX_ENTER(&ifs->ifs_ipf_stinsert);
845 845
846 846 /*
847 847 * add into list table.
848 848 */
/*
 * The new entry becomes the head of the list; the previous head's back
 * pointer is re-aimed at the new entry's is_next field.
 */
849 849 if (ifs->ifs_ips_list != NULL)
850 850 ifs->ifs_ips_list->is_pnext = &is->is_next;
851 851 is->is_pnext = &ifs->ifs_ips_list;
852 852 is->is_next = ifs->ifs_ips_list;
853 853 ifs->ifs_ips_list = is;
854 854
/*
 * Same head insertion for hash bucket hv. iss_inuse is incremented only
 * when the bucket was empty before this insert.
 */
855 855 if (ifs->ifs_ips_table[hv] != NULL)
856 856 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext;
857 857 else
858 858 ifs->ifs_ips_stats.iss_inuse++;
859 859 is->is_phnext = ifs->ifs_ips_table + hv;
860 860 is->is_hnext = ifs->ifs_ips_table[hv];
861 861 ifs->ifs_ips_table[hv] = is;
862 862 ifs->ifs_ips_stats.iss_bucketlen[hv]++;
863 863 ifs->ifs_ips_num++;
864 864 MUTEX_EXIT(&ifs->ifs_ipf_stinsert);
865 865
/* is_lock is deliberately still held here and on return (see header). */
866 866 fr_setstatequeue(is, rev, ifs);
867 867 }
868 868
869 869 /* ------------------------------------------------------------------------ */
870 870 /* Function: fr_match_ipv4addrs */
871 871 /* Returns: int - 2 strong match (same addresses, same direction) */
872 872 /* 1 weak match (same address, opposite direction) */
873 873 /* 0 no match */
874 874 /* */
875 875 /* Function matches IPv4 addresses. */
876 876 /* ------------------------------------------------------------------------ */
877 877 static int fr_match_ipv4addrs(is1, is2)
878 878 ipstate_t *is1;
879 879 ipstate_t *is2;
880 880 {
881 881 int rv;
882 882
883 883 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
884 884 rv = 2;
885 885 else if (is1->is_saddr == is2->is_daddr &&
886 886 is1->is_daddr == is2->is_saddr)
887 887 rv = 1;
888 888 else
889 889 rv = 0;
890 890
891 891 return (rv);
892 892 }
893 893
894 894 /* ------------------------------------------------------------------------ */
895 895 /* Function: fr_match_ipv6addrs */
896 896 /* Returns: int - 2 strong match (same addresses, same direction) */
897 897 /* 1 weak match (same addresses, opposite direction) */
898 898 /* 0 no match */
899 899 /* */
900 900 /* Function matches IPv6 addresses. */
901 901 /* ------------------------------------------------------------------------ */
902 902 static int fr_match_ipv6addrs(is1, is2)
903 903 ipstate_t *is1;
904 904 ipstate_t *is2;
905 905 {
906 906 int rv;
907 907
908 908 if (IP6_EQ(&is1->is_src, &is2->is_src) &&
909 909 IP6_EQ(&is1->is_dst, &is2->is_dst))
910 910 rv = 2;
911 911 else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
912 912 IP6_EQ(&is1->is_dst, &is2->is_src)) {
913 913 rv = 1;
914 914 }
915 915 else
916 916 rv = 0;
917 917
918 918 return (rv);
919 919 }
920 920 /* ------------------------------------------------------------------------ */
921 921 /* Function: fr_match_addresses */
922 922 /* Returns: int - 2 strong match (same addresses, same direction) */
923 923 /* 1 weak match (same address, opposite directions) */
924 924 /* 0 no match */
925 925 /* Parameters: is1, is2 pointers to states we are checking */
926 926 /* */
927 927 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */
928 928 /* and IPv6 address format. */
929 929 /* ------------------------------------------------------------------------ */
930 930 static int fr_match_addresses(is1, is2)
931 931 ipstate_t *is1;
932 932 ipstate_t *is2;
933 933 {
934 934 int rv;
935 935
936 936 if (is1->is_v == 4) {
937 937 rv = fr_match_ipv4addrs(is1, is2);
938 938 } else {
939 939 rv = fr_match_ipv6addrs(is1, is2);
940 940 }
941 941
942 942 return (rv);
943 943 }
944 944
945 945 /* ------------------------------------------------------------------------ */
946 946 /* Function: fr_match_ppairs */
947 947 /* Returns: int - 2 strong match (same ports, same direction) */
948 948 /* 1 weak match (same ports, different direction) */
949 949 /* 0 no match */
950 950 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */
951 951 /* */
952 952 /* Matches two port_pair_t types (port pairs). Each port pair contains */
953 953 /* src, dst port, which belong to session (state entry). */
954 954 /* ------------------------------------------------------------------------ */
955 955 static int fr_match_ppairs(ppairs1, ppairs2)
956 956 port_pair_t *ppairs1;
957 957 port_pair_t *ppairs2;
958 958 {
959 959 int rv;
960 960
961 961 if (ppairs1->pp_sport == ppairs2->pp_sport &&
962 962 ppairs1->pp_dport == ppairs2->pp_dport)
963 963 rv = 2;
964 964 else if (ppairs1->pp_sport == ppairs2->pp_dport &&
965 965 ppairs1->pp_dport == ppairs2->pp_sport)
966 966 rv = 1;
967 967 else
968 968 rv = 0;
969 969
970 970 return (rv);
971 971 }
972 972
973 973 /* ------------------------------------------------------------------------ */
974 974 /* Function: fr_match_l4_hdr */
975 975 /* Returns: int - 0 no match, */
976 976 /* 1 weak match (same ports, different directions) */
977 977 /* 2 strong match (same ports, same direction) */
978 978 /* Parameters is1, is2 - states we want to match */
979 979 /* */
980 980 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */
981 981 /* GRE protocol). */
982 982 /* ------------------------------------------------------------------------ */
983 983 static int fr_match_l4_hdr(is1, is2)
984 984 ipstate_t *is1;
985 985 ipstate_t *is2;
986 986 {
987 987 int rv = 0;
988 988 port_pair_t pp1;
989 989 port_pair_t pp2;
990 990
991 991 if (is1->is_p != is2->is_p)
992 992 return (0);
993 993
994 994 switch (is1->is_p) {
995 995 case IPPROTO_TCP:
996 996 pp1.pp_sport = is1->is_ps.is_ts.ts_sport;
997 997 pp1.pp_dport = is1->is_ps.is_ts.ts_dport;
998 998 pp2.pp_sport = is2->is_ps.is_ts.ts_sport;
999 999 pp2.pp_dport = is2->is_ps.is_ts.ts_dport;
1000 1000 rv = fr_match_ppairs(&pp1, &pp2);
1001 1001 break;
1002 1002 case IPPROTO_UDP:
1003 1003 pp1.pp_sport = is1->is_ps.is_us.us_sport;
1004 1004 pp1.pp_dport = is1->is_ps.is_us.us_dport;
1005 1005 pp2.pp_sport = is2->is_ps.is_us.us_sport;
1006 1006 pp2.pp_dport = is2->is_ps.is_us.us_dport;
1007 1007 rv = fr_match_ppairs(&pp1, &pp2);
1008 1008 break;
1009 1009 case IPPROTO_GRE:
1010 1010 /* greinfo_t can be also interprted as port pair */
1011 1011 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0];
1012 1012 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1];
1013 1013 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0];
1014 1014 pp2.pp_dport = is2->is_ps.is_ug.gs_call[1];
1015 1015 rv = fr_match_ppairs(&pp1, &pp2);
1016 1016 break;
1017 1017 case IPPROTO_ICMP:
1018 1018 case IPPROTO_ICMPV6:
1019 1019 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t)))
1020 1020 rv = 1;
1021 1021 else
1022 1022 rv = 0;
1023 1023 break;
1024 1024 default:
1025 1025 rv = 0;
1026 1026 }
1027 1027
1028 1028 return (rv);
1029 1029 }
1030 1030
1031 1031 /* ------------------------------------------------------------------------ */
1032 1032 /* Function: fr_matchstates */
1033 1033 /* Returns: int - nonzero match, zero no match */
1034 1034 /* Parameters is1, is2 - states we want to match */
1035 1035 /* */
1036 1036 /* The state entries are equal (identical match) if they belong to the same */
1037 1037 /* session. Any time new state entry is being added the fr_addstate() */
1038 1038 /* function creates temporal state entry from the data it gets from IP and */
1039 1039 /* L4 header. The fr_matchstats() must be also aware of packet direction, */
1040 1040 /* which is also stored within the state entry. We should keep in mind the */
1041 1041 /* information about packet direction is spread accross L3 (addresses) and */
1042 1042 /* L4 (ports). There are three possible relationships betwee is1, is2: */
1043 1043 /* - no match (match(is1, is2) == 0)) */
1044 1044 /* - weak match same addresses (ports), but different */
1045 1045 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */
1046 1046 /* - strong match same addresses (ports) and same directions */
1047 1047 /* (2) (fr_match_xxxx(is1, is2) == 2) */
1048 1048 /* */
1049 1049 /* There are functions, which match match addresses (L3 header) in is1, is2 */
1050 1050 /* and functions, which are used to compare ports (L4 header) data. We say */
1051 1051 /* the is1 and is2 are same (identical) if there is a match */
1052 1052 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */
1053 1053 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */
1054 1054 /* Such requirement deals with case as follows: */
1055 1055 /* suppose there are two connections between hosts A, B. Connection 1: */
1056 1056 /* a.a.a.a:12345 <=> b.b.b.b:54321 */
1057 1057 /* Connection 2: */
1058 1058 /* a.a.a.a:54321 <=> b.b.b.b:12345 */
1059 1059 /* since we've introduced match levels into our fr_matchstates(), we are */
1060 1060 /* able to identify, which packets belong to connection A and which belong */
1061 1061 /* to connection B. Assume there are two entries is1, is2. is1 has been */
1062 1062 /* from con. 1 packet, which travelled from A to B: */
1063 1063 /* a.a.a.a:12345 -> b.b.b.b:54321 */
1064 1064 /* while s2, has been created from packet which belongs to con. 2 and is */
1065 1065 /* also coming from A to B: */
1066 1066 /* a.a.a.a:54321 -> b.b.b.b:12345 */
1067 1067 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */
1068 1068 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */
1069 1069 /* different the state entries are not identical -> no match as a final */
1070 1070 /* result. */
1071 1071 /* ------------------------------------------------------------------------ */
1072 1072 static int fr_matchstates(is1, is2)
1073 1073 ipstate_t *is1;
1074 1074 ipstate_t *is2;
1075 1075 {
1076 1076 int rv;
1077 1077 int amatch;
1078 1078 int pmatch;
1079 1079
1080 1080 if (bcmp(&is1->is_pass, &is2->is_pass,
1081 1081 offsetof(struct ipstate, is_ps) -
1082 1082 offsetof(struct ipstate, is_pass)) == 0) {
1083 1083
1084 1084 pmatch = fr_match_l4_hdr(is1, is2);
1085 1085 amatch = fr_match_addresses(is1, is2);
1086 1086 /*
1087 1087 * If addresses match (amatch != 0), then 'match levels'
1088 1088 * must be same for matching entries. If amatch and pmatch
1089 1089 * have different values (different match levels), then
1090 1090 * is1 and is2 belong to different sessions.
1091 1091 */
1092 1092 rv = (amatch != 0) && (amatch == pmatch);
1093 1093 }
1094 1094 else
1095 1095 rv = 0;
1096 1096
1097 1097 return (rv);
1098 1098 }
1099 1099
1100 1100 /* ------------------------------------------------------------------------ */
1101 1101 /* Function: fr_addstate */
1102 1102 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */
1103 1103 /* Parameters: fin(I) - pointer to packet information */
1104 1104 /* stsave(O) - pointer to place to save pointer to created */
1105 1105 /* state structure. */
1106 1106 /* flags(I) - flags to use when creating the structure */
1107 1107 /* */
1108 1108 /* Creates a new IP state structure from the packet information collected. */
1109 1109 /* Inserts it into the state table and appends to the bottom of the active */
1110 1110 /* list. If the capacity of the table has reached the maximum allowed then */
1111 1111 /* the call will fail and a flush is scheduled for the next timeout call. */
1112 1112 /* ------------------------------------------------------------------------ */
/*
 * NULL is returned without creating state when: state creation is locked
 * out or the packet carries one of the disqualifying fin_flx flags; the
 * table-wide or per-rule state limit has been reached; the ICMP/ICMPv6 type
 * is not one of those listed in the switch below; a TCP packet has RST set;
 * an entry matching the packet already exists in its hash chain; the hash
 * bucket is full; or the allocation of the new entry fails.
 */
1113 1113 ipstate_t *fr_addstate(fin, stsave, flags)
1114 1114 fr_info_t *fin;
1115 1115 ipstate_t **stsave;
1116 1116 u_int flags;
1117 1117 {
1118 1118 ipstate_t *is, ips;
1119 1119 struct icmp *ic;
1120 1120 u_int pass, hv;
1121 1121 frentry_t *fr;
1122 1122 tcphdr_t *tcp;
1123 1123 grehdr_t *gre;
1124 1124 void *ifp;
1125 1125 int out;
1126 1126 ipf_stack_t *ifs = fin->fin_ifs;
1127 1127
/*
 * No state is created while ifs_fr_state_lock is set, nor for packets
 * flagged FI_SHORT, FI_STATE, FI_FRAGBODY or FI_BAD.
 */
1128 1128 if (ifs->ifs_fr_state_lock ||
1129 1129 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)))
1130 1130 return NULL;
1131 1131
/* A packet flagged FI_OOW may only create state if it carries SYN. */
1132 1132 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN))
1133 1133 return NULL;
1134 1134
1135 1135 /*
1136 1136 * Trigger automatic call to fr_state_flush() if the
1137 1137 * table has reached capacity specified by hi watermark.
1138 1138 */
1139 1139 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi)
1140 1140 ifs->ifs_fr_state_doflush = 1;
1141 1141
1142 1142 /*
1143 1143 * If the max number of state entries has been reached, and there is no
1144 1144 * limit on the state count for the rule, then do not continue. In the
1145 1145 * case where a limit exists, it's ok allow the entries to be created as
1146 1146 * long as specified limit itself has not been reached.
1147 1147 *
1148 1148 * Note that because the lock isn't held on fr, it is possible to exceed
1149 1149 * the specified size of the table. However, the cost of this is being
1150 1150 * ignored here; as the number by which it can go over is a product of
1151 1151 * the number of simultaneous threads that could be executing in here.
1152 1152 * So, a limit of 100 won't result in 200, but could result in 101 or 102.
1153 1153 *
1154 1154 * Also note that, since the automatic flush should have been triggered
1155 1155 * well before we reach the maximum number of state table entries, the
1156 1156 * likelihood of reaching the max (and thus exceeding it) is minimal.
1157 1157 */
1158 1158 fr = fin->fin_fr;
1159 1159 if (fr != NULL) {
1160 1160 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) &&
1161 1161 (fr->fr_statemax == 0)) {
1162 1162 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max);
1163 1163 return NULL;
1164 1164 }
1165 1165 if ((fr->fr_statemax != 0) &&
1166 1166 (fr->fr_statecnt >= fr->fr_statemax)) {
1167 1167 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref);
1168 1168 ifs->ifs_fr_state_doflush = 1;
1169 1169 return NULL;
1170 1170 }
1171 1171 }
1172 1172
1173 1173 ic = NULL;
1174 1174 tcp = NULL;
1175 1175 out = fin->fin_out;
/*
 * The candidate entry is first assembled in the on-stack template `ips'.
 * It is copied into allocated memory further down, only after the hash
 * chain has been searched for an already existing matching entry.
 */
1176 1176 is = &ips;
1177 1177 bzero((char *)is, sizeof(*is));
1178 1178
1179 1179 if (fr == NULL) {
1180 1180 pass = ifs->ifs_fr_flags;
1181 1181 is->is_tag = FR_NOLOGTAG;
1182 1182 } else {
1183 1183 pass = fr->fr_flags;
1184 1184 }
1185 1185
1186 1186 is->is_die = 1 + ifs->ifs_fr_ticks;
1187 1187 /*
1188 1188 * We want to check everything that is a property of this packet,
1189 1189 * but we don't (automatically) care about it's fragment status as
1190 1190 * this may change.
1191 1191 */
1192 1192 is->is_pass = pass;
1193 1193 is->is_v = fin->fin_v;
1194 1194 is->is_opt[0] = fin->fin_optmsk;
1195 1195 is->is_optmsk[0] = 0xffffffff;
1196 1196 /*
1197 1197 * The reverse direction option mask will be set in fr_matchsrcdst(),
1198 1198 * when we will see the first packet from the peer. We will leave it
1199 1199 * as zero for now.
1200 1200 */
1201 1201 is->is_optmsk[1] = 0x0;
1202 1202
1203 1203 if (is->is_v == 6) {
1204 1204 is->is_opt[0] &= ~0x8;
1205 1205 is->is_optmsk[0] &= ~0x8;
1206 1206 }
1207 1207 is->is_sec = fin->fin_secmsk;
1208 1208 is->is_secmsk = 0xffff;
1209 1209 is->is_auth = fin->fin_auth;
1210 1210 is->is_authmsk = 0xffff;
1211 1211
1212 1212 /*
1213 1213 * Copy and calculate...
1214 1214 */
1215 1215 hv = (is->is_p = fin->fin_fi.fi_p);
1216 1216 is->is_src = fin->fin_fi.fi_src;
1217 1217 hv += is->is_saddr;
1218 1218 is->is_dst = fin->fin_fi.fi_dst;
1219 1219 hv += is->is_daddr;
1220 1220 #ifdef USE_INET6
1221 1221 if (fin->fin_v == 6) {
1222 1222 /*
1223 1223 * For ICMPv6, we check to see if the destination address is
1224 1224 * a multicast address. If it is, do not include it in the
1225 1225 * calculation of the hash because the correct reply will come
1226 1226 * back from a real address, not a multicast address.
1227 1227 */
1228 1228 if ((is->is_p == IPPROTO_ICMPV6) &&
1229 1229 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
1230 1230 /*
1231 1231 * So you can do keep state with neighbour discovery.
1232 1232 *
1233 1233 * Here we could use the address from the neighbour
1234 1234 * solicit message to put in the state structure and
1235 1235 * we could use that without a wildcard flag too...
1236 1236 */
/*
 * NOTE(review): is_flags is assigned from `flags & IS_INHERITED' after the
 * protocol switch below, so the bit set here appears to be overwritten
 * before the entry is inserted. Confirm whether `flags |= SI_W_DADDR' was
 * intended (compare the IPv4 multicast/broadcast case, which ORs into
 * `flags').
 */
1237 1237 is->is_flags |= SI_W_DADDR;
1238 1238 hv -= is->is_daddr;
1239 1239 } else {
1240 1240 hv += is->is_dst.i6[1];
1241 1241 hv += is->is_dst.i6[2];
1242 1242 hv += is->is_dst.i6[3];
1243 1243 }
1244 1244 hv += is->is_src.i6[1];
1245 1245 hv += is->is_src.i6[2];
1246 1246 hv += is->is_src.i6[3];
1247 1247 }
1248 1248 #endif
/*
 * IPv4 multicast/broadcast: wildcard the multicast/broadcast side (the
 * destination for inbound packets, the source for outbound ones), mark the
 * entry SI_CLONE and take that address back out of the hash.
 */
1249 1249 if ((fin->fin_v == 4) &&
1250 1250 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
1251 1251 if (fin->fin_out == 0) {
1252 1252 flags |= SI_W_DADDR|SI_CLONE;
1253 1253 hv -= is->is_daddr;
1254 1254 } else {
1255 1255 flags |= SI_W_SADDR|SI_CLONE;
1256 1256 hv -= is->is_saddr;
1257 1257 }
1258 1258 }
1259 1259
1260 1260 switch (is->is_p)
1261 1261 {
1262 1262 #ifdef USE_INET6
1263 1263 case IPPROTO_ICMPV6 :
1264 1264 ic = fin->fin_dp;
1265 1265
1266 1266 switch (ic->icmp_type)
1267 1267 {
1268 1268 case ICMP6_ECHO_REQUEST :
1269 1269 is->is_icmp.ici_type = ic->icmp_type;
1270 1270 hv += (is->is_icmp.ici_id = ic->icmp_id);
1271 1271 break;
1272 1272 case ICMP6_MEMBERSHIP_QUERY :
1273 1273 case ND_ROUTER_SOLICIT :
1274 1274 case ND_NEIGHBOR_SOLICIT :
1275 1275 case ICMP6_NI_QUERY :
1276 1276 is->is_icmp.ici_type = ic->icmp_type;
1277 1277 break;
1278 1278 default :
1279 1279 return NULL;
1280 1280 }
1281 1281 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp);
1282 1282 break;
1283 1283 #endif
1284 1284 case IPPROTO_ICMP :
1285 1285 ic = fin->fin_dp;
1286 1286
1287 1287 switch (ic->icmp_type)
1288 1288 {
1289 1289 case ICMP_ECHO :
1290 1290 case ICMP_ECHOREPLY :
1291 1291 case ICMP_TSTAMP :
1292 1292 case ICMP_IREQ :
1293 1293 case ICMP_MASKREQ :
1294 1294 is->is_icmp.ici_type = ic->icmp_type;
1295 1295 hv += (is->is_icmp.ici_id = ic->icmp_id);
1296 1296 break;
1297 1297 default :
1298 1298 return NULL;
1299 1299 }
1300 1300 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp);
1301 1301 break;
1302 1302
1303 1303 case IPPROTO_GRE :
1304 1304 gre = fin->fin_dp;
1305 1305
1306 1306 is->is_gre.gs_flags = gre->gr_flags;
1307 1307 is->is_gre.gs_ptype = gre->gr_ptype;
1308 1308 if (GRE_REV(is->is_gre.gs_flags) == 1) {
1309 1309 is->is_call[0] = fin->fin_data[0];
1310 1310 is->is_call[1] = fin->fin_data[1];
1311 1311 }
1312 1312 break;
1313 1313
1314 1314 case IPPROTO_TCP :
1315 1315 tcp = fin->fin_dp;
1316 1316
1317 1317 if (tcp->th_flags & TH_RST)
1318 1318 return NULL;
1319 1319 /*
1320 1320 * The endian of the ports doesn't matter, but the ack and
1321 1321 * sequence numbers do as we do mathematics on them later.
1322 1322 */
1323 1323 is->is_sport = htons(fin->fin_data[0]);
1324 1324 is->is_dport = htons(fin->fin_data[1]);
1325 1325 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1326 1326 hv += is->is_sport;
1327 1327 hv += is->is_dport;
1328 1328 }
1329 1329
1330 1330 /*
1331 1331 * If this is a real packet then initialise fields in the
1332 1332 * state information structure from the TCP header information.
1333 1333 */
1334 1334
1335 1335 is->is_maxdwin = 1;
1336 1336 is->is_maxswin = ntohs(tcp->th_win);
1337 1337 if (is->is_maxswin == 0)
1338 1338 is->is_maxswin = 1;
1339 1339
1340 1340 if ((fin->fin_flx & FI_IGNORE) == 0) {
/*
 * is_send is the sequence number just past this segment: th_seq plus the
 * payload length (fin_dlen minus the TCP header length), plus one each for
 * SYN and FIN.
 */
1341 1341 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
1342 1342 (TCP_OFF(tcp) << 2) +
1343 1343 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
1344 1344 ((tcp->th_flags & TH_FIN) ? 1 : 0);
1345 1345 is->is_maxsend = is->is_send;
1346 1346
1347 1347 /*
1348 1348 * Window scale option is only present in
1349 1349 * SYN/SYN-ACK packet.
1350 1350 */
1351 1351 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
1352 1352 TH_SYN &&
1353 1353 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
1354 1354 if (fr_tcpoptions(fin, tcp,
1355 1355 &is->is_tcp.ts_data[0]) == -1) {
1356 1356 fin->fin_flx |= FI_BAD;
1357 1357 }
1358 1358 }
1359 1359
1360 1360 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
1361 1361 fr_checknewisn(fin, is);
1362 1362 fr_fixoutisn(fin, is);
1363 1363 }
1364 1364
1365 1365 if ((tcp->th_flags & TH_OPENING) == TH_SYN)
1366 1366 flags |= IS_TCPFSM;
1367 1367 else {
1368 1368 is->is_maxdwin = is->is_maxswin * 2;
1369 1369 is->is_dend = ntohl(tcp->th_ack);
1370 1370 is->is_maxdend = ntohl(tcp->th_ack);
1371 1371 is->is_maxdwin *= 2;
1372 1372 }
1373 1373 }
1374 1374
1375 1375 /*
1376 1376 * If we're creating state for a starting connection, start the
1377 1377 * timer on it as we'll never see an error if it fails to
1378 1378 * connect.
1379 1379 */
1380 1380 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp);
1381 1381 break;
1382 1382
1383 1383 case IPPROTO_UDP :
1384 1384 tcp = fin->fin_dp;
1385 1385
1386 1386 is->is_sport = htons(fin->fin_data[0]);
1387 1387 is->is_dport = htons(fin->fin_data[1]);
1388 1388 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
/*
 * Unlike the TCP case above, the hash is fed from the port fields read
 * directly out of the packet header rather than from is_sport/is_dport.
 */
1389 1389 hv += tcp->th_dport;
1390 1390 hv += tcp->th_sport;
1391 1391 }
1392 1392 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp);
1393 1393 break;
1394 1394
1395 1395 default :
1396 1396 break;
1397 1397 }
1398 1398 hv = DOUBLE_HASH(hv, ifs);
1399 1399 is->is_hv = hv;
1400 1400 is->is_rule = fr;
/*
 * NOTE(review): this is a plain assignment, so any bits placed in the
 * template's is_flags earlier in this function are discarded here - that
 * covers SI_W_DADDR from the ICMPv6 multicast branch and the IS_ISNSYN /
 * IS_ISNACK bit set by fr_checknewisn(). Confirm that this is intended.
 */
1401 1401 is->is_flags = flags & IS_INHERITED;
1402 1402
1403 1403 /*
1404 1404 * Look for identical state.
1405 1405 */
/* From here `is' walks the hash chain; `ips' remains the template. */
1406 1406 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize];
1407 1407 is != NULL;
1408 1408 is = is->is_hnext) {
1409 1409 if (fr_matchstates(&ips, is) == 1)
1410 1410 break;
1411 1411 }
1412 1412
1413 1413 /*
1414 1414 * we've found a matching state -> state already exists,
1415 1415 * we are not going to add a duplicate record.
1416 1416 */
1417 1417 if (is != NULL)
1418 1418 return NULL;
1419 1419
/*
 * hv indexes iss_bucketlen[] directly here; presumably DOUBLE_HASH() has
 * already reduced it to a valid bucket index - TODO confirm.
 */
1420 1420 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) {
1421 1421 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull);
1422 1422 return NULL;
1423 1423 }
1424 1424 KMALLOC(is, ipstate_t *);
1425 1425 if (is == NULL) {
1426 1426 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem);
1427 1427 return NULL;
1428 1428 }
1429 1429 bcopy((char *)&ips, (char *)is, sizeof(*is));
1430 1430 /*
1431 1431 * Do not do the modulous here, it is done in fr_stinsert().
1432 1432 */
1433 1433 if (fr != NULL) {
/*
 * strncpy() leaves is_group unterminated if fr_group occupies all
 * FR_GROUPLEN bytes; presumably fr_group is always NUL-terminated within
 * that length - TODO confirm.
 */
1434 1434 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN);
1435 1435 if (fr->fr_age[0] != 0) {
1436 1436 is->is_tqehead[0] =
1437 1437 fr_addtimeoutqueue(&ifs->ifs_ips_utqe,
1438 1438 fr->fr_age[0], ifs);
1439 1439 is->is_sti.tqe_flags |= TQE_RULEBASED;
1440 1440 }
1441 1441 if (fr->fr_age[1] != 0) {
1442 1442 is->is_tqehead[1] =
1443 1443 fr_addtimeoutqueue(&ifs->ifs_ips_utqe,
1444 1444 fr->fr_age[1], ifs);
1445 1445 is->is_sti.tqe_flags |= TQE_RULEBASED;
1446 1446 }
1447 1447 is->is_tag = fr->fr_logtag;
1448 1448
/*
 * is_ifp[]/is_ifname[] hold four slots, indexed from the packet direction:
 * (out << 1) and (out << 1) + 1 for this packet's direction, and
 * ((1 - out) << 1) and ((1 - out) << 1) + 1 for the opposite one. Slot
 * (out << 1) is filled from the packet's own interface further down; the
 * other three come from the rule's fr_ifas[1..3].
 */
1449 1449 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1450 1450 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1451 1451 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1452 1452
1453 1453 if (((ifp = fr->fr_ifas[1]) != NULL) &&
1454 1454 (ifp != (void *)-1)) {
1455 1455 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v);
1456 1456 }
1457 1457 if (((ifp = fr->fr_ifas[2]) != NULL) &&
1458 1458 (ifp != (void *)-1)) {
1459 1459 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v);
1460 1460 }
1461 1461 if (((ifp = fr->fr_ifas[3]) != NULL) &&
1462 1462 (ifp != (void *)-1)) {
1463 1463 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v);
1464 1464 }
1465 1465 }
1466 1466
1467 1467 is->is_ifp[out << 1] = fin->fin_ifp;
1468 1468 if (fin->fin_ifp != NULL) {
1469 1469 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v);
1470 1470 }
1471 1471
1472 1472 is->is_ref = 1;
1473 1473 is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1474 1474 is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1475 1475 is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1476 1476 is->is_pkts[3] = 0, is->is_bytes[3] = 0;
/* Unless FI_IGNORE is set, count this packet against its direction. */
1477 1477 if ((fin->fin_flx & FI_IGNORE) == 0) {
1478 1478 is->is_pkts[out] = 1;
1479 1479 is->is_bytes[out] = fin->fin_plen;
1480 1480 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1481 1481 is->is_flx[out][0] &= ~FI_OOW;
1482 1482 }
1483 1483
1484 1484 if (pass & FR_STSTRICT)
1485 1485 is->is_flags |= IS_STRICT;
1486 1486
1487 1487 if (pass & FR_STATESYNC)
1488 1488 is->is_flags |= IS_STATESYNC;
1489 1489
1490 1490 if (flags & (SI_WILDP|SI_WILDA)) {
1491 1491 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild);
1492 1492 }
1493 1493 is->is_rulen = fin->fin_rule;
1494 1494
1495 1495
1496 1496 if (pass & FR_LOGFIRST)
1497 1497 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1498 1498
/*
 * is->is_lock is released below although this function never acquires it;
 * presumably fr_stinsert() returns with is->is_lock held - TODO confirm.
 */
1499 1499 READ_ENTER(&ifs->ifs_ipf_state);
1500 1500 is->is_me = stsave;
1501 1501
1502 1502 fr_stinsert(is, fin->fin_rev, ifs);
1503 1503
1504 1504 if (fin->fin_p == IPPROTO_TCP) {
1505 1505 /*
1506 1506 * If we're creating state for a starting connection, start the
1507 1507 * timer on it as we'll never see an error if it fails to
1508 1508 * connect.
1509 1509 */
1510 1510 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb,
1511 1511 is->is_flags);
1512 1512 MUTEX_EXIT(&is->is_lock);
1513 1513 #ifdef IPFILTER_SCAN
1514 1514 if ((is->is_flags & SI_CLONE) == 0)
1515 1515 (void) ipsc_attachis(is);
1516 1516 #endif
1517 1517 } else {
1518 1518 MUTEX_EXIT(&is->is_lock);
1519 1519 }
1520 1520 #ifdef IPFILTER_SYNC
1521 1521 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
1522 1522 is->is_sync = ipfsync_new(SMC_STATE, fin, is);
1523 1523 #endif
1524 1524 if (ifs->ifs_ipstate_logging)
1525 1525 ipstate_log(is, ISL_NEW, ifs);
1526 1526
1527 1527 RWLOCK_EXIT(&ifs->ifs_ipf_state);
/*
 * Mark the packet as having state. fin_rev is set non-zero when the new
 * entry's destination differs from the packet's destination address.
 */
1528 1528 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr);
1529 1529 fin->fin_flx |= FI_STATE;
1530 1530 if (fin->fin_flx & FI_FRAG)
1531 1531 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);
1532 1532
1533 1533 return is;
1534 1534 }
1535 1535
1536 1536
1537 1537 /* ------------------------------------------------------------------------ */
1538 1538 /* Function: fr_tcpoptions */
1539 1539 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1540 1540 /* Parameters: fin(I) - pointer to packet information */
1541 1541 /* tcp(I) - pointer to TCP packet header */
1542 1542 /* td(I) - pointer to TCP data held as part of the state */
1543 1543 /* */
1544 1544 /* Look after the TCP header for any options and deal with those that are */
1545 1545 /* present. Record details about those that we recogise. */
1546 1546 /* ------------------------------------------------------------------------ */
1547 1547 static int fr_tcpoptions(fin, tcp, td)
1548 1548 fr_info_t *fin;
1549 1549 tcphdr_t *tcp;
1550 1550 tcpdata_t *td;
1551 1551 {
1552 1552 int off, mlen, ol, i, len, retval;
1553 1553 char buf[64], *s, opt;
1554 1554 mb_t *m = NULL;
1555 1555
1556 1556 len = (TCP_OFF(tcp) << 2);
1557 1557 if (fin->fin_dlen < len)
1558 1558 return 0;
1559 1559 len -= sizeof(*tcp);
1560 1560
1561 1561 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
1562 1562
1563 1563 m = fin->fin_m;
1564 1564 mlen = MSGDSIZE(m) - off;
1565 1565 if (len > mlen) {
1566 1566 len = mlen;
1567 1567 retval = 0;
1568 1568 } else {
1569 1569 retval = 1;
1570 1570 }
1571 1571
1572 1572 COPYDATA(m, off, len, buf);
1573 1573
1574 1574 for (s = buf; len > 0; ) {
1575 1575 opt = *s;
1576 1576 if (opt == TCPOPT_EOL)
1577 1577 break;
1578 1578 else if (opt == TCPOPT_NOP)
1579 1579 ol = 1;
1580 1580 else {
1581 1581 if (len < 2)
1582 1582 break;
1583 1583 ol = (int)*(s + 1);
1584 1584 if (ol < 2 || ol > len)
1585 1585 break;
1586 1586
1587 1587 /*
1588 1588 * Extract the TCP options we are interested in out of
1589 1589 * the header and store them in the the tcpdata struct.
1590 1590 */
1591 1591 switch (opt)
1592 1592 {
1593 1593 case TCPOPT_WINDOW :
1594 1594 if (ol == TCPOLEN_WINDOW) {
1595 1595 i = (int)*(s + 2);
1596 1596 if (i > TCP_WSCALE_MAX)
1597 1597 i = TCP_WSCALE_MAX;
1598 1598 else if (i < 0)
1599 1599 i = 0;
1600 1600 td->td_winscale = i;
1601 1601 td->td_winflags |= TCP_WSCALE_SEEN |
1602 1602 TCP_WSCALE_FIRST;
1603 1603 } else
1604 1604 retval = -1;
1605 1605 break;
1606 1606 case TCPOPT_MAXSEG :
1607 1607 /*
1608 1608 * So, if we wanted to set the TCP MAXSEG,
1609 1609 * it should be done here...
1610 1610 */
1611 1611 if (ol == TCPOLEN_MAXSEG) {
1612 1612 i = (int)*(s + 2);
1613 1613 i <<= 8;
1614 1614 i += (int)*(s + 3);
1615 1615 td->td_maxseg = i;
1616 1616 } else
1617 1617 retval = -1;
1618 1618 break;
1619 1619 case TCPOPT_SACK_PERMITTED :
1620 1620 if (ol == TCPOLEN_SACK_PERMITTED)
1621 1621 td->td_winflags |= TCP_SACK_PERMIT;
1622 1622 else
1623 1623 retval = -1;
1624 1624 break;
1625 1625 }
1626 1626 }
1627 1627 len -= ol;
1628 1628 s += ol;
1629 1629 }
1630 1630 return retval;
1631 1631 }
1632 1632
1633 1633
1634 1634 /* ------------------------------------------------------------------------ */
1635 1635 /* Function: fr_tcpstate */
1636 1636 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1637 1637 /* Parameters: fin(I) - pointer to packet information */
1638 1638 /* tcp(I) - pointer to TCP packet header */
1639 1639 /* is(I) - pointer to master state structure */
1640 1640 /* */
1641 1641 /* Check to see if a packet with TCP headers fits within the TCP window. */
1642 1642 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1643 1643 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1644 1644 /* ------------------------------------------------------------------------ */
1645 1645 static int fr_tcpstate(fin, tcp, is)
1646 1646 fr_info_t *fin;
1647 1647 tcphdr_t *tcp;
1648 1648 ipstate_t *is;
1649 1649 {
1650 1650 int source, ret = 0, flags;
1651 1651 tcpdata_t *fdata, *tdata;
1652 1652 ipf_stack_t *ifs = fin->fin_ifs;
1653 1653
1654 1654 source = !fin->fin_rev;
1655 1655 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1656 1656 (ntohs(is->is_sport) != fin->fin_data[0]))
1657 1657 source = 0;
1658 1658 fdata = &is->is_tcp.ts_data[!source];
1659 1659 tdata = &is->is_tcp.ts_data[source];
1660 1660
1661 1661 MUTEX_ENTER(&is->is_lock);
1662 1662
1663 1663 /*
1664 1664 * If a SYN packet is received for a connection that is in a half
1665 1665 * closed state, then move its state entry to deletetq. In such case
1666 1666 * the SYN packet will be consequently dropped. This allows new state
1667 1667 * entry to be created with a retransmited SYN packet.
1668 1668 */
1669 1669 if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
1670 1670 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) &&
1671 1671 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) {
1672 1672 is->is_state[source] = IPF_TCPS_CLOSED;
1673 1673 is->is_state[!source] = IPF_TCPS_CLOSED;
1674 1674 /*
1675 1675 * Do not update is->is_sti.tqe_die in case state entry
1676 1676 * is already present in deletetq. It prevents state
1677 1677 * entry ttl update by retransmitted SYN packets, which
1678 1678 * may arrive before timer tick kicks off. The SYN
1679 1679 * packet will be dropped again.
1680 1680 */
1681 1681 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq)
1682 1682 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq,
1683 1683 &fin->fin_ifs->ifs_ips_deletetq,
1684 1684 fin->fin_ifs);
1685 1685
1686 1686 MUTEX_EXIT(&is->is_lock);
1687 1687 return 0;
1688 1688 }
1689 1689 }
1690 1690
1691 1691 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) {
1692 1692 #ifdef IPFILTER_SCAN
1693 1693 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) {
1694 1694 ipsc_packet(fin, is);
1695 1695 if (FR_ISBLOCK(is->is_pass)) {
1696 1696 MUTEX_EXIT(&is->is_lock);
1697 1697 return 1;
1698 1698 }
1699 1699 }
1700 1700 #endif
1701 1701
1702 1702 /*
1703 1703 * Nearing end of connection, start timeout.
1704 1704 */
1705 1705 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb,
1706 1706 is->is_flags);
1707 1707 if (ret == 0) {
1708 1708 MUTEX_EXIT(&is->is_lock);
1709 1709 return 0;
1710 1710 }
1711 1711
1712 1712 /*
1713 1713 * set s0's as appropriate. Use syn-ack packet as it
1714 1714 * contains both pieces of required information.
1715 1715 */
1716 1716 /*
1717 1717 * Window scale option is only present in SYN/SYN-ACK packet.
1718 1718 * Compare with ~TH_FIN to mask out T/TCP setups.
1719 1719 */
1720 1720 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
1721 1721 if (flags == (TH_SYN|TH_ACK)) {
|
↓ open down ↓ |
1721 lines elided |
↑ open up ↑ |
1722 1722 is->is_s0[source] = ntohl(tcp->th_ack);
1723 1723 is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
1724 1724 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) {
1725 1725 (void) fr_tcpoptions(fin, tcp, fdata);
1726 1726 }
1727 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
1728 1728 fr_checknewisn(fin, is);
1729 1729 } else if (flags == TH_SYN) {
1730 1730 is->is_s0[source] = ntohl(tcp->th_seq) + 1;
1731 1731 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)))
1732 - (void) fr_tcpoptions(fin, tcp, tdata);
1732 + (void) fr_tcpoptions(fin, tcp, fdata);
1733 1733
1734 1734 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
1735 1735 fr_checknewisn(fin, is);
1736 1736
1737 1737 }
1738 1738 ret = 1;
1739 1739 } else
1740 1740 fin->fin_flx |= FI_OOW;
1741 1741 MUTEX_EXIT(&is->is_lock);
1742 1742 return ret;
1743 1743 }
1744 1744
1745 1745
1746 1746 /* ------------------------------------------------------------------------ */
1747 1747 /* Function: fr_checknewisn */
1748 1748 /* Returns: Nil */
1749 1749 /* Parameters: fin(I) - pointer to packet information */
1750 1750 /* is(I) - pointer to master state structure */
1751 1751 /* */
1752 1752 /* Check to see if this TCP connection is expecting and needs a new */
1753 1753 /* sequence number for a particular direction of the connection. */
1754 1754 /* */
1755 1755 /* NOTE: This does not actually change the sequence numbers, only gets new */
1756 1756 /* one ready. */
1757 1757 /* ------------------------------------------------------------------------ */
1758 1758 static void fr_checknewisn(fin, is)
1759 1759 fr_info_t *fin;
1760 1760 ipstate_t *is;
1761 1761 {
1762 1762 u_32_t sumd, old, new;
1763 1763 tcphdr_t *tcp;
1764 1764 int i;
1765 1765
1766 1766 i = fin->fin_rev;
1767 1767 tcp = fin->fin_dp;
1768 1768
1769 1769 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
1770 1770 ((i == 1) && !(is->is_flags & IS_ISNACK))) {
1771 1771 old = ntohl(tcp->th_seq);
1772 1772 new = fr_newisn(fin);
1773 1773 is->is_isninc[i] = new - old;
1774 1774 CALC_SUMD(old, new, sumd);
1775 1775 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
1776 1776
1777 1777 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
1778 1778 }
1779 1779 }
1780 1780
1781 1781
1782 1782 /* ------------------------------------------------------------------------ */
1783 1783 /* Function: fr_tcpinwindow */
1784 1784 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */
1785 1785 /* Parameters: fin(I) - pointer to packet information */
1786 1786 /* fdata(I) - pointer to tcp state information (forward) */
1787 1787 /* tdata(I) - pointer to tcp state information (reverse) */
1788 1788 /* tcp(I) - pointer to TCP packet header */
1789 1789 /* */
1790 1790 /* Given a packet has matched addresses and ports, check to see if it is */
1791 1791 /* within the TCP data window. In a show of generosity, allow packets that */
1792 1792 /* are within the window space behind the current sequence # as well. */
1793 1793 /* ------------------------------------------------------------------------ */
1794 1794 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags)
1795 1795 fr_info_t *fin;
1796 1796 tcpdata_t *fdata, *tdata;
1797 1797 tcphdr_t *tcp;
1798 1798 int flags;
1799 1799 {
1800 1800 tcp_seq seq, ack, end;
1801 1801 int ackskew, tcpflags;
1802 1802 u_32_t win, maxwin;
1803 1803 int dsize, inseq;
1804 1804
1805 1805 /*
1806 1806 * Find difference between last checked packet and this packet.
1807 1807 */
1808 1808 tcpflags = tcp->th_flags;
1809 1809 seq = ntohl(tcp->th_seq);
1810 1810 ack = ntohl(tcp->th_ack);
1811 1811
1812 1812 if (tcpflags & TH_SYN)
1813 1813 win = ntohs(tcp->th_win);
1814 1814 else
1815 1815 win = ntohs(tcp->th_win) << fdata->td_winscale;
1816 1816
1817 1817 /*
1818 1818 * win 0 means the receiving endpoint has closed the window, because it
1819 1819 * has not enough memory to receive data from sender. In such case we
1820 1820 * are pretending window size to be 1 to let TCP probe data through.
1821 1821 * TCP probe data can be either 0 or 1 octet of data, the RFC does not
1822 1822 * state this accurately, so we have to allow 1 octet (win = 1) even if
1823 1823 * the window is closed (win == 0).
1824 1824 */
1825 1825 if (win == 0)
1826 1826 win = 1;
1827 1827
1828 1828 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
1829 1829 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
1830 1830
1831 1831 /*
1832 1832 * if window scaling is present, the scaling is only allowed
|
↓ open down ↓ |
90 lines elided |
↑ open up ↑ |
1833 1833 * for windows not in the first SYN packet. In that packet the
1834 1834 * window is 65535 to specify the largest window possible
1835 1835 * for receivers not implementing the window scale option.
1836 1836 * Currently, we do not assume TTCP here. That means that
1837 1837 * if we see a second packet from a host (after the initial
1838 1838 * SYN), we can assume that the receiver of the SYN did
1839 1839 * already send back the SYN/ACK (and thus that we know if
1840 1840 * the receiver also does window scaling)
1841 1841 */
1842 1842 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
1843 + fdata->td_winflags &= ~TCP_WSCALE_FIRST;
1843 1844 fdata->td_maxwin = win;
1844 1845 }
1845 1846
1846 1847 end = seq + dsize;
1847 1848
1848 1849 if ((fdata->td_end == 0) &&
1849 1850 (!(flags & IS_TCPFSM) ||
1850 1851 ((tcpflags & TH_OPENING) == TH_OPENING))) {
1851 1852 /*
1852 1853 * Must be a (outgoing) SYN-ACK in reply to a SYN.
1853 1854 */
1854 1855 fdata->td_end = end - 1;
1855 1856 fdata->td_maxwin = 1;
1856 1857 fdata->td_maxend = end + win;
1857 1858 }
1858 1859
1859 1860 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */
1860 1861 ack = tdata->td_end;
1861 1862 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
1862 1863 (ack == 0)) {
1863 1864 /* gross hack to get around certain broken tcp stacks */
1864 1865 ack = tdata->td_end;
1865 1866 }
1866 1867
1867 1868 maxwin = tdata->td_maxwin;
1868 1869 ackskew = tdata->td_end - ack;
1869 1870
1870 1871 /*
1871 1872 * Strict sequencing only allows in-order delivery.
1872 1873 */
1873 1874 if ((flags & IS_STRICT) != 0) {
1874 1875 if (seq != fdata->td_end) {
1875 1876 DTRACE_PROBE(strict_check);
1876 1877 return 0;
1877 1878 }
1878 1879 }
1879 1880
1880 1881 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0)
1881 1882 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0)
1882 1883 inseq = 0;
1883 1884 DTRACE_PROBE4(
1884 1885 dyn_params,
1885 1886 int, dsize,
1886 1887 int, ackskew,
1887 1888 int, maxwin,
1888 1889 int, win
1889 1890 );
1890 1891 if (
1891 1892 #if defined(_KERNEL)
1892 1893 /*
1893 1894 * end <-> s + n
1894 1895 * maxend <-> ack + win
|
↓ open down ↓ |
42 lines elided |
↑ open up ↑ |
1895 1896 * this is upperbound check
1896 1897 */
1897 1898 (SEQ_GE(fdata->td_maxend, end)) &&
1898 1899 /*
1899 1900 * this is lowerbound check
1900 1901 */
1901 1902 (SEQ_GE(seq, fdata->td_end - maxwin)) &&
1902 1903 #endif
1903 1904 /* XXX what about big packets */
1904 1905 #define MAXACKWINDOW 66000
1905 - (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) &&
1906 + (-ackskew <= (MAXACKWINDOW)) &&
1906 1907 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
1907 1908 inseq = 1;
1908 1909 /*
1909 1910 * Microsoft Windows will send the next packet to the right of the
1910 1911 * window if SACK is in use.
1911 1912 */
1912 1913 } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
1913 1914 (fdata->td_winflags & TCP_SACK_PERMIT) &&
1914 1915 (tdata->td_winflags & TCP_SACK_PERMIT)) {
1915 1916 inseq = 1;
1916 1917 /*
1917 1918 * RST ACK with SEQ equal to 0 is sent by some OSes (e.g. Solaris) as a
1918 1919 * response to the initial SYN packet when there is no application
1919 1920 * listening on the port that the SYN packet was sent to.
1920 1921 */
1921 1922 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
1922 1923 (ackskew >= -1) && (ackskew <= 1)) {
1923 1924 inseq = 1;
1924 1925 } else if (!(flags & IS_TCPFSM)) {
1925 1926
1926 1927 if (!(fdata->td_winflags &
1927 1928 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
1928 1929 /*
1929 1930 * No TCPFSM and no window scaling, so make some
1930 1931 * extra guesses.
1931 1932 */
1932 1933 if ((seq == fdata->td_maxend) && (ackskew == 0))
1933 1934 inseq = 1;
1934 1935 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
1935 1936 inseq = 1;
1936 1937 }
1937 1938 }
1938 1939
1939 1940 if (inseq) {
1940 1941 /* if ackskew < 0 then this should be due to fragmented
1941 1942 * packets. There is no way to know the length of the
1942 1943 * total packet in advance.
1943 1944 * We do know the total length from the fragment cache though.
1944 1945 * Note however that there might be more sessions with
1945 1946 * exactly the same source and destination parameters in the
1946 1947 * state cache (and source and destination is the only stuff
1947 1948 * that is saved in the fragment cache). Note further that
1948 1949 * some TCP connections in the state cache are hashed with
1949 1950 * sport and dport as well which makes it not worthwhile to
1950 1951 * look for them.
1951 1952 * Thus, when ackskew is negative but still seems to belong
1952 1953 * to this session, we bump up the destinations end value.
1953 1954 */
1954 1955 if (ackskew < 0) {
1955 1956 DTRACE_PROBE2(end_update_td,
1956 1957 int, tdata->td_end,
1957 1958 int, ack
1958 1959 );
1959 1960 tdata->td_end = ack;
1960 1961 }
1961 1962
1962 1963 /* update max window seen */
1963 1964 if (fdata->td_maxwin < win) {
1964 1965 DTRACE_PROBE2(win_update_fd,
1965 1966 int, fdata->td_maxwin,
1966 1967 int, win
1967 1968 );
1968 1969 fdata->td_maxwin = win;
1969 1970 }
1970 1971
1971 1972 if (SEQ_GT(end, fdata->td_end)) {
1972 1973 DTRACE_PROBE2(end_update_fd,
1973 1974 int, fdata->td_end,
1974 1975 int, end
1975 1976 );
1976 1977 fdata->td_end = end;
1977 1978 }
1978 1979
1979 1980 if (SEQ_GE(ack + win, tdata->td_maxend)) {
1980 1981 DTRACE_PROBE2(max_end_update_td,
1981 1982 int, tdata->td_maxend,
1982 1983 int, ack + win
1983 1984 );
1984 1985 tdata->td_maxend = ack + win;
1985 1986 }
1986 1987
1987 1988 return 1;
1988 1989 }
1989 1990 fin->fin_flx |= FI_OOW;
1990 1991
1991 1992 #if defined(_KERNEL)
1992 1993 if (!(SEQ_GE(seq, fdata->td_end - maxwin)))
1993 1994 fin->fin_flx |= FI_NEG_OOW;
1994 1995 #endif
1995 1996
1996 1997 return 0;
1997 1998 }
1998 1999
1999 2000
2000 2001 /* ------------------------------------------------------------------------ */
2001 2002 /* Function: fr_stclone */
2002 2003 /* Returns: ipstate_t* - NULL == cloning failed, */
2003 2004 /* else pointer to new state structure */
2004 2005 /* Parameters: fin(I) - pointer to packet information */
2005 2006 /* tcp(I) - pointer to TCP/UDP header */
2006 2007 /* is(I) - pointer to master state structure */
2007 2008 /* */
2008 2009 /* Create a "duplicate" state table entry from the master. */
2009 2010 /* ------------------------------------------------------------------------ */
2010 2011 static ipstate_t *fr_stclone(fin, tcp, is)
2011 2012 fr_info_t *fin;
2012 2013 tcphdr_t *tcp;
2013 2014 ipstate_t *is;
2014 2015 {
2015 2016 ipstate_t *clone;
2016 2017 u_32_t send;
2017 2018 ipf_stack_t *ifs = fin->fin_ifs;
2018 2019
2019 2020 /*
2020 2021 * Trigger automatic call to fr_state_flush() if the
2021 2022 * table has reached capacity specified by hi watermark.
2022 2023 */
2023 2024 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi)
2024 2025 ifs->ifs_fr_state_doflush = 1;
2025 2026
2026 2027 /*
2027 2028 * If automatic flushing did not do its job, and the table
2028 2029 * has filled up, don't try to create a new entry. A NULL
2029 2030 * return will indicate that the cloning has failed.
2030 2031 */
2031 2032 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) {
2032 2033 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max);
2033 2034 return NULL;
2034 2035 }
2035 2036
2036 2037 KMALLOC(clone, ipstate_t *);
2037 2038 if (clone == NULL)
2038 2039 return NULL;
2039 2040 bcopy((char *)is, (char *)clone, sizeof(*clone));
2040 2041
2041 2042 MUTEX_NUKE(&clone->is_lock);
2042 2043
2043 2044 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks;
2044 2045 clone->is_state[0] = 0;
2045 2046 clone->is_state[1] = 0;
2046 2047 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2047 2048 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
2048 2049 ((tcp->th_flags & TH_FIN) ? 1 : 0);
2049 2050
2050 2051 if (fin->fin_rev == 1) {
2051 2052 clone->is_dend = send;
2052 2053 clone->is_maxdend = send;
2053 2054 clone->is_send = 0;
2054 2055 clone->is_maxswin = 1;
2055 2056 clone->is_maxdwin = ntohs(tcp->th_win);
2056 2057 if (clone->is_maxdwin == 0)
2057 2058 clone->is_maxdwin = 1;
2058 2059 } else {
2059 2060 clone->is_send = send;
2060 2061 clone->is_maxsend = send;
2061 2062 clone->is_dend = 0;
2062 2063 clone->is_maxdwin = 1;
2063 2064 clone->is_maxswin = ntohs(tcp->th_win);
2064 2065 if (clone->is_maxswin == 0)
2065 2066 clone->is_maxswin = 1;
2066 2067 }
2067 2068
2068 2069 clone->is_flags &= ~SI_CLONE;
2069 2070 clone->is_flags |= SI_CLONED;
2070 2071 fr_stinsert(clone, fin->fin_rev, ifs);
2071 2072 clone->is_ref = 1;
2072 2073 if (clone->is_p == IPPROTO_TCP) {
2073 2074 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb,
2074 2075 clone->is_flags);
2075 2076 }
2076 2077 MUTEX_EXIT(&clone->is_lock);
2077 2078 #ifdef IPFILTER_SCAN
2078 2079 (void) ipsc_attachis(is);
2079 2080 #endif
2080 2081 #ifdef IPFILTER_SYNC
2081 2082 if (is->is_flags & IS_STATESYNC)
2082 2083 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone);
2083 2084 #endif
2084 2085 return clone;
2085 2086 }
2086 2087
2087 2088
2088 2089 /* ------------------------------------------------------------------------ */
2089 2090 /* Function: fr_matchsrcdst */
2090 2091 /* Returns: ipstate_t* - NULL == no match, else pointer to state */
2091 2092 /* Parameters: fin(I) - pointer to packet information */
2092 2093 /* is(I) - pointer to state structure */
2093 2094 /* src(I) - pointer to source address */
2094 2095 /* dst(I) - pointer to destination address */
2095 2096 /* tcp(I) - pointer to TCP/UDP header */
2096 2097 /* */
2097 2098 /* Match a state table entry against an IP packet. The logic below is that */
2098 2099 /* ret gets set to one if the match succeeds, else remains 0. If it is */
2099 2100 /* still 0 after the test, there is no match and NULL is returned. */
2100 2101 /* ------------------------------------------------------------------------ */
2101 2102 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask)
2102 2103 fr_info_t *fin;
2103 2104 ipstate_t *is;
2104 2105 i6addr_t *src, *dst;
2105 2106 tcphdr_t *tcp;
2106 2107 u_32_t cmask;
2107 2108 {
2108 2109 int ret = 0, rev, out, flags, flx = 0, idx;
2109 2110 u_short sp, dp;
2110 2111 u_32_t cflx;
2111 2112 void *ifp;
2112 2113 ipf_stack_t *ifs = fin->fin_ifs;
2113 2114
2114 2115 rev = IP6_NEQ(&is->is_dst, dst);
2115 2116 ifp = fin->fin_ifp;
2116 2117 out = fin->fin_out;
2117 2118 flags = is->is_flags;
2118 2119 sp = 0;
2119 2120 dp = 0;
2120 2121
2121 2122 if (tcp != NULL) {
2122 2123 sp = htons(fin->fin_sport);
2123 2124 dp = ntohs(fin->fin_dport);
2124 2125 }
2125 2126 if (!rev) {
2126 2127 if (tcp != NULL) {
2127 2128 if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
2128 2129 rev = 1;
2129 2130 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
2130 2131 rev = 1;
2131 2132 }
2132 2133 }
2133 2134
2134 2135 idx = (out << 1) + rev;
2135 2136
2136 2137 /*
2137 2138 * If the interface for this 'direction' is set, make sure it matches.
2138 2139 * An interface name that is not set matches any, as does a name of *.
2139 2140 */
2140 2141 if ((is->is_ifp[idx] == NULL &&
2141 2142 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) ||
2142 2143 is->is_ifp[idx] == ifp)
2143 2144 ret = 1;
2144 2145
2145 2146 if (ret == 0) {
2146 2147 DTRACE_PROBE(no_match_on_iface);
2147 2148 return NULL;
2148 2149 }
2149 2150 ret = 0;
2150 2151
2151 2152 /*
2152 2153 * Match addresses and ports.
2153 2154 */
2154 2155 if (rev == 0) {
2155 2156 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
2156 2157 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
2157 2158 if (tcp) {
2158 2159 if ((sp == is->is_sport || flags & SI_W_SPORT)&&
2159 2160 (dp == is->is_dport || flags & SI_W_DPORT))
2160 2161 ret = 1;
2161 2162 } else {
2162 2163 ret = 1;
2163 2164 }
2164 2165 }
2165 2166 } else {
2166 2167 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
2167 2168 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
2168 2169 if (tcp) {
2169 2170 if ((dp == is->is_sport || flags & SI_W_SPORT)&&
2170 2171 (sp == is->is_dport || flags & SI_W_DPORT))
2171 2172 ret = 1;
2172 2173 } else {
2173 2174 ret = 1;
2174 2175 }
2175 2176 }
2176 2177 }
2177 2178
2178 2179 if (ret == 0) {
2179 2180 DTRACE_PROBE(no_match_on_addrs);
2180 2181 return NULL;
2181 2182 }
2182 2183 /*
2183 2184 * Whether or not this should be here, is questionable, but the aim
2184 2185 * is to get this out of the main line.
2185 2186 */
2186 2187 if (tcp == NULL)
2187 2188 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
2188 2189
2189 2190 /*
2190 2191 * Only one of the source or destination address can be flagged as a
2191 2192 * wildcard. Fill in the missing address, if set.
2192 2193 * For IPv6, if the address being copied in is multicast, then
2193 2194 * don't reset the wild flag - multicast causes it to be set in the
2194 2195 * first place!
2195 2196 */
2196 2197 if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
2197 2198 fr_ip_t *fi = &fin->fin_fi;
2198 2199
2199 2200 if ((flags & SI_W_SADDR) != 0) {
2200 2201 if (rev == 0) {
2201 2202 #ifdef USE_INET6
2202 2203 if (is->is_v == 6 &&
2203 2204 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
2204 2205 /*EMPTY*/;
2205 2206 else
2206 2207 #endif
2207 2208 {
2208 2209 is->is_src = fi->fi_src;
2209 2210 is->is_flags &= ~SI_W_SADDR;
2210 2211 }
2211 2212 } else {
2212 2213 #ifdef USE_INET6
2213 2214 if (is->is_v == 6 &&
2214 2215 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
2215 2216 /*EMPTY*/;
2216 2217 else
2217 2218 #endif
2218 2219 {
2219 2220 is->is_src = fi->fi_dst;
2220 2221 is->is_flags &= ~SI_W_SADDR;
2221 2222 }
2222 2223 }
2223 2224 } else if ((flags & SI_W_DADDR) != 0) {
2224 2225 if (rev == 0) {
2225 2226 #ifdef USE_INET6
2226 2227 if (is->is_v == 6 &&
2227 2228 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6))
2228 2229 /*EMPTY*/;
2229 2230 else
2230 2231 #endif
2231 2232 {
2232 2233 is->is_dst = fi->fi_dst;
2233 2234 is->is_flags &= ~SI_W_DADDR;
2234 2235 }
2235 2236 } else {
2236 2237 #ifdef USE_INET6
2237 2238 if (is->is_v == 6 &&
2238 2239 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6))
2239 2240 /*EMPTY*/;
2240 2241 else
2241 2242 #endif
2242 2243 {
2243 2244 is->is_dst = fi->fi_src;
2244 2245 is->is_flags &= ~SI_W_DADDR;
2245 2246 }
2246 2247 }
2247 2248 }
2248 2249 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
2249 2250 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
2250 2251 }
2251 2252 }
2252 2253
2253 2254 flx = fin->fin_flx & cmask;
2254 2255 cflx = is->is_flx[out][rev];
2255 2256
2256 2257 /*
2257 2258 * Match up any flags set from IP options.
2258 2259 */
2259 2260 if ((cflx && (flx != (cflx & cmask))) ||
2260 2261 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
2261 2262 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
2262 2263 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
2263 2264 DTRACE_PROBE4(no_match_on_flags,
2264 2265 int, (cflx && (flx != (cflx & cmask))),
2265 2266 int,
2266 2267 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]),
2267 2268 int, ((fin->fin_secmsk & is->is_secmsk) != is->is_sec),
2268 2269 int, ((fin->fin_auth & is->is_authmsk) != is->is_auth)
2269 2270 );
2270 2271 return NULL;
2271 2272 }
2272 2273 /*
2273 2274 * Only one of the source or destination port can be flagged as a
2274 2275 * wildcard. When filling it in, fill in a copy of the matched entry
2275 2276 * if it has the cloning flag set.
2276 2277 */
2277 2278 if ((fin->fin_flx & FI_IGNORE) != 0) {
2278 2279 fin->fin_rev = rev;
2279 2280 return is;
2280 2281 }
2281 2282
2282 2283 if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
2283 2284 if ((flags & SI_CLONE) != 0) {
2284 2285 ipstate_t *clone;
2285 2286
2286 2287 clone = fr_stclone(fin, tcp, is);
2287 2288 if (clone == NULL)
2288 2289 return NULL;
2289 2290 is = clone;
2290 2291 } else {
2291 2292 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
2292 2293 }
2293 2294
2294 2295 if ((flags & SI_W_SPORT) != 0) {
2295 2296 if (rev == 0) {
2296 2297 is->is_sport = sp;
2297 2298 is->is_send = ntohl(tcp->th_seq);
2298 2299 } else {
2299 2300 is->is_sport = dp;
2300 2301 is->is_send = ntohl(tcp->th_ack);
2301 2302 }
2302 2303 is->is_maxsend = is->is_send + 1;
2303 2304 } else if ((flags & SI_W_DPORT) != 0) {
2304 2305 if (rev == 0) {
2305 2306 is->is_dport = dp;
2306 2307 is->is_dend = ntohl(tcp->th_ack);
2307 2308 } else {
2308 2309 is->is_dport = sp;
2309 2310 is->is_dend = ntohl(tcp->th_seq);
2310 2311 }
2311 2312 is->is_maxdend = is->is_dend + 1;
2312 2313 }
2313 2314 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
2314 2315 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging)
2315 2316 ipstate_log(is, ISL_CLONE, ifs);
2316 2317 }
2317 2318
2318 2319 ret = -1;
2319 2320
2320 2321 if (is->is_flx[out][rev] == 0) {
2321 2322 is->is_flx[out][rev] = flx;
2322 2323 /*
2323 2324 * If we are dealing with the first packet coming in reverse
2324 2325 * direction (sent by peer), then we have to set options into
2325 2326 * state.
2326 2327 */
2327 2328 if (rev == 1 && is->is_optmsk[1] == 0x0) {
2328 2329 is->is_optmsk[1] = 0xffffffff;
2329 2330 is->is_opt[1] = fin->fin_optmsk;
2330 2331 DTRACE_PROBE(set_rev_opts);
2331 2332 }
2332 2333 if (is->is_v == 6) {
2333 2334 is->is_opt[rev] &= ~0x8;
2334 2335 is->is_optmsk[rev] &= ~0x8;
2335 2336 }
2336 2337 }
2337 2338
2338 2339 /*
2339 2340 * Check if the interface name for this "direction" is set and if not,
2340 2341 * fill it in.
2341 2342 */
2342 2343 if (is->is_ifp[idx] == NULL &&
2343 2344 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
2344 2345 is->is_ifp[idx] = ifp;
2345 2346 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v);
2346 2347 }
2347 2348 fin->fin_rev = rev;
2348 2349 return is;
2349 2350 }
2350 2351
2351 2352
2352 2353 /* ------------------------------------------------------------------------ */
2353 2354 /* Function: fr_checkicmpmatchingstate */
2354 2355 /* Returns: ipstate_t* - NULL == no match, else pointer to state */
2355 2356 /* Parameters: fin(I) - pointer to packet information */
2356 2357 /* */
2357 2358 /* If we've got an ICMP error message, using the information stored in the */
2358 2359 /* ICMP packet, look for a matching state table entry. */
2359 2360 /* */
2360 2361 /* If we return NULL then no lock on ipf_state is held. */
2361 2362 /* If we return non-null then a read-lock on ipf_state is held. */
2362 2363 /* ------------------------------------------------------------------------ */
2363 2364 static ipstate_t *fr_checkicmpmatchingstate(fin)
2364 2365 fr_info_t *fin;
2365 2366 {
2366 2367 ipstate_t *is, **isp;
2367 2368 u_short sport, dport;
2368 2369 u_char pr;
2369 2370 int backward, i, oi;
2370 2371 i6addr_t dst, src;
2371 2372 struct icmp *ic;
2372 2373 u_short savelen;
2373 2374 icmphdr_t *icmp;
2374 2375 fr_info_t ofin;
2375 2376 tcphdr_t *tcp;
2376 2377 int len;
2377 2378 ip_t *oip;
2378 2379 u_int hv;
2379 2380 ipf_stack_t *ifs = fin->fin_ifs;
2380 2381
2381 2382 /*
2382 2383 * Does it at least have the return (basic) IP header ?
2383 2384 * Is it an actual recognised ICMP error type?
2384 2385 * Only a basic IP header (no options) should be with
2385 2386 * an ICMP error header.
2386 2387 */
2387 2388 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
2388 2389 (fin->fin_plen < ICMPERR_MINPKTLEN) ||
2389 2390 !(fin->fin_flx & FI_ICMPERR))
2390 2391 return NULL;
2391 2392 ic = fin->fin_dp;
2392 2393
2393 2394 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
2394 2395 /*
2395 2396 * Check if at least the old IP header (with options) and
2396 2397 * 8 bytes of payload are present.
2397 2398 */
2398 2399 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2))
2399 2400 return NULL;
2400 2401
2401 2402 /*
2402 2403 * Sanity Checks.
2403 2404 */
2404 2405 len = fin->fin_dlen - ICMPERR_ICMPHLEN;
2405 2406 if ((len <= 0) || ((IP_HL(oip) << 2) > len))
2406 2407 return NULL;
2407 2408
2408 2409 /*
2409 2410 * Is the buffer big enough for all of it ? It's the size of the IP
2410 2411 * header claimed in the encapsulated part which is of concern. It
2411 2412 * may be too big to be in this buffer but not so big that it's
2412 2413 * outside the ICMP packet, leading to TCP deref's causing problems.
2413 2414 * This is possible because we don't know how big oip_hl is when we
2414 2415 * do the pullup early in fr_check() and thus can't guarantee it is
2415 2416 * all here now.
2416 2417 */
2417 2418 #ifdef _KERNEL
2418 2419 {
2419 2420 mb_t *m;
2420 2421
2421 2422 m = fin->fin_m;
2422 2423 # if defined(MENTAT)
2423 2424 if ((char *)oip + len > (char *)m->b_wptr)
2424 2425 return NULL;
2425 2426 # else
2426 2427 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len)
2427 2428 return NULL;
2428 2429 # endif
2429 2430 }
2430 2431 #endif
2431 2432 bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
2432 2433
2433 2434 /*
2434 2435 * in the IPv4 case we must zero the i6addr union otherwise
2435 2436 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2436 2437 * of the 'junk' in the unused part of the union
2437 2438 */
2438 2439 bzero((char *)&src, sizeof(src));
2439 2440 bzero((char *)&dst, sizeof(dst));
2440 2441
2441 2442 /*
2442 2443 * we make an fin entry to be able to feed it to
2443 2444 * matchsrcdst; note that not all fields are necessary
2444 2445 * but this is the cleanest way. Note further we fill
2445 2446 * in fin_mp such that if someone uses it we'll get
2446 2447 * a kernel panic. fr_matchsrcdst does not use this.
2447 2448 *
2448 2449 * watch out here, as ip is in host order and oip in network
2449 2450 * order. Any change we make must be undone afterwards, like
2450 2451 * oip->ip_off - it is still in network byte order so fix it.
2451 2452 */
2452 2453 savelen = oip->ip_len;
2453 2454 oip->ip_len = len;
2454 2455 oip->ip_off = ntohs(oip->ip_off);
2455 2456
2456 2457 ofin.fin_flx = FI_NOCKSUM;
2457 2458 ofin.fin_v = 4;
2458 2459 ofin.fin_ip = oip;
2459 2460 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
2460 2461 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
2461 2462 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
2462 2463 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin);
2463 2464 ofin.fin_ifp = fin->fin_ifp;
2464 2465 ofin.fin_out = !fin->fin_out;
2465 2466 /*
2466 2467 * Reset the short and bad flag here because in fr_matchsrcdst()
2467 2468 * the flags for the current packet (fin_flx) are compared against
2468 2469 * those for the existing session.
2469 2470 */
2470 2471 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
2471 2472
2472 2473 /*
2473 2474 * Put old values of ip_len and ip_off back as we don't know
2474 2475 * if we have to forward the packet (or process it again).
2475 2476 */
2476 2477 oip->ip_len = savelen;
2477 2478 oip->ip_off = htons(oip->ip_off);
2478 2479
2479 2480 switch (oip->ip_p)
2480 2481 {
2481 2482 case IPPROTO_ICMP :
2482 2483 /*
2483 2484 * an ICMP error can only be generated as a result of an
2484 2485 * ICMP query, not as the response on an ICMP error
2485 2486 *
2486 2487 * XXX theoretically ICMP_ECHOREP and the other replies are
2487 2488 * ICMP queries as well, but adding them here seems strange XXX
2488 2489 */
2489 2490 if ((ofin.fin_flx & FI_ICMPERR) != 0)
2490 2491 return NULL;
2491 2492
2492 2493 /*
2493 2494 * perform a lookup of the ICMP packet in the state table
2494 2495 */
2495 2496 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2496 2497 hv = (pr = oip->ip_p);
2497 2498 src.in4 = oip->ip_src;
2498 2499 hv += src.in4.s_addr;
2499 2500 dst.in4 = oip->ip_dst;
2500 2501 hv += dst.in4.s_addr;
2501 2502 hv += icmp->icmp_id;
2502 2503 hv = DOUBLE_HASH(hv, ifs);
2503 2504
2504 2505 READ_ENTER(&ifs->ifs_ipf_state);
2505 2506 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
2506 2507 isp = &is->is_hnext;
2507 2508 if ((is->is_p != pr) || (is->is_v != 4))
2508 2509 continue;
2509 2510 if (is->is_pass & FR_NOICMPERR)
2510 2511 continue;
2511 2512 is = fr_matchsrcdst(&ofin, is, &src, &dst,
2512 2513 NULL, FI_ICMPCMP);
2513 2514 if (is != NULL) {
2514 2515 if ((is->is_pass & FR_NOICMPERR) != 0) {
2515 2516 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2516 2517 return NULL;
2517 2518 }
2518 2519 /*
2519 2520 * i : the index of this packet (the icmp
2520 2521 * unreachable)
2521 2522 * oi : the index of the original packet found
2522 2523 * in the icmp header (i.e. the packet
2523 2524 * causing this icmp)
2524 2525 * backward : original packet was backward
2525 2526 * compared to the state
2526 2527 */
2527 2528 backward = IP6_NEQ(&is->is_src, &src);
2528 2529 fin->fin_rev = !backward;
2529 2530 i = (!backward << 1) + fin->fin_out;
2530 2531 oi = (backward << 1) + ofin.fin_out;
2531 2532 if (is->is_icmppkts[i] > is->is_pkts[oi])
2532 2533 continue;
2533 2534 ifs->ifs_ips_stats.iss_hits++;
2534 2535 is->is_icmppkts[i]++;
2535 2536 return is;
2536 2537 }
2537 2538 }
2538 2539 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2539 2540 return NULL;
2540 2541 case IPPROTO_TCP :
2541 2542 case IPPROTO_UDP :
2542 2543 break;
2543 2544 default :
2544 2545 return NULL;
2545 2546 }
2546 2547
2547 2548 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2548 2549 dport = tcp->th_dport;
2549 2550 sport = tcp->th_sport;
2550 2551
2551 2552 hv = (pr = oip->ip_p);
2552 2553 src.in4 = oip->ip_src;
2553 2554 hv += src.in4.s_addr;
2554 2555 dst.in4 = oip->ip_dst;
2555 2556 hv += dst.in4.s_addr;
2556 2557 hv += dport;
2557 2558 hv += sport;
2558 2559 hv = DOUBLE_HASH(hv, ifs);
2559 2560
2560 2561 READ_ENTER(&ifs->ifs_ipf_state);
2561 2562 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
2562 2563 isp = &is->is_hnext;
2563 2564 /*
2564 2565 * Only allow this icmp through if the
2565 2566 * encapsulated packet was allowed through the
2566 2567 * other way around. Note that the minimal amount
2567 2568 * of info present does not allow for checking against
2568 2569 * tcp internals such as seq and ack numbers. Only the
2569 2570 * ports are known to be present and can be even if the
2570 2571 * short flag is set.
2571 2572 */
2572 2573 if ((is->is_p == pr) && (is->is_v == 4) &&
2573 2574 (is = fr_matchsrcdst(&ofin, is, &src, &dst,
2574 2575 tcp, FI_ICMPCMP))) {
2575 2576 /*
2576 2577 * i : the index of this packet (the icmp unreachable)
2577 2578 * oi : the index of the original packet found in the
2578 2579 * icmp header (i.e. the packet causing this icmp)
2579 2580 * backward : original packet was backward compared to
2580 2581 * the state
2581 2582 */
2582 2583 backward = IP6_NEQ(&is->is_src, &src);
2583 2584 fin->fin_rev = !backward;
2584 2585 i = (!backward << 1) + fin->fin_out;
2585 2586 oi = (backward << 1) + ofin.fin_out;
2586 2587
2587 2588 if (((is->is_pass & FR_NOICMPERR) != 0) ||
2588 2589 (is->is_icmppkts[i] > is->is_pkts[oi]))
2589 2590 break;
2590 2591 ifs->ifs_ips_stats.iss_hits++;
2591 2592 is->is_icmppkts[i]++;
2592 2593 /*
2593 2594 * we deliberately do not touch the timeouts
2594 2595 * for the accompanying state table entry.
2595 2596 * It remains to be seen if that is correct. XXX
2596 2597 */
2597 2598 return is;
2598 2599 }
2599 2600 }
2600 2601 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2601 2602 return NULL;
2602 2603 }
2603 2604
2604 2605
2605 2606 /* ------------------------------------------------------------------------ */
2606 2607 /* Function: fr_ipsmove */
2607 2608 /* Returns: Nil */
2608 2609 /* Parameters: is(I) - pointer to state table entry */
2609 2610 /* hv(I) - new hash value for state table entry */
2610 2611 /* Write Locks: ipf_state */
2611 2612 /* */
2612 2613 /* Move a state entry from one position in the hash table to another. */
2613 2614 /* ------------------------------------------------------------------------ */
2614 2615 static void fr_ipsmove(is, hv, ifs)
2615 2616 ipstate_t *is;
2616 2617 u_int hv;
2617 2618 ipf_stack_t *ifs;
2618 2619 {
2619 2620 ipstate_t **isp;
2620 2621 u_int hvm;
2621 2622
2622 2623 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0);
2623 2624
2624 2625 hvm = is->is_hv;
2625 2626 /*
2626 2627 * Remove the hash from the old location...
2627 2628 */
2628 2629 isp = is->is_phnext;
2629 2630 if (is->is_hnext)
2630 2631 is->is_hnext->is_phnext = isp;
2631 2632 *isp = is->is_hnext;
2632 2633 if (ifs->ifs_ips_table[hvm] == NULL)
2633 2634 ifs->ifs_ips_stats.iss_inuse--;
2634 2635 ifs->ifs_ips_stats.iss_bucketlen[hvm]--;
2635 2636
2636 2637 /*
2637 2638 * ...and put the hash in the new one.
2638 2639 */
2639 2640 hvm = DOUBLE_HASH(hv, ifs);
2640 2641 is->is_hv = hvm;
2641 2642 isp = &ifs->ifs_ips_table[hvm];
2642 2643 if (*isp)
2643 2644 (*isp)->is_phnext = &is->is_hnext;
2644 2645 else
2645 2646 ifs->ifs_ips_stats.iss_inuse++;
2646 2647 ifs->ifs_ips_stats.iss_bucketlen[hvm]++;
2647 2648 is->is_phnext = isp;
2648 2649 is->is_hnext = *isp;
2649 2650 *isp = is;
2650 2651 }
2651 2652
2652 2653
2653 2654 /* ------------------------------------------------------------------------ */
2654 2655 /* Function: fr_stlookup */
2655 2656 /* Returns: ipstate_t* - NULL == no matching state found, */
2656 2657 /* else pointer to state information is returned */
2657 2658 /* Parameters: fin(I) - pointer to packet information */
2658 2659 /* tcp(I) - pointer to TCP/UDP header; ignored, it is overwritten with */
/* fin->fin_dp on entry */
/* ifqp(O) - if non-NULL, receives the timeout queue chosen for the match; */
/* never written for TCP, UDP or protocols handled by the default case */
2659 2660 /* */
2660 2661 /* Search the state table for a matching entry to the packet described by */
2661 2662 /* the contents of *fin. */
2662 2663 /* */
2663 2664 /* If we return NULL then no lock on ipf_state is held. */
2664 2665 /* If we return non-null then a read-lock on ipf_state is held. */
/* NOTE(review): a match found on a retry pass that is not rehashed returns */
/* without MUTEX_DOWNGRADE, i.e. still under the WRITE_ENTER taken for the */
/* retry - confirm that callers only depend on RWLOCK_EXIT. */
/* NOTE(review): the lock state after fr_checkicmp6matchingstate() depends */
/* on that helper, which is not visible here. */
2665 2666 /* ------------------------------------------------------------------------ */
2666 2667 ipstate_t *fr_stlookup(fin, tcp, ifqp)
2667 2668 fr_info_t *fin;
2668 2669 tcphdr_t *tcp;
2669 2670 ipftq_t **ifqp;
2670 2671 {
2671 2672 u_int hv, hvm, pr, v, tryagain;
2672 2673 ipstate_t *is, **isp;
2673 2674 u_short dport, sport;
2674 2675 i6addr_t src, dst;
2675 2676 struct icmp *ic;
2676 2677 ipftq_t *ifq;
2677 2678 int oow;
2678 2679 ipf_stack_t *ifs = fin->fin_ifs;
2679 2680
2680 2681 is = NULL;
2681 2682 ifq = NULL;
/* The tcp argument is discarded: the header pointer always comes from fin. */
2682 2683 tcp = fin->fin_dp;
2683 2684 ic = (struct icmp *)tcp;
/* Start the hash with the protocol number and the in4 part of each address. */
2684 2685 hv = (pr = fin->fin_fi.fi_p);
2685 2686 src = fin->fin_fi.fi_src;
2686 2687 dst = fin->fin_fi.fi_dst;
2687 2688 hv += src.in4.s_addr;
2688 2689 hv += dst.in4.s_addr;
2689 2690
2690 2691 v = fin->fin_fi.fi_v;
2691 2692 #ifdef USE_INET6
2692 2693 if (v == 6) {
/* Fold in the remaining i6[] words of the source address. */
2693 2694 hv += fin->fin_fi.fi_src.i6[1];
2694 2695 hv += fin->fin_fi.fi_src.i6[2];
2695 2696 hv += fin->fin_fi.fi_src.i6[3];
2696 2697
/* ICMPv6 to a multicast destination: take the destination back out of */
/* the hash; otherwise fold in the rest of the destination address. */
2697 2698 if ((fin->fin_p == IPPROTO_ICMPV6) &&
2698 2699 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2699 2700 hv -= dst.in4.s_addr;
2700 2701 } else {
2701 2702 hv += fin->fin_fi.fi_dst.i6[1];
2702 2703 hv += fin->fin_fi.fi_dst.i6[2];
2703 2704 hv += fin->fin_fi.fi_dst.i6[3];
2704 2705 }
2705 2706 }
2706 2707 #endif
/* IPv4 multicast/broadcast: drop one address from the hash, the source */
/* when fin_out == 0 and the destination otherwise. */
2707 2708 if ((v == 4) &&
2708 2709 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
2709 2710 if (fin->fin_out == 0) {
2710 2711 hv -= src.in4.s_addr;
2711 2712 } else {
2712 2713 hv -= dst.in4.s_addr;
2713 2714 }
2714 2715 }
2715 2716
2716 2717 /*
2717 2718 * Search the hash table for matching packet header info.
2718 2719 */
2719 2720 switch (pr)
2720 2721 {
2721 2722 #ifdef USE_INET6
2722 2723 case IPPROTO_ICMPV6 :
2723 2724 tryagain = 0;
2724 2725 if (v == 6) {
/* Echo request/reply packets also hash on the ICMP id. */
2725 2726 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
2726 2727 (ic->icmp_type == ICMP6_ECHO_REPLY)) {
2727 2728 hv += ic->icmp_id;
2728 2729 }
2729 2730 }
2730 2731 READ_ENTER(&ifs->ifs_ipf_state);
/* The first pass arrives here holding the read lock; the retry jumps */
/* back here after WRITE_ENTER. */
2731 2732 icmp6again:
2732 2733 hvm = DOUBLE_HASH(hv, ifs);
2733 2734 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
/* Advance isp first: is may be replaced by NULL by the match below. */
2734 2735 isp = &is->is_hnext;
2735 2736 if ((is->is_p != pr) || (is->is_v != v))
2736 2737 continue;
2737 2738 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2738 2739 if (is != NULL &&
2739 2740 fr_matchicmpqueryreply(v, &is->is_icmp,
2740 2741 ic, fin->fin_rev)) {
2741 2742 if (fin->fin_rev)
2742 2743 ifq = &ifs->ifs_ips_icmpacktq;
2743 2744 else
2744 2745 ifq = &ifs->ifs_ips_icmptq;
2745 2746 break;
2746 2747 }
2747 2748 }
2748 2749
2749 2750 if (is != NULL) {
/* Found on the retry pass and not flagged SI_W_DADDR: put the source */
/* address words back into the hash, move the entry to that bucket and */
/* downgrade the lock taken for the retry. */
2750 2751 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
2751 2752 hv += fin->fin_fi.fi_src.i6[0];
2752 2753 hv += fin->fin_fi.fi_src.i6[1];
2753 2754 hv += fin->fin_fi.fi_src.i6[2];
2754 2755 hv += fin->fin_fi.fi_src.i6[3];
2755 2756 fr_ipsmove(is, hv, ifs);
2756 2757 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
2757 2758 }
2758 2759 break;
2759 2760 }
2760 2761 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2761 2762
2762 2763 /*
2763 2764 * No matching icmp state entry. Perhaps this is a
2764 2765 * response to another state entry.
2765 2766 *
2766 2767 * XXX With some ICMP6 packets, the "other" address is already
2767 2768 * in the packet, after the ICMP6 header, and this could be
2768 2769 * used in place of the multicast address. However, taking
2769 2770 * advantage of this requires some significant code changes
2770 2771 * to handle the specific types where that is the case.
2771 2772 */
/* One retry only (tryagain == 0), and only while wildcard state exists: */
/* search again with the source address removed from the hash. */
2772 2773 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) &&
2773 2774 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) {
2774 2775 hv -= fin->fin_fi.fi_src.i6[0];
2775 2776 hv -= fin->fin_fi.fi_src.i6[1];
2776 2777 hv -= fin->fin_fi.fi_src.i6[2];
2777 2778 hv -= fin->fin_fi.fi_src.i6[3];
2778 2779 tryagain = 1;
2779 2780 WRITE_ENTER(&ifs->ifs_ipf_state);
2780 2781 goto icmp6again;
2781 2782 }
2782 2783
2783 2784 is = fr_checkicmp6matchingstate(fin);
2784 2785 if (is != NULL)
2785 2786 return is;
2786 2787 break;
2787 2788 #endif
2788 2789
2789 2790 case IPPROTO_ICMP :
2790 2791 if (v == 4) {
2791 2792 hv += ic->icmp_id;
2792 2793 }
2793 2794 hv = DOUBLE_HASH(hv, ifs);
2794 2795 READ_ENTER(&ifs->ifs_ipf_state);
2795 2796 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
2796 2797 isp = &is->is_hnext;
2797 2798 if ((is->is_p != pr) || (is->is_v != v))
2798 2799 continue;
2799 2800 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2800 2801 if (is != NULL &&
2801 2802 fr_matchicmpqueryreply(v, &is->is_icmp,
2802 2803 ic, fin->fin_rev)) {
2803 2804 if (fin->fin_rev)
2804 2805 ifq = &ifs->ifs_ips_icmpacktq;
2805 2806 else
2806 2807 ifq = &ifs->ifs_ips_icmptq;
2807 2808 break;
2808 2809 }
2809 2810 }
/* The lock is kept only when an entry is being returned. */
2810 2811 if (is == NULL) {
2811 2812 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2812 2813 }
2813 2814 break;
2814 2815
2815 2816 case IPPROTO_TCP :
2816 2817 case IPPROTO_UDP :
/* No timeout queue is reported back to the caller for TCP/UDP. */
2817 2818 ifqp = NULL;
2818 2819 sport = htons(fin->fin_data[0]);
2819 2820 hv += sport;
2820 2821 dport = htons(fin->fin_data[1]);
2821 2822 hv += dport;
2822 2823 oow = 0;
2823 2824 tryagain = 0;
2824 2825 READ_ENTER(&ifs->ifs_ipf_state);
2825 2826 retry_tcpudp:
2826 2827 hvm = DOUBLE_HASH(hv, ifs);
2827 2828 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
2828 2829 isp = &is->is_hnext;
2829 2830 if ((is->is_p != pr) || (is->is_v != v))
2830 2831 continue;
/* Clear FI_OOW before each candidate; whatever FI_OOW is left in fin_flx */
/* after a failed fr_tcpstate() is accumulated in oow. */
2831 2832 fin->fin_flx &= ~FI_OOW;
2832 2833 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
2833 2834 if (is != NULL) {
2834 2835 if (pr == IPPROTO_TCP) {
2835 2836 if (!fr_tcpstate(fin, tcp, is)) {
2836 2837 oow |= fin->fin_flx & FI_OOW;
2837 2838 continue;
2838 2839 }
2839 2840 }
2840 2841 break;
2841 2842 }
2842 2843 }
2843 2844 if (is != NULL) {
/* Found on a retry pass and not a clone/wildcard entry: add the ports */
/* to the hash, move the entry there and downgrade the retry's lock. */
2844 2845 if (tryagain &&
2845 2846 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
2846 2847 hv += dport;
2847 2848 hv += sport;
2848 2849 fr_ipsmove(is, hv, ifs);
2849 2850 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state);
2850 2851 }
2851 2852 break;
2852 2853 }
2853 2854 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2854 2855
/* Only while wildcard state exists: retry under the write lock, first */
/* with the ports removed from the hash (tryagain 0 -> 1), then with the */
/* hash rebuilt from the protocol, one address and both ports (1 -> 2). */
2855 2856 if (ifs->ifs_ips_stats.iss_wild) {
2856 2857 if (tryagain == 0) {
2857 2858 hv -= dport;
2858 2859 hv -= sport;
2859 2860 } else if (tryagain == 1) {
2860 2861 hv = fin->fin_fi.fi_p;
2861 2862 /*
2862 2863 * If we try to pretend this is a reply to a
2863 2864 * multicast/broadcast packet then we need to
2864 2865 * exclude part of the address from the hash
2865 2866 * calculation.
2866 2867 */
2867 2868 if (fin->fin_out == 0) {
2868 2869 hv += src.in4.s_addr;
2869 2870 } else {
2870 2871 hv += dst.in4.s_addr;
2871 2872 }
2872 2873 hv += dport;
2873 2874 hv += sport;
2874 2875 }
2875 2876 tryagain++;
2876 2877 if (tryagain <= 2) {
2877 2878 WRITE_ENTER(&ifs->ifs_ipf_state);
2878 2879 goto retry_tcpudp;
2879 2880 }
2880 2881 }
/* No match: hand any accumulated FI_OOW back to the caller via fin_flx. */
2881 2882 fin->fin_flx |= oow;
2882 2883 break;
2883 2884
2884 2885 #if 0
2885 2886 case IPPROTO_GRE :
2886 2887 gre = fin->fin_dp;
2887 2888 if (GRE_REV(gre->gr_flags) == 1) {
2888 2889 hv += gre->gr_call;
2889 2890 }
2890 2891 /* FALLTHROUGH */
2891 2892 #endif
2892 2893 default :
/* As for TCP/UDP, the queue chosen below is not reported to the caller. */
2893 2894 ifqp = NULL;
2894 2895 hvm = DOUBLE_HASH(hv, ifs);
2895 2896 READ_ENTER(&ifs->ifs_ipf_state);
2896 2897 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) {
2897 2898 isp = &is->is_hnext;
2898 2899 if ((is->is_p != pr) || (is->is_v != v))
2899 2900 continue;
2900 2901 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
2901 2902 if (is != NULL) {
2902 2903 ifq = &ifs->ifs_ips_iptq;
2903 2904 break;
2904 2905 }
2905 2906 }
2906 2907 if (is == NULL) {
2907 2908 RWLOCK_EXIT(&ifs->ifs_ipf_state);
2908 2909 }
2909 2910 break;
2910 2911 }
2911 2912
/* A rule-based timeout queue on the entry overrides the per-protocol */
/* choice made above. */
2912 2913 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
2913 2914 (is->is_tqehead[fin->fin_rev] != NULL))
2914 2915 ifq = is->is_tqehead[fin->fin_rev];
2915 2916 if (ifq != NULL && ifqp != NULL)
2916 2917 *ifqp = ifq;
2917 2918 return is;
2918 2919 }
2919 2920
2920 2921
2921 2922 /* ------------------------------------------------------------------------ */
2922 2923 /* Function: fr_updatestate */
2923 2924 /* Returns: Nil */
2924 2925 /* Parameters: fin(I) - pointer to packet information */
2925 2926 /* is(I) - pointer to state table entry */
2926 2927 /* Read Locks: ipf_state */
2927 2928 /* */
2928 2929 /* Updates packet and byte counters for a newly received packet. Seeds the */
2929 2930 /* fragment cache with a new entry as required. */
2930 2931 /* ------------------------------------------------------------------------ */
2931 2932 void fr_updatestate(fin, is, ifq)
2932 2933 fr_info_t *fin;
2933 2934 ipstate_t *is;
2934 2935 ipftq_t *ifq;
2935 2936 {
2936 2937 ipftqent_t *tqe;
2937 2938 int i, pass;
2938 2939 ipf_stack_t *ifs = fin->fin_ifs;
2939 2940
2940 2941 i = (fin->fin_rev << 1) + fin->fin_out;
2941 2942
2942 2943 /*
2943 2944 * For TCP packets, ifq == NULL. For all others, check if this new
2944 2945 * queue is different to the last one it was on and move it if so.
2945 2946 */
2946 2947 tqe = &is->is_sti;
2947 2948 MUTEX_ENTER(&is->is_lock);
2948 2949 if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
2949 2950 ifq = is->is_tqehead[fin->fin_rev];
2950 2951
2951 2952 if (ifq != NULL)
2952 2953 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs);
2953 2954
2954 2955 is->is_pkts[i]++;
2955 2956 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i];
2956 2957 is->is_bytes[i] += fin->fin_plen;
2957 2958 MUTEX_EXIT(&is->is_lock);
2958 2959
2959 2960 #ifdef IPFILTER_SYNC
2960 2961 if (is->is_flags & IS_STATESYNC)
2961 2962 ipfsync_update(SMC_STATE, fin, is->is_sync);
2962 2963 #endif
2963 2964
2964 2965 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits);
2965 2966
2966 2967 fin->fin_fr = is->is_rule;
2967 2968
2968 2969 /*
2969 2970 * If this packet is a fragment and the rule says to track fragments,
2970 2971 * then create a new fragment cache entry.
2971 2972 */
2972 2973 pass = is->is_pass;
2973 2974 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass))
2974 2975 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);
2975 2976 }
2976 2977
2977 2978
2978 2979 /* ------------------------------------------------------------------------ */
2979 2980 /* Function: fr_checkstate */
2980 2981 /* Returns: frentry_t* - NULL == search failed, */
2981 2982 /* else pointer to rule for matching state */
2982 2983 /* Parameters: ifp(I) - pointer to interface */
2983 2984 /* passp(I) - pointer to filtering result flags */
2984 2985 /* */
2985 2986 /* Check if a packet is associated with an entry in the state table. */
2986 2987 /* ------------------------------------------------------------------------ */
2987 2988 frentry_t *fr_checkstate(fin, passp)
2988 2989 fr_info_t *fin;
2989 2990 u_32_t *passp;
2990 2991 {
2991 2992 ipstate_t *is;
2992 2993 frentry_t *fr;
2993 2994 tcphdr_t *tcp;
2994 2995 ipftq_t *ifq;
2995 2996 u_int pass;
2996 2997 ipf_stack_t *ifs = fin->fin_ifs;
2997 2998
2998 2999 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) ||
2999 3000 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)))
3000 3001 return NULL;
3001 3002
3002 3003 is = NULL;
3003 3004 if ((fin->fin_flx & FI_TCPUDP) ||
3004 3005 (fin->fin_fi.fi_p == IPPROTO_ICMP)
3005 3006 #ifdef USE_INET6
3006 3007 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
3007 3008 #endif
3008 3009 )
3009 3010 tcp = fin->fin_dp;
3010 3011 else
3011 3012 tcp = NULL;
3012 3013
3013 3014 /*
3014 3015 * Search the hash table for matching packet header info.
3015 3016 */
3016 3017 ifq = NULL;
3017 3018 is = fr_stlookup(fin, tcp, &ifq);
3018 3019 switch (fin->fin_p)
3019 3020 {
3020 3021 #ifdef USE_INET6
3021 3022 case IPPROTO_ICMPV6 :
3022 3023 if (is != NULL)
3023 3024 break;
3024 3025 if (fin->fin_v == 6) {
3025 3026 is = fr_checkicmp6matchingstate(fin);
3026 3027 if (is != NULL)
3027 3028 goto matched;
3028 3029 }
3029 3030 break;
3030 3031 #endif
3031 3032 case IPPROTO_ICMP :
3032 3033 if (is != NULL)
3033 3034 break;
3034 3035 /*
3035 3036 * No matching icmp state entry. Perhaps this is a
3036 3037 * response to another state entry.
3037 3038 */
3038 3039 is = fr_checkicmpmatchingstate(fin);
3039 3040 if (is != NULL)
3040 3041 goto matched;
3041 3042 break;
3042 3043 case IPPROTO_TCP :
3043 3044 if (is == NULL)
3044 3045 break;
3045 3046
3046 3047 if (is->is_pass & FR_NEWISN) {
3047 3048 if (fin->fin_out == 0)
3048 3049 fr_fixinisn(fin, is);
3049 3050 else if (fin->fin_out == 1)
3050 3051 fr_fixoutisn(fin, is);
3051 3052 }
3052 3053 break;
3053 3054 default :
3054 3055 if (fin->fin_rev)
3055 3056 ifq = &ifs->ifs_ips_udpacktq;
3056 3057 else
3057 3058 ifq = &ifs->ifs_ips_udptq;
3058 3059 break;
3059 3060 }
3060 3061 if (is == NULL) {
3061 3062 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss);
3062 3063 return NULL;
3063 3064 }
3064 3065
3065 3066 matched:
3066 3067 fr = is->is_rule;
3067 3068 if (fr != NULL) {
3068 3069 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
3069 3070 if (fin->fin_nattag == NULL) {
3070 3071 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3071 3072 return NULL;
3072 3073 }
3073 3074 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) {
3074 3075 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3075 3076 return NULL;
3076 3077 }
3077 3078 }
3078 3079 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN);
3079 3080 fin->fin_icode = fr->fr_icode;
3080 3081 }
3081 3082
3082 3083 fin->fin_rule = is->is_rulen;
3083 3084 pass = is->is_pass;
3084 3085 fr_updatestate(fin, is, ifq);
3085 3086
3086 3087 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3087 3088 fin->fin_flx |= FI_STATE;
3088 3089 if ((pass & FR_LOGFIRST) != 0)
3089 3090 pass &= ~(FR_LOGFIRST|FR_LOG);
3090 3091 *passp = pass;
3091 3092 return fr;
3092 3093 }
3093 3094
3094 3095
3095 3096 /* ------------------------------------------------------------------------ */
3096 3097 /* Function: fr_fixoutisn */
3097 3098 /* Returns: Nil */
3098 3099 /* Parameters: fin(I) - pointer to packet information */
3099 3100 /* is(I) - pointer to master state structure */
3100 3101 /* */
3101 3102 /* Called only for outbound packets, adjusts the sequence number and the */
3102 3103 /* TCP checksum to match that change. */
3103 3104 /* ------------------------------------------------------------------------ */
3104 3105 static void fr_fixoutisn(fin, is)
3105 3106 fr_info_t *fin;
3106 3107 ipstate_t *is;
3107 3108 {
3108 3109 tcphdr_t *tcp;
3109 3110 int rev;
3110 3111 u_32_t seq;
3111 3112
3112 3113 tcp = fin->fin_dp;
3113 3114 rev = fin->fin_rev;
3114 3115 if ((is->is_flags & IS_ISNSYN) != 0) {
3115 3116 if (rev == 0) {
3116 3117 seq = ntohl(tcp->th_seq);
3117 3118 seq += is->is_isninc[0];
3118 3119 tcp->th_seq = htonl(seq);
3119 3120 fix_outcksum(&tcp->th_sum, is->is_sumd[0]);
3120 3121 }
3121 3122 }
3122 3123 if ((is->is_flags & IS_ISNACK) != 0) {
3123 3124 if (rev == 1) {
3124 3125 seq = ntohl(tcp->th_seq);
3125 3126 seq += is->is_isninc[1];
3126 3127 tcp->th_seq = htonl(seq);
3127 3128 fix_outcksum(&tcp->th_sum, is->is_sumd[1]);
3128 3129 }
3129 3130 }
3130 3131 }
3131 3132
3132 3133
3133 3134 /* ------------------------------------------------------------------------ */
3134 3135 /* Function: fr_fixinisn */
3135 3136 /* Returns: Nil */
3136 3137 /* Parameters: fin(I) - pointer to packet information */
3137 3138 /* is(I) - pointer to master state structure */
3138 3139 /* */
3139 3140 /* Called only for inbound packets, adjusts the acknowledge number and the */
3140 3141 /* TCP checksum to match that change. */
3141 3142 /* ------------------------------------------------------------------------ */
3142 3143 static void fr_fixinisn(fin, is)
3143 3144 fr_info_t *fin;
3144 3145 ipstate_t *is;
3145 3146 {
3146 3147 tcphdr_t *tcp;
3147 3148 int rev;
3148 3149 u_32_t ack;
3149 3150
3150 3151 tcp = fin->fin_dp;
3151 3152 rev = fin->fin_rev;
3152 3153 if ((is->is_flags & IS_ISNSYN) != 0) {
3153 3154 if (rev == 1) {
3154 3155 ack = ntohl(tcp->th_ack);
3155 3156 ack -= is->is_isninc[0];
3156 3157 tcp->th_ack = htonl(ack);
3157 3158 fix_incksum(&tcp->th_sum, is->is_sumd[0]);
3158 3159 }
3159 3160 }
3160 3161 if ((is->is_flags & IS_ISNACK) != 0) {
3161 3162 if (rev == 0) {
3162 3163 ack = ntohl(tcp->th_ack);
3163 3164 ack -= is->is_isninc[1];
3164 3165 tcp->th_ack = htonl(ack);
3165 3166 fix_incksum(&tcp->th_sum, is->is_sumd[1]);
3166 3167 }
3167 3168 }
3168 3169 }
3169 3170
3170 3171
3171 3172 /* ------------------------------------------------------------------------ */
3172 3173 /* Function: fr_statesync */
3173 3174 /* Returns: Nil */
3174 3175 /* Parameters: action(I) - type of synchronisation to do */
3175 3176 /* v(I) - IP version being sync'd (v4 or v6) */
3176 3177 /* ifp(I) - interface identifier associated with action */
3177 3178 /* name(I) - name associated with ifp parameter */
3178 3179 /* */
3179 3180 /* Walk through all state entries and if an interface pointer match is */
3180 3181 /* found then look it up again, based on its name in case the pointer has */
3181 3182 /* changed since last time. */
3182 3183 /* */
3183 3184 /* If ifp is passed in as being non-null then we are only doing updates for */
3184 3185 /* existing, matching, uses of it. */
3185 3186 /* ------------------------------------------------------------------------ */
3186 3187 void fr_statesync(action, v, ifp, name, ifs)
3187 3188 int action, v;
3188 3189 void *ifp;
3189 3190 char *name;
3190 3191 ipf_stack_t *ifs;
3191 3192 {
3192 3193 ipstate_t *is;
3193 3194 int i;
3194 3195
3195 3196 if (ifs->ifs_fr_running <= 0)
3196 3197 return;
3197 3198
3198 3199 WRITE_ENTER(&ifs->ifs_ipf_state);
3199 3200
3200 3201 if (ifs->ifs_fr_running <= 0) {
3201 3202 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3202 3203 return;
3203 3204 }
3204 3205
3205 3206 switch (action)
3206 3207 {
3207 3208 case IPFSYNC_RESYNC :
3208 3209 for (is = ifs->ifs_ips_list; is; is = is->is_next) {
3209 3210 if (v != 0 && is->is_v != v)
3210 3211 continue;
3211 3212 /*
3212 3213 * Look up all the interface names in the state entry.
3213 3214 */
3214 3215 for (i = 0; i < 4; i++) {
3215 3216 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i],
3216 3217 is->is_v, ifs);
3217 3218 }
3218 3219 }
3219 3220 break;
3220 3221 case IPFSYNC_NEWIFP :
3221 3222 for (is = ifs->ifs_ips_list; is; is = is->is_next) {
3222 3223 if (v != 0 && is->is_v != v)
3223 3224 continue;
3224 3225 /*
3225 3226 * Look up all the interface names in the state entry.
3226 3227 */
3227 3228 for (i = 0; i < 4; i++) {
3228 3229 if (!strncmp(is->is_ifname[i], name,
3229 3230 sizeof(is->is_ifname[i])))
3230 3231 is->is_ifp[i] = ifp;
3231 3232 }
3232 3233 }
3233 3234 break;
3234 3235 case IPFSYNC_OLDIFP :
3235 3236 for (is = ifs->ifs_ips_list; is; is = is->is_next) {
3236 3237 if (v != 0 && is->is_v != v)
3237 3238 continue;
3238 3239 /*
3239 3240 * Look up all the interface names in the state entry.
3240 3241 */
3241 3242 for (i = 0; i < 4; i++) {
3242 3243 if (is->is_ifp[i] == ifp)
3243 3244 is->is_ifp[i] = (void *)-1;
3244 3245 }
3245 3246 }
3246 3247 break;
3247 3248 }
3248 3249 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3249 3250 }
3250 3251
3251 3252
#if SOLARIS2 >= 10
/* ------------------------------------------------------------------------ */
/* Function:    fr_stateifindexsync                                         */
/* Returns:     void                                                        */
/* Parameters:  ifp    - current network interface descriptor (ifindex)     */
/*              newifp - new interface descriptor (new ifindex)             */
/*              ifs    - pointer to IPF stack                               */
/*                                                                          */
/* Write Locks: assumes ipf_mutex is locked                                 */
/*                                                                          */
/* Replaces every interface pointer in the state table that equals ifp      */
/* with newifp, holding the ipf_state write lock while doing so.            */
/* ------------------------------------------------------------------------ */
void fr_stateifindexsync(ifp, newifp, ifs)
void *ifp;
void *newifp;
ipf_stack_t *ifs;
{
	ipstate_t *is;
	int slot;

	WRITE_ENTER(&ifs->ifs_ipf_state);

	is = ifs->ifs_ips_list;
	while (is != NULL) {
		/* Each entry caches four interface pointers. */
		for (slot = 0; slot < 4; slot++) {
			if (is->is_ifp[slot] == ifp)
				is->is_ifp[slot] = newifp;
		}
		is = is->is_next;
	}

	RWLOCK_EXIT(&ifs->ifs_ipf_state);
}
#endif
3286 3287
3287 3288 /* ------------------------------------------------------------------------ */
3288 3289 /* Function: fr_delstate */
3289 3290 /* Returns: int - 0 = entry deleted, else ref count on entry */
3290 3291 /* Parameters: is(I) - pointer to state structure to delete */
3291 3292 /* why(I) - if not 0, log reason why it was deleted */
3292 3293 /* ifs - ipf stack instance */
3293 3294 /* Write Locks: ipf_state/ipf_global */
3294 3295 /* */
3295 3296 /* Deletes a state entry from the enumerated list as well as the hash table */
3296 3297 /* and timeout queue lists. Make adjustments to hash table statistics and */
3297 3298 /* global counters as required. */
3298 3299 /* ------------------------------------------------------------------------ */
3299 3300 int fr_delstate(is, why, ifs)
3300 3301 ipstate_t *is;
3301 3302 int why;
3302 3303 ipf_stack_t *ifs;
3303 3304 {
3304 3305 int removed = 0;
3305 3306
3306 3307 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 ||
3307 3308 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0);
3308 3309
3309 3310 /*
3310 3311 * Start by removing the entry from the hash table of state entries
3311 3312 * so it will not be "used" again.
3312 3313 *
3313 3314 * It will remain in the "list" of state entries until all references
3314 3315 * have been accounted for.
3315 3316 */
3316 3317 if (is->is_phnext != NULL) {
3317 3318 removed = 1;
3318 3319 *is->is_phnext = is->is_hnext;
3319 3320 if (is->is_hnext != NULL)
3320 3321 is->is_hnext->is_phnext = is->is_phnext;
3321 3322 if (ifs->ifs_ips_table[is->is_hv] == NULL)
3322 3323 ifs->ifs_ips_stats.iss_inuse--;
3323 3324 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--;
3324 3325
3325 3326 is->is_phnext = NULL;
3326 3327 is->is_hnext = NULL;
3327 3328 }
3328 3329
3329 3330 /*
3330 3331 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state
3331 3332 * table that have wildcard flags set, only decerement it once
3332 3333 * and do it here.
3333 3334 */
3334 3335 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
3335 3336 if (!(is->is_flags & SI_CLONED)) {
3336 3337 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
3337 3338 }
3338 3339 is->is_flags &= ~(SI_WILDP|SI_WILDA);
3339 3340 }
3340 3341
3341 3342 /*
3342 3343 * Next, remove it from the timeout queue it is in.
3343 3344 */
3344 3345 fr_deletequeueentry(&is->is_sti);
3345 3346
3346 3347 is->is_me = NULL;
3347 3348
3348 3349 /*
3349 3350 * If it is still in use by something else, do not go any further,
3350 3351 * but note that at this point it is now an orphan.
3351 3352 */
3352 3353 MUTEX_ENTER(&is->is_lock);
3353 3354 if (is->is_ref > 1) {
3354 3355 is->is_ref--;
3355 3356 MUTEX_EXIT(&is->is_lock);
3356 3357 if (removed)
3357 3358 ifs->ifs_ips_stats.iss_orphans++;
3358 3359 return (is->is_ref);
3359 3360 }
3360 3361 MUTEX_EXIT(&is->is_lock);
3361 3362
3362 3363 is->is_ref = 0;
3363 3364
3364 3365 /*
3365 3366 * If entry has already been removed from table,
3366 3367 * it means we're simply cleaning up an orphan.
3367 3368 */
3368 3369 if (!removed)
3369 3370 ifs->ifs_ips_stats.iss_orphans--;
3370 3371
3371 3372 if (is->is_tqehead[0] != NULL)
3372 3373 (void) fr_deletetimeoutqueue(is->is_tqehead[0]);
3373 3374
3374 3375 if (is->is_tqehead[1] != NULL)
3375 3376 (void) fr_deletetimeoutqueue(is->is_tqehead[1]);
3376 3377
3377 3378 #ifdef IPFILTER_SYNC
3378 3379 if (is->is_sync)
3379 3380 ipfsync_del(is->is_sync);
3380 3381 #endif
3381 3382 #ifdef IPFILTER_SCAN
3382 3383 (void) ipsc_detachis(is);
3383 3384 #endif
3384 3385
3385 3386 /*
3386 3387 * Now remove it from master list of state table entries.
3387 3388 */
3388 3389 if (is->is_pnext != NULL) {
3389 3390 *is->is_pnext = is->is_next;
3390 3391 if (is->is_next != NULL) {
3391 3392 is->is_next->is_pnext = is->is_pnext;
3392 3393 is->is_next = NULL;
3393 3394 }
3394 3395 is->is_pnext = NULL;
3395 3396 }
3396 3397
3397 3398 if (ifs->ifs_ipstate_logging != 0 && why != 0)
3398 3399 ipstate_log(is, why, ifs);
3399 3400
3400 3401 if (is->is_rule != NULL) {
3401 3402 is->is_rule->fr_statecnt--;
3402 3403 (void)fr_derefrule(&is->is_rule, ifs);
3403 3404 }
3404 3405
3405 3406 MUTEX_DESTROY(&is->is_lock);
3406 3407 KFREE(is);
3407 3408 ifs->ifs_ips_num--;
3408 3409
3409 3410 return (0);
3410 3411 }
3411 3412
3412 3413
3413 3414 /* ------------------------------------------------------------------------ */
3414 3415 /* Function: fr_timeoutstate */
3415 3416 /* Returns: Nil */
3416 3417 /* Parameters: ifs - ipf stack instance */
3417 3418 /* */
3418 3419 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
3419 3420 /* used here is to keep the queue sorted with the oldest things at the top */
3420 3421 /* and the youngest at the bottom. So if the top one doesn't need to be */
3421 3422 /* expired then neither will any under it. */
3422 3423 /* ------------------------------------------------------------------------ */
3423 3424 void fr_timeoutstate(ifs)
3424 3425 ipf_stack_t *ifs;
3425 3426 {
3426 3427 ipftq_t *ifq, *ifqnext;
3427 3428 ipftqent_t *tqe, *tqn;
3428 3429 ipstate_t *is;
3429 3430 SPL_INT(s);
3430 3431
3431 3432 SPL_NET(s);
3432 3433 WRITE_ENTER(&ifs->ifs_ipf_state);
3433 3434 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next)
3434 3435 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3435 3436 if (tqe->tqe_die > ifs->ifs_fr_ticks)
3436 3437 break;
3437 3438 tqn = tqe->tqe_next;
3438 3439 is = tqe->tqe_parent;
3439 3440 (void) fr_delstate(is, ISL_EXPIRE, ifs);
3440 3441 }
3441 3442
3442 3443 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) {
3443 3444 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3444 3445 if (tqe->tqe_die > ifs->ifs_fr_ticks)
3445 3446 break;
3446 3447 tqn = tqe->tqe_next;
3447 3448 is = tqe->tqe_parent;
3448 3449 (void) fr_delstate(is, ISL_EXPIRE, ifs);
3449 3450 }
3450 3451 }
3451 3452
3452 3453 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) {
3453 3454 ifqnext = ifq->ifq_next;
3454 3455
3455 3456 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
3456 3457 (ifq->ifq_ref == 0)) {
3457 3458 fr_freetimeoutqueue(ifq, ifs);
3458 3459 }
3459 3460 }
3460 3461
3461 3462 if (ifs->ifs_fr_state_doflush) {
3462 3463 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs);
3463 3464 ifs->ifs_fr_state_doflush = 0;
3464 3465 }
3465 3466 RWLOCK_EXIT(&ifs->ifs_ipf_state);
3466 3467 SPL_X(s);
3467 3468 }
3468 3469
3469 3470
3470 3471 /* ---------------------------------------------------------------------- */
3471 3472 /* Function: fr_state_flush */
3472 3473 /* Returns: int - 0 == success, -1 == failure */
3473 3474 /* Parameters: flush_option - how to flush the active State table */
3474 3475 /* proto - IP version to flush (4, 6, or both) */
3475 3476 /* ifs - ipf stack instance */
3476 3477 /* Write Locks: ipf_state */
3477 3478 /* */
3478 3479 /* Flush state tables. Three possible flush options currently defined: */
3479 3480 /* */
3480 3481 /* FLUSH_TABLE_ALL : Flush all state table entries */
3481 3482 /* */
3482 3483 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */
3483 3484 /* have started to close on both ends using */
3484 3485 /* ipf_flushclosing(). */
3485 3486 /* */
3486 3487 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */
3487 3488 /* Then, if needed, flush entries with TCP */
3488 3489 /* connections which have been idle for a long */
3489 3490 /* time with ipf_extraflush(). */
3490 3491 /* ---------------------------------------------------------------------- */
3491 3492 static int fr_state_flush(flush_option, proto, ifs)
3492 3493 int flush_option, proto;
3493 3494 ipf_stack_t *ifs;
3494 3495 {
3495 3496 ipstate_t *is, *isn;
3496 3497 int removed;
3497 3498 SPL_INT(s);
3498 3499
3499 3500 removed = 0;
3500 3501
3501 3502 SPL_NET(s);
3502 3503 switch (flush_option)
3503 3504 {
3504 3505 case FLUSH_TABLE_ALL:
3505 3506 isn = ifs->ifs_ips_list;
3506 3507 while ((is = isn) != NULL) {
3507 3508 isn = is->is_next;
3508 3509 if ((proto != 0) && (is->is_v != proto))
3509 3510 continue;
3510 3511 if (fr_delstate(is, ISL_FLUSH, ifs) == 0)
3511 3512 removed++;
3512 3513 }
3513 3514 break;
3514 3515
3515 3516 case FLUSH_TABLE_CLOSING:
3516 3517 removed = ipf_flushclosing(STATE_FLUSH,
3517 3518 IPF_TCPS_CLOSE_WAIT,
3518 3519 ifs->ifs_ips_tqtqb,
3519 3520 ifs->ifs_ips_utqe,
3520 3521 ifs);
3521 3522 break;
3522 3523
3523 3524 case FLUSH_TABLE_EXTRA:
3524 3525 removed = ipf_flushclosing(STATE_FLUSH,
3525 3526 IPF_TCPS_FIN_WAIT_2,
3526 3527 ifs->ifs_ips_tqtqb,
3527 3528 ifs->ifs_ips_utqe,
3528 3529 ifs);
3529 3530
3530 3531 /*
3531 3532 * Be sure we haven't done this in the last 10 seconds.
3532 3533 */
3533 3534 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush <
3534 3535 IPF_TTLVAL(10))
3535 3536 break;
3536 3537 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks;
3537 3538 removed += ipf_extraflush(STATE_FLUSH,
3538 3539 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED],
3539 3540 ifs->ifs_ips_utqe,
3540 3541 ifs);
3541 3542 break;
3542 3543
3543 3544 default: /* Flush Nothing */
3544 3545 break;
3545 3546 }
3546 3547
3547 3548 SPL_X(s);
3548 3549 return (removed);
3549 3550 }
3550 3551
3551 3552
3552 3553 /* ------------------------------------------------------------------------ */
3553 3554 /* Function: fr_tcp_age */
3554 3555 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */
3555 3556 /* Parameters: tq(I) - pointer to timeout queue information */
3556 3557 /* fin(I) - pointer to packet information */
3557 3558 /* tqtab(I) - TCP timeout queue table this is in */
3558 3559 /* flags(I) - flags from state/NAT entry */
3559 3560 /* */
3560 3561 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
3561 3562 /* */
3562 3563 /* - (try to) base state transitions on real evidence only, */
3563 3564 /* i.e. packets that are sent and have been received by ipfilter; */
3564 3565 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
3565 3566 /* */
3566 3567 /* - deal with half-closed connections correctly; */
3567 3568 /* */
3568 3569 /* - store the state of the source in state[0] such that ipfstat */
3569 3570 /* displays the state as source/dest instead of dest/source; the calls */
3570 3571 /* to fr_tcp_age have been changed accordingly. */
3571 3572 /* */
3572 3573 /* Internal Parameters: */
3573 3574 /* */
3574 3575 /* state[0] = state of source (host that initiated connection) */
3575 3576 /* state[1] = state of dest (host that accepted the connection) */
3576 3577 /* */
3577 3578 /* dir == 0 : a packet from source to dest */
3578 3579 /* dir == 1 : a packet from dest to source */
3579 3580 /* */
3580 3581 /* Locking: it is assumed that the parent of the tqe structure is locked. */
3581 3582 /* ------------------------------------------------------------------------ */
3582 3583 int fr_tcp_age(tqe, fin, tqtab, flags)
3583 3584 ipftqent_t *tqe;
3584 3585 fr_info_t *fin;
3585 3586 ipftq_t *tqtab;
3586 3587 int flags;
3587 3588 {
3588 3589 int dlen, ostate, nstate, rval, dir;
3589 3590 u_char tcpflags;
3590 3591 tcphdr_t *tcp;
3591 3592 ipf_stack_t *ifs = fin->fin_ifs;
3592 3593
3593 3594 tcp = fin->fin_dp;
3594 3595
3595 3596 rval = 0;
3596 3597 dir = fin->fin_rev;
3597 3598 tcpflags = tcp->th_flags;
3598 3599 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3599 3600
3600 3601 ostate = tqe->tqe_state[1 - dir];
3601 3602 nstate = tqe->tqe_state[dir];
3602 3603
3603 3604 DTRACE_PROBE4(
3604 3605 indata,
3605 3606 fr_info_t *, fin,
3606 3607 int, ostate,
3607 3608 int, nstate,
3608 3609 u_char, tcpflags
3609 3610 );
3610 3611
3611 3612 if (tcpflags & TH_RST) {
3612 3613 if (!(tcpflags & TH_PUSH) && !dlen)
3613 3614 nstate = IPF_TCPS_CLOSED;
3614 3615 else
3615 3616 nstate = IPF_TCPS_CLOSE_WAIT;
3616 3617
3617 3618 /*
3618 3619 * Once RST is received, we must advance peer's state to
3619 3620 * CLOSE_WAIT.
3620 3621 */
3621 3622 if (ostate <= IPF_TCPS_ESTABLISHED) {
3622 3623 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
3623 3624 }
3624 3625 rval = 1;
3625 3626 } else {
3626 3627
3627 3628 switch (nstate)
3628 3629 {
3629 3630 case IPF_TCPS_LISTEN: /* 0 */
3630 3631 if ((tcpflags & TH_OPENING) == TH_OPENING) {
3631 3632 /*
3632 3633 * 'dir' received an S and sends SA in
3633 3634 * response, CLOSED -> SYN_RECEIVED
3634 3635 */
3635 3636 nstate = IPF_TCPS_SYN_RECEIVED;
3636 3637 rval = 1;
3637 3638 } else if ((tcpflags & TH_OPENING) == TH_SYN) {
3638 3639 /* 'dir' sent S, CLOSED -> SYN_SENT */
3639 3640 nstate = IPF_TCPS_SYN_SENT;
3640 3641 rval = 1;
3641 3642 }
3642 3643 /*
3643 3644 * the next piece of code makes it possible to get
3644 3645 * already established connections into the state table
3645 3646 * after a restart or reload of the filter rules; this
3646 3647 * does not work when a strict 'flags S keep state' is
3647 3648 * used for tcp connections of course
3648 3649 */
3649 3650 if (((flags & IS_TCPFSM) == 0) &&
3650 3651 ((tcpflags & TH_ACKMASK) == TH_ACK)) {
3651 3652 /*
3652 3653 * we saw an A, guess 'dir' is in ESTABLISHED
3653 3654 * mode
3654 3655 */
3655 3656 switch (ostate)
3656 3657 {
3657 3658 case IPF_TCPS_LISTEN :
3658 3659 case IPF_TCPS_SYN_RECEIVED :
3659 3660 nstate = IPF_TCPS_HALF_ESTAB;
3660 3661 rval = 1;
3661 3662 break;
3662 3663 case IPF_TCPS_HALF_ESTAB :
3663 3664 case IPF_TCPS_ESTABLISHED :
3664 3665 nstate = IPF_TCPS_ESTABLISHED;
3665 3666 rval = 1;
3666 3667 break;
3667 3668 default :
3668 3669 break;
3669 3670 }
3670 3671 }
3671 3672 /*
3672 3673 * TODO: besides regular ACK packets we can have other
3673 3674 * packets as well; it is yet to be determined how we
3674 3675 * should initialize the states in those cases
3675 3676 */
3676 3677 break;
3677 3678
3678 3679 case IPF_TCPS_SYN_SENT: /* 1 */
3679 3680 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
3680 3681 /*
3681 3682 * A retransmitted SYN packet. We do not reset
3682 3683 * the timeout here to fr_tcptimeout because a
3683 3684 * connection connect timeout does not renew
3684 3685 * after every packet that is sent. We need to
3685 3686 * set rval so as to indicate the packet has
3686 3687 * passed the check for its flags being valid
3687 3688 * in the TCP FSM. Setting rval to 2 has the
3688 3689 * result of not resetting the timeout.
3689 3690 */
3690 3691 rval = 2;
3691 3692 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
3692 3693 TH_ACK) {
3693 3694 /*
3694 3695 * we see an A from 'dir' which is in SYN_SENT
3695 3696 * state: 'dir' sent an A in response to an SA
3696 3697 * which it received, SYN_SENT -> ESTABLISHED
3697 3698 */
3698 3699 nstate = IPF_TCPS_ESTABLISHED;
3699 3700 rval = 1;
3700 3701 } else if (tcpflags & TH_FIN) {
3701 3702 /*
3702 3703 * we see an F from 'dir' which is in SYN_SENT
3703 3704 * state and wants to close its side of the
3704 3705 * connection; SYN_SENT -> FIN_WAIT_1
3705 3706 */
3706 3707 nstate = IPF_TCPS_FIN_WAIT_1;
3707 3708 rval = 1;
3708 3709 } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
3709 3710 /*
3710 3711 * we see an SA from 'dir' which is already in
3711 3712 * SYN_SENT state, this means we have a
3712 3713 * simultaneous open; SYN_SENT -> SYN_RECEIVED
3713 3714 */
3714 3715 nstate = IPF_TCPS_SYN_RECEIVED;
3715 3716 rval = 1;
3716 3717 }
3717 3718 break;
3718 3719
3719 3720 case IPF_TCPS_SYN_RECEIVED: /* 2 */
3720 3721 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
3721 3722 /*
3722 3723 * we see an A from 'dir' which was in
3723 3724 * SYN_RECEIVED state so it must now be in
3724 3725 * established state, SYN_RECEIVED ->
3725 3726 * ESTABLISHED
3726 3727 */
3727 3728 nstate = IPF_TCPS_ESTABLISHED;
3728 3729 rval = 1;
3729 3730 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
3730 3731 TH_OPENING) {
3731 3732 /*
3732 3733 * We see an SA from 'dir' which is already in
3733 3734 * SYN_RECEIVED state.
3734 3735 */
3735 3736 rval = 2;
3736 3737 } else if (tcpflags & TH_FIN) {
3737 3738 /*
3738 3739 * we see an F from 'dir' which is in
3739 3740 * SYN_RECEIVED state and wants to close its
3740 3741 * side of the connection; SYN_RECEIVED ->
3741 3742 * FIN_WAIT_1
3742 3743 */
3743 3744 nstate = IPF_TCPS_FIN_WAIT_1;
3744 3745 rval = 1;
3745 3746 }
3746 3747 break;
3747 3748
3748 3749 case IPF_TCPS_HALF_ESTAB: /* 3 */
3749 3750 if (tcpflags & TH_FIN) {
3750 3751 nstate = IPF_TCPS_FIN_WAIT_1;
3751 3752 rval = 1;
3752 3753 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
3753 3754 /*
3754 3755 * If we've picked up a connection in mid
3755 3756 * flight, we could be looking at a follow on
3756 3757 * packet from the same direction as the one
3757 3758 * that created this state. Recognise it but
3758 3759 * do not advance the entire connection's
3759 3760 * state.
3760 3761 */
3761 3762 switch (ostate)
3762 3763 {
3763 3764 case IPF_TCPS_LISTEN :
3764 3765 case IPF_TCPS_SYN_SENT :
3765 3766 case IPF_TCPS_SYN_RECEIVED :
3766 3767 rval = 1;
3767 3768 break;
3768 3769 case IPF_TCPS_HALF_ESTAB :
3769 3770 case IPF_TCPS_ESTABLISHED :
3770 3771 nstate = IPF_TCPS_ESTABLISHED;
3771 3772 rval = 1;
3772 3773 break;
3773 3774 default :
3774 3775 break;
3775 3776 }
3776 3777 }
3777 3778 break;
3778 3779
3779 3780 case IPF_TCPS_ESTABLISHED: /* 4 */
3780 3781 rval = 1;
3781 3782 if (tcpflags & TH_FIN) {
3782 3783 /*
3783 3784 * 'dir' closed its side of the connection;
3784 3785 * this gives us a half-closed connection;
3785 3786 * ESTABLISHED -> FIN_WAIT_1
3786 3787 */
3787 3788 if (ostate == IPF_TCPS_FIN_WAIT_1) {
3788 3789 nstate = IPF_TCPS_CLOSING;
3789 3790 } else {
3790 3791 nstate = IPF_TCPS_FIN_WAIT_1;
3791 3792 }
3792 3793 } else if (tcpflags & TH_ACK) {
3793 3794 /*
3794 3795 * an ACK, should we exclude other flags here?
3795 3796 */
3796 3797 if (ostate == IPF_TCPS_FIN_WAIT_1) {
3797 3798 /*
3798 3799 * We know the other side did an active
3799 3800 * close, so we are ACKing the recvd
3800 3801 * FIN packet (does the window matching
3801 3802 * code guarantee this?) and go into
3802 3803 * CLOSE_WAIT state; this gives us a
3803 3804 * half-closed connection
3804 3805 */
3805 3806 nstate = IPF_TCPS_CLOSE_WAIT;
3806 3807 } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
3807 3808 /*
3808 3809 * still a fully established
3809 3810 * connection reset timeout
3810 3811 */
3811 3812 nstate = IPF_TCPS_ESTABLISHED;
3812 3813 }
3813 3814 }
3814 3815 break;
3815 3816
3816 3817 case IPF_TCPS_CLOSE_WAIT: /* 5 */
3817 3818 rval = 1;
3818 3819 if (tcpflags & TH_FIN) {
3819 3820 /*
3820 3821 * application closed and 'dir' sent a FIN,
3821 3822 * we're now going into LAST_ACK state
3822 3823 */
3823 3824 nstate = IPF_TCPS_LAST_ACK;
3824 3825 } else {
3825 3826 /*
3826 3827 * we remain in CLOSE_WAIT because the other
3827 3828 * side has closed already and we did not
3828 3829 * close our side yet; reset timeout
3829 3830 */
3830 3831 nstate = IPF_TCPS_CLOSE_WAIT;
3831 3832 }
3832 3833 break;
3833 3834
3834 3835 case IPF_TCPS_FIN_WAIT_1: /* 6 */
3835 3836 rval = 1;
3836 3837 if ((tcpflags & TH_ACK) &&
3837 3838 ostate > IPF_TCPS_CLOSE_WAIT) {
3838 3839 /*
3839 3840 * if the other side is not active anymore
3840 3841 * it has sent us a FIN packet that we are
3841 3842 * ack'ing now with an ACK; this means both
3842 3843 * sides have now closed the connection and
3843 3844 * we go into LAST_ACK
3844 3845 */
3845 3846 /*
3846 3847 * XXX: how do we know we really are ACKing
3847 3848 * the FIN packet here? does the window code
3848 3849 * guarantee that?
3849 3850 */
3850 3851 nstate = IPF_TCPS_LAST_ACK;
3851 3852 } else {
3852 3853 /*
3853 3854 * we closed our side of the connection
3854 3855 * already but the other side is still active
3855 3856 * (ESTABLISHED/CLOSE_WAIT); continue with
3856 3857 * this half-closed connection
3857 3858 */
3858 3859 nstate = IPF_TCPS_FIN_WAIT_1;
3859 3860 }
3860 3861 break;
3861 3862
3862 3863 case IPF_TCPS_CLOSING: /* 7 */
3863 3864 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
3864 3865 nstate = IPF_TCPS_TIME_WAIT;
3865 3866 }
3866 3867 rval = 1;
3867 3868 break;
3868 3869
3869 3870 case IPF_TCPS_LAST_ACK: /* 8 */
3870 3871 /*
3871 3872 * We want to reset timer here to keep state in table.
3872 3873 * If we would allow the state to time out here, while
3873 3874 * there would still be packets being retransmitted, we
3874 3875 * would cut off line between the two peers preventing
3875 3876 * them to close connection properly.
3876 3877 */
3877 3878 rval = 1;
3878 3879 break;
3879 3880
3880 3881 case IPF_TCPS_FIN_WAIT_2: /* 9 */
3881 3882 /* NOT USED */
3882 3883 break;
3883 3884
3884 3885 case IPF_TCPS_TIME_WAIT: /* 10 */
3885 3886 /* we're in 2MSL timeout now */
3886 3887 if (ostate == IPF_TCPS_LAST_ACK) {
3887 3888 nstate = IPF_TCPS_CLOSED;
3888 3889 rval = 1;
3889 3890 } else {
3890 3891 rval = 2;
3891 3892 }
3892 3893 break;
3893 3894
3894 3895 case IPF_TCPS_CLOSED: /* 11 */
3895 3896 rval = 2;
3896 3897 break;
3897 3898
3898 3899 default :
3899 3900 #if defined(_KERNEL)
3900 3901 ASSERT(nstate >= IPF_TCPS_LISTEN &&
3901 3902 nstate <= IPF_TCPS_CLOSED);
3902 3903 #else
3903 3904 abort();
3904 3905 #endif
3905 3906 break;
3906 3907 }
3907 3908 }
3908 3909
3909 3910 /*
3910 3911 * If rval == 2 then do not update the queue position, but treat the
3911 3912 * packet as being ok.
3912 3913 */
3913 3914 if (rval == 2) {
3914 3915 DTRACE_PROBE1(state_keeping_timer, int, nstate);
3915 3916 rval = 1;
3916 3917 }
3917 3918 else if (rval == 1) {
3918 3919 tqe->tqe_state[dir] = nstate;
3919 3920 /*
3920 3921 * The nstate can either advance to a new state, or remain
3921 3922 * unchanged, resetting the timer by moving to the bottom of
3922 3923 * the queue.
3923 3924 */
3924 3925 DTRACE_PROBE1(state_done, int, nstate);
3925 3926
3926 3927 if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
3927 3928 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs);
3928 3929 }
3929 3930
3930 3931 return rval;
3931 3932 }
3932 3933
3933 3934
3934 3935 /* ------------------------------------------------------------------------ */
3935 3936 /* Function: ipstate_log */
3936 3937 /* Returns: Nil */
3937 3938 /* Parameters: is(I) - pointer to state structure */
3938 3939 /* type(I) - type of log entry to create */
3939 3940 /* */
3940 3941 /* Creates a state table log entry using the state structure and type info. */
3941 3942 /* passed in. Log packet/byte counts, source/destination address and other */
3942 3943 /* protocol specific information. */
3943 3944 /* ------------------------------------------------------------------------ */
3944 3945 void ipstate_log(is, type, ifs)
3945 3946 struct ipstate *is;
3946 3947 u_int type;
3947 3948 ipf_stack_t *ifs;
3948 3949 {
3949 3950 #ifdef IPFILTER_LOG
3950 3951 struct ipslog ipsl;
3951 3952 size_t sizes[1];
3952 3953 void *items[1];
3953 3954 int types[1];
3954 3955
3955 3956 /*
3956 3957 * Copy information out of the ipstate_t structure and into the
3957 3958 * structure used for logging.
3958 3959 */
3959 3960 ipsl.isl_type = type;
3960 3961 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
3961 3962 ipsl.isl_bytes[0] = is->is_bytes[0];
3962 3963 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
3963 3964 ipsl.isl_bytes[1] = is->is_bytes[1];
3964 3965 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
3965 3966 ipsl.isl_bytes[2] = is->is_bytes[2];
3966 3967 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
3967 3968 ipsl.isl_bytes[3] = is->is_bytes[3];
3968 3969 ipsl.isl_src = is->is_src;
3969 3970 ipsl.isl_dst = is->is_dst;
3970 3971 ipsl.isl_p = is->is_p;
3971 3972 ipsl.isl_v = is->is_v;
3972 3973 ipsl.isl_flags = is->is_flags;
3973 3974 ipsl.isl_tag = is->is_tag;
3974 3975 ipsl.isl_rulen = is->is_rulen;
3975 3976 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
3976 3977
3977 3978 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
3978 3979 ipsl.isl_sport = is->is_sport;
3979 3980 ipsl.isl_dport = is->is_dport;
3980 3981 if (ipsl.isl_p == IPPROTO_TCP) {
3981 3982 ipsl.isl_state[0] = is->is_state[0];
3982 3983 ipsl.isl_state[1] = is->is_state[1];
3983 3984 }
3984 3985 } else if (ipsl.isl_p == IPPROTO_ICMP) {
3985 3986 ipsl.isl_itype = is->is_icmp.ici_type;
3986 3987 } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
3987 3988 ipsl.isl_itype = is->is_icmp.ici_type;
3988 3989 } else {
3989 3990 ipsl.isl_ps.isl_filler[0] = 0;
3990 3991 ipsl.isl_ps.isl_filler[1] = 0;
3991 3992 }
3992 3993
3993 3994 items[0] = &ipsl;
3994 3995 sizes[0] = sizeof(ipsl);
3995 3996 types[0] = 0;
3996 3997
3997 3998 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) {
3998 3999 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged);
3999 4000 } else {
4000 4001 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail);
4001 4002 }
4002 4003 #endif
4003 4004 }
4004 4005
4005 4006
#ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    fr_checkicmp6matchingstate                                  */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* For an ICMPv6 error message, use the packet embedded in the ICMPv6       */
/* payload to look for a matching state table entry.  When an entry is      */
/* returned, the ipf_state read lock taken here is still held.              */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmp6matchingstate(fin)
fr_info_t *fin;
{
	ipf_stack_t *ifs = fin->fin_ifs;
	struct icmp6_hdr *ic6, *oic;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	int backward, i;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char pr;
	u_int hv;

	/*
	 * Must be IPv6, long enough to carry the returned (basic) IP
	 * header, and a recognised ICMP error type.  Only a basic IP
	 * header (no options) should accompany an ICMP error header.
	 */
	if (fin->fin_v != 6)
		return NULL;
	if (fin->fin_plen < ICMP6ERR_MINPKTLEN)
		return NULL;
	if (!(fin->fin_flx & FI_ICMPERR))
		return NULL;

	ic6 = fin->fin_dp;

	/* The offending packet's IPv6 header follows the ICMPv6 header. */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6))
		return NULL;

	/*
	 * Build a fin for the embedded packet so it can be fed to
	 * fr_matchsrcdst.  Not every field is needed, but this is the
	 * cleanest way.  fin_m/fin_mp are cleared on purpose so that any
	 * use of them panics; fr_matchsrcdst does not use them.
	 */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * Watch out here, as ip is in host order and oip6 in network
	 * order.  ip6_plen is overwritten only for the duration of
	 * fr_makefrip() and restored straight afterwards.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	ofin.fin_plen = oip6->ip6_plen;
	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	oip6->ip6_plen = savelen;

	/* Lookup key pieces shared by both lookups below. */
	pr = oip6->ip6_nxt;
	src.in6 = oip6->ip6_src;
	dst.in6 = oip6->ip6_dst;
	hv = pr;

	if (pr == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip6 + 1);
		/*
		 * An ICMP error can only be generated as a result of an
		 * ICMP query, not as the response to an ICMP error.
		 *
		 * XXX theoretically ICMP_ECHOREP and the other replies are
		 * ICMP queries as well, but adding them here seems
		 * strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * Look the embedded ICMP packet up in the state table.
		 */
		hv += src.in4.s_addr;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		isp = &ifs->ifs_ips_table[hv];
		while ((is = *isp) != NULL) {
			ic = &is->is_icmp;
			isp = &is->is_hnext;

			if (is->is_p != pr)
				continue;
			if (is->is_pass & FR_NOICMPERR)
				continue;
			if (oic->icmp6_id != ic->ici_id)
				continue;
			if (oic->icmp6_seq != ic->ici_seq)
				continue;

			is = fr_matchsrcdst(&ofin, is, &src, &dst, NULL,
			    FI_ICMPCMP);
			if (is == NULL)
				continue;

			/*
			 * In the state table ICMP queries are stored with
			 * the type of the corresponding ICMP response.
			 * Correct for that here.
			 */
			if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
			    (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
			    (ic->ici_type - 1 == oic->icmp6_type ))
				goto hit;
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	}

	/* Everything else hashes on the full addresses (plus ports). */
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	if ((pr == IPPROTO_TCP) || (pr == IPPROTO_UDP)) {
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
	} else {
		tcp = NULL;
	}
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	isp = &ifs->ifs_ips_table[hv];
	while ((is = *isp) != NULL) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp through if the encapsulated packet
		 * was allowed through the other way around.  Note that the
		 * minimal amount of info present does not allow for
		 * checking against tcp internals such as seq and ack
		 * numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;

		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		if (is != NULL)
			goto hit;
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;

hit:
	/*
	 * Matching entry found; the read lock stays held for the caller.
	 * We deliberately do not touch the timeouts for the accompanying
	 * state table entry.  It remains to be seen if that is correct. XXX
	 */
	ifs->ifs_ips_stats.iss_hits++;
	backward = IP6_NEQ(&is->is_dst, &src);
	fin->fin_rev = !backward;
	i = (backward << 1) + fin->fin_out;
	is->is_icmppkts[i]++;
	return is;
}
#endif
4185 4186
4186 4187
4187 4188 /* ------------------------------------------------------------------------ */
4188 4189 /* Function: fr_sttab_init */
4189 4190 /* Returns: Nil */
4190 4191 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4191 4192 /* */
4192 4193 /* Initialise the array of timeout queues for TCP. */
4193 4194 /* ------------------------------------------------------------------------ */
4194 4195 void fr_sttab_init(tqp, ifs)
4195 4196 ipftq_t *tqp;
4196 4197 ipf_stack_t *ifs;
4197 4198 {
4198 4199 int i;
4199 4200
4200 4201 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
4201 4202 tqp[i].ifq_ttl = 0;
4202 4203 tqp[i].ifq_ref = 1;
4203 4204 tqp[i].ifq_head = NULL;
4204 4205 tqp[i].ifq_tail = &tqp[i].ifq_head;
4205 4206 tqp[i].ifq_next = tqp + i + 1;
4206 4207 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
4207 4208 }
4208 4209 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
4209 4210 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed;
4210 4211 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout;
4211 4212 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout;
4212 4213 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout;
4213 4214 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout;
4214 4215 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
4215 4216 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
4216 4217 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout;
4217 4218 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack;
4218 4219 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait;
4219 4220 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout;
4220 4221 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout;
4221 4222 }
4222 4223
4223 4224
4224 4225 /* ------------------------------------------------------------------------ */
4225 4226 /* Function: fr_sttab_destroy */
4226 4227 /* Returns: Nil */
4227 4228 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4228 4229 /* */
4229 4230 /* Do whatever is necessary to "destroy" each of the entries in the array */
4230 4231 /* of timeout queues for TCP. */
4231 4232 /* ------------------------------------------------------------------------ */
4232 4233 void fr_sttab_destroy(tqp)
4233 4234 ipftq_t *tqp;
4234 4235 {
4235 4236 int i;
4236 4237
4237 4238 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
4238 4239 MUTEX_DESTROY(&tqp[i].ifq_lock);
4239 4240 }
4240 4241
4241 4242
4242 4243 /* ------------------------------------------------------------------------ */
4243 4244 /* Function: fr_statederef */
4244 4245 /* Returns: Nil */
4245 4246 /* Parameters: isp(I) - pointer to pointer to state table entry */
4246 4247 /* ifs - ipf stack instance */
4247 4248 /* */
4248 4249 /* Decrement the reference counter for this state table entry and free it */
4249 4250 /* if there are no more things using it. */
4250 4251 /* */
4251 4252 /* Internal parameters: */
4252 4253 /* state[0] = state of source (host that initiated connection) */
4253 4254 /* state[1] = state of dest (host that accepted the connection) */
4254 4255 /* ------------------------------------------------------------------------ */
4255 4256 void fr_statederef(isp, ifs)
4256 4257 ipstate_t **isp;
4257 4258 ipf_stack_t *ifs;
4258 4259 {
4259 4260 ipstate_t *is;
4260 4261
4261 4262 is = *isp;
4262 4263 *isp = NULL;
4263 4264
4264 4265 MUTEX_ENTER(&is->is_lock);
4265 4266 if (is->is_ref > 1) {
4266 4267 is->is_ref--;
4267 4268 MUTEX_EXIT(&is->is_lock);
4268 4269 #ifndef _KERNEL
4269 4270 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
4270 4271 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
4271 4272 (void) fr_delstate(is, ISL_ORPHAN, ifs);
4272 4273 }
4273 4274 #endif
4274 4275 return;
4275 4276 }
4276 4277 MUTEX_EXIT(&is->is_lock);
4277 4278
4278 4279 WRITE_ENTER(&ifs->ifs_ipf_state);
4279 4280 (void) fr_delstate(is, ISL_EXPIRE, ifs);
4280 4281 RWLOCK_EXIT(&ifs->ifs_ipf_state);
4281 4282 }
4282 4283
4283 4284
4284 4285 /* ------------------------------------------------------------------------ */
4285 4286 /* Function: fr_setstatequeue */
4286 4287 /* Returns: Nil */
4287 4288 /* Parameters: is(I) - pointer to state structure */
4288 4289 /* rev(I) - forward(0) or reverse(1) direction */
4289 4290 /* Locks: ipf_state (read or write) */
4290 4291 /* */
4291 4292 /* Put the state entry on its default queue entry, using rev as a helped in */
4292 4293 /* determining which queue it should be placed on. */
4293 4294 /* ------------------------------------------------------------------------ */
4294 4295 void fr_setstatequeue(is, rev, ifs)
4295 4296 ipstate_t *is;
4296 4297 int rev;
4297 4298 ipf_stack_t *ifs;
4298 4299 {
4299 4300 ipftq_t *oifq, *nifq;
4300 4301
4301 4302
4302 4303 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
4303 4304 nifq = is->is_tqehead[rev];
4304 4305 else
4305 4306 nifq = NULL;
4306 4307
4307 4308 if (nifq == NULL) {
4308 4309 switch (is->is_p)
4309 4310 {
4310 4311 #ifdef USE_INET6
4311 4312 case IPPROTO_ICMPV6 :
4312 4313 if (rev == 1)
4313 4314 nifq = &ifs->ifs_ips_icmpacktq;
4314 4315 else
4315 4316 nifq = &ifs->ifs_ips_icmptq;
4316 4317 break;
4317 4318 #endif
4318 4319 case IPPROTO_ICMP :
4319 4320 if (rev == 1)
4320 4321 nifq = &ifs->ifs_ips_icmpacktq;
4321 4322 else
4322 4323 nifq = &ifs->ifs_ips_icmptq;
4323 4324 break;
4324 4325 case IPPROTO_TCP :
4325 4326 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev];
4326 4327 break;
4327 4328
4328 4329 case IPPROTO_UDP :
4329 4330 if (rev == 1)
4330 4331 nifq = &ifs->ifs_ips_udpacktq;
4331 4332 else
4332 4333 nifq = &ifs->ifs_ips_udptq;
4333 4334 break;
4334 4335
4335 4336 default :
4336 4337 nifq = &ifs->ifs_ips_iptq;
4337 4338 break;
4338 4339 }
4339 4340 }
4340 4341
4341 4342 oifq = is->is_sti.tqe_ifq;
4342 4343 /*
4343 4344 * If it's currently on a timeout queue, move it from one queue to
4344 4345 * another, else put it on the end of the newly determined queue.
4345 4346 */
4346 4347 if (oifq != NULL)
4347 4348 fr_movequeue(&is->is_sti, oifq, nifq, ifs);
4348 4349 else
4349 4350 fr_queueappend(&is->is_sti, nifq, is, ifs);
4350 4351 return;
4351 4352 }
4352 4353
4353 4354
4354 4355 /* ------------------------------------------------------------------------ */
4355 4356 /* Function: fr_stateiter */
4356 4357 /* Returns: int - 0 == success, else error */
4357 4358 /* Parameters: token(I) - pointer to ipftoken structure */
4358 4359 /* itp(I) - pointer to ipfgeniter structure */
4359 4360 /* */
4360 4361 /* This function handles the SIOCGENITER ioctl for the state tables and */
4361 4362 /* walks through the list of entries in the state table list (ips_list.) */
4362 4363 /* ------------------------------------------------------------------------ */
4363 4364 static int fr_stateiter(token, itp, ifs)
4364 4365 ipftoken_t *token;
4365 4366 ipfgeniter_t *itp;
4366 4367 ipf_stack_t *ifs;
4367 4368 {
4368 4369 ipstate_t *is, *next, zero;
4369 4370 int error, count;
4370 4371 char *dst;
4371 4372
4372 4373 if (itp->igi_data == NULL)
4373 4374 return EFAULT;
4374 4375
4375 4376 if (itp->igi_nitems == 0)
4376 4377 return EINVAL;
4377 4378
4378 4379 if (itp->igi_type != IPFGENITER_STATE)
4379 4380 return EINVAL;
4380 4381
4381 4382 error = 0;
4382 4383
4383 4384 READ_ENTER(&ifs->ifs_ipf_state);
4384 4385
4385 4386 /*
4386 4387 * Get "previous" entry from the token and find the next entry.
4387 4388 */
4388 4389 is = token->ipt_data;
4389 4390 if (is == NULL) {
4390 4391 next = ifs->ifs_ips_list;
4391 4392 } else {
4392 4393 next = is->is_next;
4393 4394 }
4394 4395
4395 4396 dst = itp->igi_data;
4396 4397 for (count = itp->igi_nitems; count > 0; count--) {
4397 4398 /*
4398 4399 * If we found an entry, add a reference to it and update the token.
4399 4400 * Otherwise, zero out data to be returned and NULL out token.
4400 4401 */
4401 4402 if (next != NULL) {
4402 4403 MUTEX_ENTER(&next->is_lock);
4403 4404 next->is_ref++;
4404 4405 MUTEX_EXIT(&next->is_lock);
4405 4406 token->ipt_data = next;
4406 4407 } else {
4407 4408 bzero(&zero, sizeof(zero));
4408 4409 next = &zero;
4409 4410 token->ipt_data = NULL;
4410 4411 }
4411 4412
4412 4413 /*
4413 4414 * Safe to release lock now the we have a reference.
4414 4415 */
4415 4416 RWLOCK_EXIT(&ifs->ifs_ipf_state);
4416 4417
4417 4418 /*
4418 4419 * Copy out data and clean up references and tokens.
4419 4420 */
4420 4421 error = COPYOUT(next, dst, sizeof(*next));
4421 4422 if (error != 0)
4422 4423 error = EFAULT;
4423 4424 if (token->ipt_data == NULL) {
4424 4425 ipf_freetoken(token, ifs);
4425 4426 break;
4426 4427 } else {
4427 4428 if (is != NULL)
4428 4429 fr_statederef(&is, ifs);
4429 4430 if (next->is_next == NULL) {
4430 4431 ipf_freetoken(token, ifs);
4431 4432 break;
4432 4433 }
4433 4434 }
4434 4435
4435 4436 if ((count == 1) || (error != 0))
4436 4437 break;
4437 4438
4438 4439 READ_ENTER(&ifs->ifs_ipf_state);
4439 4440 dst += sizeof(*next);
4440 4441 is = next;
4441 4442 next = is->is_next;
4442 4443 }
4443 4444
4444 4445 return error;
4445 4446 }
|
↓ open down ↓ |
2530 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX