webrev: 917 Make TCP's iss_incr a tunable
--- old/usr/src/uts/common/inet/tcp/tcp_time_wait.c
+++ new/usr/src/uts/common/inet/tcp/tcp_time_wait.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright (c) 2011, Joyent Inc. All rights reserved.
24 25 */
25 26
26 27 /*
27 28 * This file contains functions related to TCP time wait processing. Also
28 29 * refer to the time wait handling comments in tcp_impl.h.
29 30 */
30 31
31 32 #include <sys/types.h>
32 33 #include <sys/strsun.h>
33 34 #include <sys/squeue_impl.h>
34 35 #include <sys/squeue.h>
35 36 #include <sys/callo.h>
36 37
37 38 #include <inet/common.h>
38 39 #include <inet/ip.h>
39 40 #include <inet/tcp.h>
40 41 #include <inet/tcp_impl.h>
41 42 #include <inet/tcp_cluster.h>
42 43
43 44 static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
44 45
45 46 /*
46 47 * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
47 48 * Running it every 5 seconds seems to give the best results.
48 49 */
49 50 #define TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
50 51
51 52 /*
52 53 * Remove a connection from the list of detached TIME_WAIT connections.
53 54 * It returns B_FALSE if it can't remove the connection from the list
54 55 * as the connection has already been removed from the list due to an
55 56 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
56 57 */
57 58 boolean_t
58 59 tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
59 60 {
60 61 boolean_t locked = B_FALSE;
61 62
62 63 if (tcp_time_wait == NULL) {
63 64 tcp_time_wait = *((tcp_squeue_priv_t **)
64 65 squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
65 66 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
66 67 locked = B_TRUE;
67 68 } else {
68 69 ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
69 70 }
70 71
71 72 /* 0 means that the tcp_t has not been added to the time wait list. */
72 73 if (tcp->tcp_time_wait_expire == 0) {
73 74 ASSERT(tcp->tcp_time_wait_next == NULL);
74 75 ASSERT(tcp->tcp_time_wait_prev == NULL);
75 76 if (locked)
76 77 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
77 78 return (B_FALSE);
78 79 }
79 80 ASSERT(TCP_IS_DETACHED(tcp));
80 81 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
81 82
82 83 if (tcp == tcp_time_wait->tcp_time_wait_head) {
83 84 ASSERT(tcp->tcp_time_wait_prev == NULL);
84 85 tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
85 86 if (tcp_time_wait->tcp_time_wait_head != NULL) {
86 87 tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
87 88 NULL;
88 89 } else {
89 90 tcp_time_wait->tcp_time_wait_tail = NULL;
90 91 }
91 92 } else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
92 93 ASSERT(tcp->tcp_time_wait_next == NULL);
93 94 tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
94 95 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
95 96 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
96 97 } else {
97 98 ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
98 99 ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
99 100 tcp->tcp_time_wait_prev->tcp_time_wait_next =
100 101 tcp->tcp_time_wait_next;
101 102 tcp->tcp_time_wait_next->tcp_time_wait_prev =
102 103 tcp->tcp_time_wait_prev;
103 104 }
104 105 tcp->tcp_time_wait_next = NULL;
105 106 tcp->tcp_time_wait_prev = NULL;
106 107 tcp->tcp_time_wait_expire = 0;
107 108
108 109 if (locked)
109 110 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
110 111 return (B_TRUE);
111 112 }
112 113
113 114 /*
114 115 * Add a connection to the list of detached TIME_WAIT connections
115 116 * and set its time to expire.
116 117 */
117 118 void
118 119 tcp_time_wait_append(tcp_t *tcp)
119 120 {
120 121 tcp_stack_t *tcps = tcp->tcp_tcps;
121 122 squeue_t *sqp = tcp->tcp_connp->conn_sqp;
122 123 tcp_squeue_priv_t *tcp_time_wait =
123 124 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
124 125
125 126 tcp_timers_stop(tcp);
126 127
127 128 /* Freed above */
128 129 ASSERT(tcp->tcp_timer_tid == 0);
129 130 ASSERT(tcp->tcp_ack_tid == 0);
130 131
131 132 /* must have happened at the time of detaching the tcp */
132 133 ASSERT(tcp->tcp_ptpahn == NULL);
133 134 ASSERT(tcp->tcp_flow_stopped == 0);
134 135 ASSERT(tcp->tcp_time_wait_next == NULL);
135 136 ASSERT(tcp->tcp_time_wait_prev == NULL);
136 137 ASSERT(tcp->tcp_time_wait_expire == 0);
137 138 ASSERT(tcp->tcp_listener == NULL);
138 139
139 140 tcp->tcp_time_wait_expire = ddi_get_lbolt64();
140 141 /*
141 142 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
142 143 * in practice. Hence it cannot be 0. Note that zero means that the
143 144 * tcp_t is not in the TIME_WAIT list.
144 145 */
145 146 tcp->tcp_time_wait_expire += MSEC_TO_TICK(
146 147 tcps->tcps_time_wait_interval);
147 148
148 149 ASSERT(TCP_IS_DETACHED(tcp));
149 150 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
150 151 ASSERT(tcp->tcp_time_wait_next == NULL);
151 152 ASSERT(tcp->tcp_time_wait_prev == NULL);
152 153 TCP_DBGSTAT(tcps, tcp_time_wait);
153 154
154 155 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
155 156 if (tcp_time_wait->tcp_time_wait_head == NULL) {
156 157 ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
157 158 tcp_time_wait->tcp_time_wait_head = tcp;
158 159
159 160 /*
160 161 * Even if the list was empty before, there may be a timer
161 162 * running since a tcp_t can be removed from the list
162 163 * in other places, such as tcp_clean_death(). So check if
163 164 * a timer is needed.
164 165 */
165 166 if (tcp_time_wait->tcp_time_wait_tid == 0) {
166 167 tcp_time_wait->tcp_time_wait_tid =
167 168 timeout_generic(CALLOUT_NORMAL,
168 169 tcp_time_wait_collector, sqp,
169 170 (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
170 171 MICROSEC, CALLOUT_TCP_RESOLUTION,
171 172 CALLOUT_FLAG_ROUNDUP);
172 173 }
173 174 } else {
174 175 /*
175 176 * The list is not empty, so a timer must be running. If not,
176 177 * tcp_time_wait_collector() must be running on this
177 178 * tcp_time_wait list at the same time.
178 179 */
179 180 ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
180 181 tcp_time_wait->tcp_time_wait_running);
181 182 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
182 183 ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
183 184 TCPS_TIME_WAIT);
184 185 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
185 186 tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
186 187
187 188 }
188 189 tcp_time_wait->tcp_time_wait_tail = tcp;
189 190 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
190 191 }
191 192
192 193 /*
193 194 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
194 195 * tcp_t. Used in tcp_time_wait_collector().
195 196 */
196 197 /* ARGSUSED */
197 198 static void
198 199 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
199 200 {
200 201 conn_t *connp = (conn_t *)arg;
201 202 tcp_t *tcp = connp->conn_tcp;
202 203
203 204 ASSERT(tcp != NULL);
204 205 if (tcp->tcp_state == TCPS_CLOSED) {
205 206 return;
206 207 }
207 208
208 209 ASSERT((connp->conn_family == AF_INET &&
209 210 connp->conn_ipversion == IPV4_VERSION) ||
210 211 (connp->conn_family == AF_INET6 &&
211 212 (connp->conn_ipversion == IPV4_VERSION ||
212 213 connp->conn_ipversion == IPV6_VERSION)));
213 214 ASSERT(!tcp->tcp_listener);
214 215
215 216 ASSERT(TCP_IS_DETACHED(tcp));
216 217
217 218 /*
218 219 * Because they have no upstream client to rebind or tcp_close()
219 220 * them later, we axe the connection here and now.
220 221 */
221 222 tcp_close_detached(tcp);
222 223 }
223 224
224 225 /*
225 226 * Blows away all tcps whose TIME_WAIT has expired. List traversal
226 227 * is done forwards from the head.
227 228 * This walks all stack instances since
228 229 * tcp_time_wait remains global across all stacks.
229 230 */
230 231 /* ARGSUSED */
231 232 void
232 233 tcp_time_wait_collector(void *arg)
233 234 {
234 235 tcp_t *tcp;
235 236 int64_t now;
236 237 mblk_t *mp;
237 238 conn_t *connp;
238 239 kmutex_t *lock;
239 240 boolean_t removed;
240 241 extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
241 242 uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
242 243
243 244 squeue_t *sqp = (squeue_t *)arg;
244 245 tcp_squeue_priv_t *tcp_time_wait =
245 246 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
246 247
247 248 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
248 249 tcp_time_wait->tcp_time_wait_tid = 0;
249 250 #ifdef DEBUG
250 251 tcp_time_wait->tcp_time_wait_running = B_TRUE;
251 252 #endif
252 253
253 254 if (tcp_time_wait->tcp_free_list != NULL &&
254 255 tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
255 256 TCP_G_STAT(tcp_freelist_cleanup);
256 257 while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
257 258 tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
258 259 tcp->tcp_time_wait_next = NULL;
259 260 tcp_time_wait->tcp_free_list_cnt--;
260 261 ASSERT(tcp->tcp_tcps == NULL);
261 262 CONN_DEC_REF(tcp->tcp_connp);
262 263 }
263 264 ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
264 265 }
265 266
266 267 /*
267 268 * In order to reap time waits reliably, we should use a
268 269 * source of time that is not adjustable by the user -- hence
269 270 * the call to ddi_get_lbolt64().
270 271 */
271 272 now = ddi_get_lbolt64();
272 273 while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
273 274 /*
274 275 * lbolt64 should not wrap around in practice... So we can
275 276 * do a direct comparison.
276 277 */
277 278 if (now < tcp->tcp_time_wait_expire)
278 279 break;
279 280
280 281 removed = tcp_time_wait_remove(tcp, tcp_time_wait);
281 282 ASSERT(removed);
282 283
283 284 connp = tcp->tcp_connp;
284 285 ASSERT(connp->conn_fanout != NULL);
285 286 lock = &connp->conn_fanout->connf_lock;
286 287 /*
287 288 * This is essentially a TW reclaim fast path optimization for
288 289 * performance where the timewait collector checks under the
289 290 * fanout lock (so that no one else can get access to the
290 291 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
291 292 * the classifier hash list. If ref count is indeed 2, we can
292 293 * just remove the conn under the fanout lock and avoid
293 294 * cleaning up the conn under the squeue, provided that
294 295 * clustering callbacks are not enabled. If clustering is
295 296 * enabled, we need to make the clustering callback before
296 297 * setting the CONDEMNED flag and after dropping all locks and
297 298 * so we forego this optimization and fall back to the slow
298 299 * path. Also please see the comments in tcp_closei_local
299 300 * regarding the refcnt logic.
300 301 *
301 302 * Since we are holding the tcp_time_wait_lock, its better
302 303 * not to block on the fanout_lock because other connections
303 304 * can't add themselves to time_wait list. So we do a
304 305 * tryenter instead of mutex_enter.
305 306 */
306 307 if (mutex_tryenter(lock)) {
307 308 mutex_enter(&connp->conn_lock);
308 309 if ((connp->conn_ref == 2) &&
309 310 (cl_inet_disconnect == NULL)) {
310 311 ipcl_hash_remove_locked(connp,
311 312 connp->conn_fanout);
312 313 /*
313 314 * Set the CONDEMNED flag now itself so that
314 315 * the refcnt cannot increase due to any
315 316 * walker.
316 317 */
317 318 connp->conn_state_flags |= CONN_CONDEMNED;
318 319 mutex_exit(lock);
319 320 mutex_exit(&connp->conn_lock);
320 321 if (tcp_time_wait->tcp_free_list_cnt <
321 322 tcp_free_list_max_cnt) {
322 323 /* Add to head of tcp_free_list */
323 324 mutex_exit(
324 325 &tcp_time_wait->tcp_time_wait_lock);
325 326 tcp_cleanup(tcp);
326 327 ASSERT(connp->conn_latch == NULL);
327 328 ASSERT(connp->conn_policy == NULL);
328 329 ASSERT(tcp->tcp_tcps == NULL);
329 330 ASSERT(connp->conn_netstack == NULL);
330 331
331 332 mutex_enter(
332 333 &tcp_time_wait->tcp_time_wait_lock);
333 334 tcp->tcp_time_wait_next =
334 335 tcp_time_wait->tcp_free_list;
335 336 tcp_time_wait->tcp_free_list = tcp;
336 337 tcp_time_wait->tcp_free_list_cnt++;
337 338 continue;
338 339 } else {
339 340 /* Do not add to tcp_free_list */
340 341 mutex_exit(
341 342 &tcp_time_wait->tcp_time_wait_lock);
342 343 tcp_bind_hash_remove(tcp);
343 344 ixa_cleanup(tcp->tcp_connp->conn_ixa);
344 345 tcp_ipsec_cleanup(tcp);
345 346 CONN_DEC_REF(tcp->tcp_connp);
346 347 }
347 348 } else {
348 349 CONN_INC_REF_LOCKED(connp);
349 350 mutex_exit(lock);
350 351 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
351 352 mutex_exit(&connp->conn_lock);
352 353 /*
353 354 * We can reuse the closemp here since conn has
354 355 * detached (otherwise we wouldn't even be in
355 356 * time_wait list). tcp_closemp_used can safely
356 357 * be changed without taking a lock as no other
357 358 * thread can concurrently access it at this
358 359 * point in the connection lifecycle.
359 360 */
360 361
361 362 if (tcp->tcp_closemp.b_prev == NULL)
362 363 tcp->tcp_closemp_used = B_TRUE;
363 364 else
364 365 cmn_err(CE_PANIC,
365 366 "tcp_timewait_collector: "
366 367 "concurrent use of tcp_closemp: "
367 368 "connp %p tcp %p\n", (void *)connp,
368 369 (void *)tcp);
369 370
370 371 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
371 372 mp = &tcp->tcp_closemp;
372 373 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
373 374 tcp_timewait_close, connp, NULL,
374 375 SQ_FILL, SQTAG_TCP_TIMEWAIT);
375 376 }
376 377 } else {
377 378 mutex_enter(&connp->conn_lock);
378 379 CONN_INC_REF_LOCKED(connp);
379 380 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
380 381 mutex_exit(&connp->conn_lock);
381 382 /*
382 383 * We can reuse the closemp here since conn has
383 384 * detached (otherwise we wouldn't even be in
384 385 * time_wait list). tcp_closemp_used can safely
385 386 * be changed without taking a lock as no other
386 387 * thread can concurrently access it at this
387 388 * point in the connection lifecycle.
388 389 */
389 390
390 391 if (tcp->tcp_closemp.b_prev == NULL)
391 392 tcp->tcp_closemp_used = B_TRUE;
392 393 else
393 394 cmn_err(CE_PANIC, "tcp_timewait_collector: "
394 395 "concurrent use of tcp_closemp: "
395 396 "connp %p tcp %p\n", (void *)connp,
396 397 (void *)tcp);
397 398
398 399 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
399 400 mp = &tcp->tcp_closemp;
400 401 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
401 402 tcp_timewait_close, connp, NULL,
402 403 SQ_FILL, SQTAG_TCP_TIMEWAIT);
403 404 }
404 405 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
405 406 }
406 407
407 408 if (tcp_time_wait->tcp_free_list != NULL)
408 409 tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
409 410
410 411 /*
411 412 * If the time wait list is not empty and there is no timer running,
412 413 * restart it.
413 414 */
414 415 if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
415 416 tcp_time_wait->tcp_time_wait_tid == 0) {
416 417 hrtime_t firetime;
417 418
418 419 firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
419 420 /* This ensures that we won't wake up too often. */
420 421 firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
421 422 tcp_time_wait->tcp_time_wait_tid =
422 423 timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
423 424 sqp, firetime, CALLOUT_TCP_RESOLUTION,
424 425 CALLOUT_FLAG_ROUNDUP);
425 426 }
426 427 #ifdef DEBUG
427 428 tcp_time_wait->tcp_time_wait_running = B_FALSE;
428 429 #endif
429 430 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
430 431 }
431 432
432 433 /*
433 434 * tcp_time_wait_processing() handles processing of incoming packets when
434 435 * the tcp_t is in the TIME_WAIT state.
435 436 *
436 437 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
437 438 * detached state) is never put on the time wait list.
438 439 */
439 440 void
440 441 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
441 442 uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
442 443 {
443 444 int32_t bytes_acked;
444 445 int32_t gap;
445 446 int32_t rgap;
446 447 tcp_opt_t tcpopt;
447 448 uint_t flags;
448 449 uint32_t new_swnd = 0;
449 450 conn_t *nconnp;
450 451 conn_t *connp = tcp->tcp_connp;
451 452 tcp_stack_t *tcps = tcp->tcp_tcps;
452 453
453 454 BUMP_LOCAL(tcp->tcp_ibsegs);
454 455 DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
455 456
456 457 flags = (unsigned int)tcpha->tha_flags & 0xFF;
457 458 new_swnd = ntohs(tcpha->tha_win) <<
458 459 ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
459 460 if (tcp->tcp_snd_ts_ok) {
460 461 if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
461 462 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
462 463 tcp->tcp_rnxt, TH_ACK);
463 464 goto done;
464 465 }
465 466 }
466 467 gap = seg_seq - tcp->tcp_rnxt;
467 468 rgap = tcp->tcp_rwnd - (gap + seg_len);
468 469 if (gap < 0) {
469 470 TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
470 471 TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
471 472 (seg_len > -gap ? -gap : seg_len));
472 473 seg_len += gap;
473 474 if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
474 475 if (flags & TH_RST) {
475 476 goto done;
476 477 }
477 478 if ((flags & TH_FIN) && seg_len == -1) {
478 479 /*
479 480 * When TCP receives a duplicate FIN in
480 481 * TIME_WAIT state, restart the 2 MSL timer.
481 482 * See page 73 in RFC 793. Make sure this TCP
482 483 * is already on the TIME_WAIT list. If not,
483 484 * just restart the timer.
484 485 */
485 486 if (TCP_IS_DETACHED(tcp)) {
486 487 if (tcp_time_wait_remove(tcp, NULL) ==
487 488 B_TRUE) {
488 489 tcp_time_wait_append(tcp);
489 490 TCP_DBGSTAT(tcps,
490 491 tcp_rput_time_wait);
491 492 }
492 493 } else {
493 494 ASSERT(tcp != NULL);
494 495 TCP_TIMER_RESTART(tcp,
495 496 tcps->tcps_time_wait_interval);
496 497 }
497 498 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
498 499 tcp->tcp_rnxt, TH_ACK);
499 500 goto done;
500 501 }
501 502 flags |= TH_ACK_NEEDED;
502 503 seg_len = 0;
|
↓ open down ↓ |
469 lines elided |
↑ open up ↑ |
503 504 goto process_ack;
504 505 }
505 506
506 507 /* Fix seg_seq, and chew the gap off the front. */
507 508 seg_seq = tcp->tcp_rnxt;
508 509 }
509 510
510 511 if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
511 512 /*
512 513 * Make sure that when we accept the connection, pick
513 - * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
514 + * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
514 515 * old connection.
515 516 *
516 517 * The next ISS generated is equal to tcp_iss_incr_extra
517 - * + ISS_INCR/2 + other components depending on the
518 + * + tcp_iss_incr/2 + other components depending on the
518 519 * value of tcp_strong_iss. We pre-calculate the new
519 520 * ISS here and compare with tcp_snxt to determine if
520 521 * we need to make adjustment to tcp_iss_incr_extra.
521 522 *
522 523 * The above calculation is ugly and is a
523 524 * waste of CPU cycles...
524 525 */
525 526 uint32_t new_iss = tcps->tcps_iss_incr_extra;
526 527 int32_t adj;
527 528 ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
528 529
529 530 switch (tcps->tcps_strong_iss) {
530 531 case 2: {
531 532 /* Add time and MD5 components. */
532 533 uint32_t answer[4];
533 534 struct {
534 535 uint32_t ports;
535 536 in6_addr_t src;
536 537 in6_addr_t dst;
537 538 } arg;
538 539 MD5_CTX context;
539 540
540 541 mutex_enter(&tcps->tcps_iss_key_lock);
541 542 context = tcps->tcps_iss_key;
542 543 mutex_exit(&tcps->tcps_iss_key_lock);
543 544 arg.ports = connp->conn_ports;
544 545 /* We use MAPPED addresses in tcp_iss_init */
545 546 arg.src = connp->conn_laddr_v6;
546 547 arg.dst = connp->conn_faddr_v6;
547 548 MD5Update(&context, (uchar_t *)&arg,
548 549 sizeof (arg));
549 550 MD5Final((uchar_t *)answer, &context);
|
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
550 551 answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
551 552 new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
552 553 break;
553 554 }
554 555 case 1:
555 556 /* Add time component and min random (i.e. 1). */
556 557 new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
557 558 break;
558 559 default:
559 560 /* Add only time component. */
560 - new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
561 + new_iss += (uint32_t)gethrestime_sec() *
562 + tcps->tcps_iss_incr;
561 563 break;
562 564 }
563 565 if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
564 566 /*
565 - * New ISS not guaranteed to be ISS_INCR/2
567 + * New ISS not guaranteed to be tcp_iss_incr/2
566 568 * ahead of the current tcp_snxt, so add the
567 569 * difference to tcp_iss_incr_extra.
568 570 */
569 571 tcps->tcps_iss_incr_extra += adj;
570 572 }
571 573 /*
572 574 * If tcp_clean_death() can not perform the task now,
573 575 * drop the SYN packet and let the other side re-xmit.
574 576 * Otherwise pass the SYN packet back in, since the
575 577 * old tcp state has been cleaned up or freed.
576 578 */
577 579 if (tcp_clean_death(tcp, 0) == -1)
578 580 goto done;
579 581 nconnp = ipcl_classify(mp, ira, ipst);
580 582 if (nconnp != NULL) {
581 583 TCP_STAT(tcps, tcp_time_wait_syn_success);
582 584 /* Drops ref on nconnp */
583 585 tcp_reinput(nconnp, mp, ira, ipst);
584 586 return;
585 587 }
586 588 goto done;
587 589 }
588 590
589 591 /*
590 592 * rgap is the amount of stuff received out of window. A negative
591 593 * value is the amount out of window.
592 594 */
593 595 if (rgap < 0) {
594 596 TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
595 597 TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
596 598 /* Fix seg_len and make sure there is something left. */
597 599 seg_len += rgap;
598 600 if (seg_len <= 0) {
599 601 if (flags & TH_RST) {
600 602 goto done;
601 603 }
602 604 flags |= TH_ACK_NEEDED;
603 605 seg_len = 0;
604 606 goto process_ack;
605 607 }
606 608 }
607 609 /*
608 610 * Check whether we can update tcp_ts_recent. This test is
609 611 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
610 612 * Extensions for High Performance: An Update", Internet Draft.
611 613 */
612 614 if (tcp->tcp_snd_ts_ok &&
613 615 TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
614 616 SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
615 617 tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
616 618 tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
617 619 }
618 620
619 621 if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
620 622 /* Always ack out of order packets */
621 623 flags |= TH_ACK_NEEDED;
622 624 seg_len = 0;
623 625 } else if (seg_len > 0) {
624 626 TCPS_BUMP_MIB(tcps, tcpInClosed);
625 627 TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
626 628 TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
627 629 }
628 630 if (flags & TH_RST) {
629 631 (void) tcp_clean_death(tcp, 0);
630 632 goto done;
631 633 }
632 634 if (flags & TH_SYN) {
633 635 tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
634 636 TH_RST|TH_ACK);
635 637 /*
636 638 * Do not delete the TCP structure if it is in
637 639 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
638 640 */
639 641 goto done;
640 642 }
641 643 process_ack:
642 644 if (flags & TH_ACK) {
643 645 bytes_acked = (int)(seg_ack - tcp->tcp_suna);
644 646 if (bytes_acked <= 0) {
645 647 if (bytes_acked == 0 && seg_len == 0 &&
646 648 new_swnd == tcp->tcp_swnd)
647 649 TCPS_BUMP_MIB(tcps, tcpInDupAck);
648 650 } else {
649 651 /* Acks something not sent */
650 652 flags |= TH_ACK_NEEDED;
651 653 }
652 654 }
653 655 if (flags & TH_ACK_NEEDED) {
654 656 /*
655 657 * Time to send an ack for some reason.
656 658 */
657 659 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
658 660 tcp->tcp_rnxt, TH_ACK);
659 661 }
660 662 done:
661 663 freemsg(mp);
662 664 }
(end of webrev listing)