1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, Joyent Inc. All rights reserved.
25 */
26
27 /*
28 * This file contains functions related to TCP time wait processing. Also
29 * refer to the time wait handling comments in tcp_impl.h.
30 */
31
32 #include <sys/types.h>
33 #include <sys/strsun.h>
34 #include <sys/squeue_impl.h>
35 #include <sys/squeue.h>
36 #include <sys/callo.h>
37
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/tcp.h>
41 #include <inet/tcp_impl.h>
42 #include <inet/tcp_cluster.h>
43
44 static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
45
/*
 * TCP_TIME_WAIT_DELAY is the shortest interval, in nanoseconds, after which
 * tcp_time_wait_collector() re-arms itself: the next run is scheduled no
 * sooner than this, even when the connection at the head of the list
 * expires earlier.  Running the collector every 5 seconds seems to give
 * the best results.
 */
#define	TCP_TIME_WAIT_DELAY	(5 * (hrtime_t)NANOSEC)
51
52 /*
53 * Remove a connection from the list of detached TIME_WAIT connections.
54 * It returns B_FALSE if it can't remove the connection from the list
55 * as the connection has already been removed from the list due to an
56 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
57 */
58 boolean_t
59 tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
60 {
61 boolean_t locked = B_FALSE;
62
63 if (tcp_time_wait == NULL) {
64 tcp_time_wait = *((tcp_squeue_priv_t **)
65 squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
66 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
67 locked = B_TRUE;
68 } else {
69 ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
70 }
71
72 /* 0 means that the tcp_t has not been added to the time wait list. */
73 if (tcp->tcp_time_wait_expire == 0) {
74 ASSERT(tcp->tcp_time_wait_next == NULL);
75 ASSERT(tcp->tcp_time_wait_prev == NULL);
76 if (locked)
77 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
78 return (B_FALSE);
79 }
80 ASSERT(TCP_IS_DETACHED(tcp));
81 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
82
83 if (tcp == tcp_time_wait->tcp_time_wait_head) {
84 ASSERT(tcp->tcp_time_wait_prev == NULL);
85 tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
86 if (tcp_time_wait->tcp_time_wait_head != NULL) {
87 tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
88 NULL;
89 } else {
90 tcp_time_wait->tcp_time_wait_tail = NULL;
91 }
92 } else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
93 ASSERT(tcp->tcp_time_wait_next == NULL);
94 tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
95 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
96 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
97 } else {
98 ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
99 ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
100 tcp->tcp_time_wait_prev->tcp_time_wait_next =
101 tcp->tcp_time_wait_next;
102 tcp->tcp_time_wait_next->tcp_time_wait_prev =
103 tcp->tcp_time_wait_prev;
104 }
105 tcp->tcp_time_wait_next = NULL;
106 tcp->tcp_time_wait_prev = NULL;
107 tcp->tcp_time_wait_expire = 0;
108
109 if (locked)
110 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
111 return (B_TRUE);
112 }
113
114 /*
115 * Add a connection to the list of detached TIME_WAIT connections
116 * and set its time to expire.
117 */
118 void
119 tcp_time_wait_append(tcp_t *tcp)
120 {
121 tcp_stack_t *tcps = tcp->tcp_tcps;
122 squeue_t *sqp = tcp->tcp_connp->conn_sqp;
123 tcp_squeue_priv_t *tcp_time_wait =
124 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
125
126 tcp_timers_stop(tcp);
127
128 /* Freed above */
129 ASSERT(tcp->tcp_timer_tid == 0);
130 ASSERT(tcp->tcp_ack_tid == 0);
131
132 /* must have happened at the time of detaching the tcp */
133 ASSERT(tcp->tcp_ptpahn == NULL);
134 ASSERT(tcp->tcp_flow_stopped == 0);
135 ASSERT(tcp->tcp_time_wait_next == NULL);
136 ASSERT(tcp->tcp_time_wait_prev == NULL);
137 ASSERT(tcp->tcp_time_wait_expire == 0);
138 ASSERT(tcp->tcp_listener == NULL);
139
140 tcp->tcp_time_wait_expire = ddi_get_lbolt64();
141 /*
142 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
143 * in practice. Hence it cannot be 0. Note that zero means that the
144 * tcp_t is not in the TIME_WAIT list.
145 */
146 tcp->tcp_time_wait_expire += MSEC_TO_TICK(
147 tcps->tcps_time_wait_interval);
148
149 ASSERT(TCP_IS_DETACHED(tcp));
150 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
151 ASSERT(tcp->tcp_time_wait_next == NULL);
152 ASSERT(tcp->tcp_time_wait_prev == NULL);
153 TCP_DBGSTAT(tcps, tcp_time_wait);
154
155 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
156 if (tcp_time_wait->tcp_time_wait_head == NULL) {
157 ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
158 tcp_time_wait->tcp_time_wait_head = tcp;
159
160 /*
161 * Even if the list was empty before, there may be a timer
162 * running since a tcp_t can be removed from the list
163 * in other places, such as tcp_clean_death(). So check if
164 * a timer is needed.
165 */
166 if (tcp_time_wait->tcp_time_wait_tid == 0) {
167 tcp_time_wait->tcp_time_wait_tid =
168 timeout_generic(CALLOUT_NORMAL,
169 tcp_time_wait_collector, sqp,
170 (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
171 MICROSEC, CALLOUT_TCP_RESOLUTION,
172 CALLOUT_FLAG_ROUNDUP);
173 }
174 } else {
175 /*
176 * The list is not empty, so a timer must be running. If not,
177 * tcp_time_wait_collector() must be running on this
178 * tcp_time_wait list at the same time.
179 */
180 ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
181 tcp_time_wait->tcp_time_wait_running);
182 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
183 ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
184 TCPS_TIME_WAIT);
185 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
186 tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
187
188 }
189 tcp_time_wait->tcp_time_wait_tail = tcp;
190 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
191 }
192
193 /*
194 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
195 * tcp_t. Used in tcp_time_wait_collector().
196 */
197 /* ARGSUSED */
198 static void
199 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
200 {
201 conn_t *connp = (conn_t *)arg;
202 tcp_t *tcp = connp->conn_tcp;
203
204 ASSERT(tcp != NULL);
205 if (tcp->tcp_state == TCPS_CLOSED) {
206 return;
207 }
208
209 ASSERT((connp->conn_family == AF_INET &&
210 connp->conn_ipversion == IPV4_VERSION) ||
211 (connp->conn_family == AF_INET6 &&
212 (connp->conn_ipversion == IPV4_VERSION ||
213 connp->conn_ipversion == IPV6_VERSION)));
214 ASSERT(!tcp->tcp_listener);
215
216 ASSERT(TCP_IS_DETACHED(tcp));
217
218 /*
219 * Because they have no upstream client to rebind or tcp_close()
220 * them later, we axe the connection here and now.
221 */
222 tcp_close_detached(tcp);
223 }
224
225 /*
226 * Blows away all tcps whose TIME_WAIT has expired. List traversal
227 * is done forwards from the head.
228 * This walks all stack instances since
229 * tcp_time_wait remains global across all stacks.
230 */
231 /* ARGSUSED */
232 void
233 tcp_time_wait_collector(void *arg)
234 {
235 tcp_t *tcp;
236 int64_t now;
237 mblk_t *mp;
238 conn_t *connp;
239 kmutex_t *lock;
240 boolean_t removed;
241 extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
242 uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
243
244 squeue_t *sqp = (squeue_t *)arg;
245 tcp_squeue_priv_t *tcp_time_wait =
246 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
247
248 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
249 tcp_time_wait->tcp_time_wait_tid = 0;
250 #ifdef DEBUG
251 tcp_time_wait->tcp_time_wait_running = B_TRUE;
252 #endif
253
254 if (tcp_time_wait->tcp_free_list != NULL &&
255 tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
256 TCP_G_STAT(tcp_freelist_cleanup);
257 while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
258 tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
259 tcp->tcp_time_wait_next = NULL;
260 tcp_time_wait->tcp_free_list_cnt--;
261 ASSERT(tcp->tcp_tcps == NULL);
262 CONN_DEC_REF(tcp->tcp_connp);
263 }
264 ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
265 }
266
267 /*
268 * In order to reap time waits reliably, we should use a
269 * source of time that is not adjustable by the user -- hence
270 * the call to ddi_get_lbolt64().
271 */
272 now = ddi_get_lbolt64();
273 while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
274 /*
275 * lbolt64 should not wrap around in practice... So we can
276 * do a direct comparison.
277 */
278 if (now < tcp->tcp_time_wait_expire)
279 break;
280
281 removed = tcp_time_wait_remove(tcp, tcp_time_wait);
282 ASSERT(removed);
283
284 connp = tcp->tcp_connp;
285 ASSERT(connp->conn_fanout != NULL);
286 lock = &connp->conn_fanout->connf_lock;
287 /*
288 * This is essentially a TW reclaim fast path optimization for
289 * performance where the timewait collector checks under the
290 * fanout lock (so that no one else can get access to the
291 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
292 * the classifier hash list. If ref count is indeed 2, we can
293 * just remove the conn under the fanout lock and avoid
294 * cleaning up the conn under the squeue, provided that
295 * clustering callbacks are not enabled. If clustering is
296 * enabled, we need to make the clustering callback before
297 * setting the CONDEMNED flag and after dropping all locks and
298 * so we forego this optimization and fall back to the slow
299 * path. Also please see the comments in tcp_closei_local
300 * regarding the refcnt logic.
301 *
302 * Since we are holding the tcp_time_wait_lock, its better
303 * not to block on the fanout_lock because other connections
304 * can't add themselves to time_wait list. So we do a
305 * tryenter instead of mutex_enter.
306 */
307 if (mutex_tryenter(lock)) {
308 mutex_enter(&connp->conn_lock);
309 if ((connp->conn_ref == 2) &&
310 (cl_inet_disconnect == NULL)) {
311 ipcl_hash_remove_locked(connp,
312 connp->conn_fanout);
313 /*
314 * Set the CONDEMNED flag now itself so that
315 * the refcnt cannot increase due to any
316 * walker.
317 */
318 connp->conn_state_flags |= CONN_CONDEMNED;
319 mutex_exit(lock);
320 mutex_exit(&connp->conn_lock);
321 if (tcp_time_wait->tcp_free_list_cnt <
322 tcp_free_list_max_cnt) {
323 /* Add to head of tcp_free_list */
324 mutex_exit(
325 &tcp_time_wait->tcp_time_wait_lock);
326 tcp_cleanup(tcp);
327 ASSERT(connp->conn_latch == NULL);
328 ASSERT(connp->conn_policy == NULL);
329 ASSERT(tcp->tcp_tcps == NULL);
330 ASSERT(connp->conn_netstack == NULL);
331
332 mutex_enter(
333 &tcp_time_wait->tcp_time_wait_lock);
334 tcp->tcp_time_wait_next =
335 tcp_time_wait->tcp_free_list;
336 tcp_time_wait->tcp_free_list = tcp;
337 tcp_time_wait->tcp_free_list_cnt++;
338 continue;
339 } else {
340 /* Do not add to tcp_free_list */
341 mutex_exit(
342 &tcp_time_wait->tcp_time_wait_lock);
343 tcp_bind_hash_remove(tcp);
344 ixa_cleanup(tcp->tcp_connp->conn_ixa);
345 tcp_ipsec_cleanup(tcp);
346 CONN_DEC_REF(tcp->tcp_connp);
347 }
348 } else {
349 CONN_INC_REF_LOCKED(connp);
350 mutex_exit(lock);
351 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
352 mutex_exit(&connp->conn_lock);
353 /*
354 * We can reuse the closemp here since conn has
355 * detached (otherwise we wouldn't even be in
356 * time_wait list). tcp_closemp_used can safely
357 * be changed without taking a lock as no other
358 * thread can concurrently access it at this
359 * point in the connection lifecycle.
360 */
361
362 if (tcp->tcp_closemp.b_prev == NULL)
363 tcp->tcp_closemp_used = B_TRUE;
364 else
365 cmn_err(CE_PANIC,
366 "tcp_timewait_collector: "
367 "concurrent use of tcp_closemp: "
368 "connp %p tcp %p\n", (void *)connp,
369 (void *)tcp);
370
371 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
372 mp = &tcp->tcp_closemp;
373 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
374 tcp_timewait_close, connp, NULL,
375 SQ_FILL, SQTAG_TCP_TIMEWAIT);
376 }
377 } else {
378 mutex_enter(&connp->conn_lock);
379 CONN_INC_REF_LOCKED(connp);
380 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
381 mutex_exit(&connp->conn_lock);
382 /*
383 * We can reuse the closemp here since conn has
384 * detached (otherwise we wouldn't even be in
385 * time_wait list). tcp_closemp_used can safely
386 * be changed without taking a lock as no other
387 * thread can concurrently access it at this
388 * point in the connection lifecycle.
389 */
390
391 if (tcp->tcp_closemp.b_prev == NULL)
392 tcp->tcp_closemp_used = B_TRUE;
393 else
394 cmn_err(CE_PANIC, "tcp_timewait_collector: "
395 "concurrent use of tcp_closemp: "
396 "connp %p tcp %p\n", (void *)connp,
397 (void *)tcp);
398
399 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
400 mp = &tcp->tcp_closemp;
401 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
402 tcp_timewait_close, connp, NULL,
403 SQ_FILL, SQTAG_TCP_TIMEWAIT);
404 }
405 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
406 }
407
408 if (tcp_time_wait->tcp_free_list != NULL)
409 tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
410
411 /*
412 * If the time wait list is not empty and there is no timer running,
413 * restart it.
414 */
415 if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
416 tcp_time_wait->tcp_time_wait_tid == 0) {
417 hrtime_t firetime;
418
419 firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
420 /* This ensures that we won't wake up too often. */
421 firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
422 tcp_time_wait->tcp_time_wait_tid =
423 timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
424 sqp, firetime, CALLOUT_TCP_RESOLUTION,
425 CALLOUT_FLAG_ROUNDUP);
426 }
427 #ifdef DEBUG
428 tcp_time_wait->tcp_time_wait_running = B_FALSE;
429 #endif
430 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
431 }
432
433 /*
434 * tcp_time_wait_processing() handles processing of incoming packets when
435 * the tcp_t is in the TIME_WAIT state.
436 *
437 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
438 * detached state) is never put on the time wait list.
439 */
440 void
441 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
442 uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
443 {
444 int32_t bytes_acked;
445 int32_t gap;
446 int32_t rgap;
447 tcp_opt_t tcpopt;
448 uint_t flags;
449 uint32_t new_swnd = 0;
450 conn_t *nconnp;
451 conn_t *connp = tcp->tcp_connp;
452 tcp_stack_t *tcps = tcp->tcp_tcps;
453
454 BUMP_LOCAL(tcp->tcp_ibsegs);
455 DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
456
457 flags = (unsigned int)tcpha->tha_flags & 0xFF;
458 new_swnd = ntohs(tcpha->tha_win) <<
459 ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
460 if (tcp->tcp_snd_ts_ok) {
461 if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
462 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
463 tcp->tcp_rnxt, TH_ACK);
464 goto done;
465 }
466 }
467 gap = seg_seq - tcp->tcp_rnxt;
468 rgap = tcp->tcp_rwnd - (gap + seg_len);
469 if (gap < 0) {
470 TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
471 TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
472 (seg_len > -gap ? -gap : seg_len));
473 seg_len += gap;
474 if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
475 if (flags & TH_RST) {
476 goto done;
477 }
478 if ((flags & TH_FIN) && seg_len == -1) {
479 /*
480 * When TCP receives a duplicate FIN in
481 * TIME_WAIT state, restart the 2 MSL timer.
482 * See page 73 in RFC 793. Make sure this TCP
483 * is already on the TIME_WAIT list. If not,
484 * just restart the timer.
485 */
486 if (TCP_IS_DETACHED(tcp)) {
487 if (tcp_time_wait_remove(tcp, NULL) ==
488 B_TRUE) {
489 tcp_time_wait_append(tcp);
490 TCP_DBGSTAT(tcps,
491 tcp_rput_time_wait);
492 }
493 } else {
494 ASSERT(tcp != NULL);
495 TCP_TIMER_RESTART(tcp,
496 tcps->tcps_time_wait_interval);
497 }
498 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
499 tcp->tcp_rnxt, TH_ACK);
500 goto done;
501 }
502 flags |= TH_ACK_NEEDED;
503 seg_len = 0;
504 goto process_ack;
505 }
506
507 /* Fix seg_seq, and chew the gap off the front. */
508 seg_seq = tcp->tcp_rnxt;
509 }
510
511 if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
512 /*
513 * Make sure that when we accept the connection, pick
514 * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
515 * old connection.
516 *
517 * The next ISS generated is equal to tcp_iss_incr_extra
518 * + tcp_iss_incr/2 + other components depending on the
519 * value of tcp_strong_iss. We pre-calculate the new
520 * ISS here and compare with tcp_snxt to determine if
521 * we need to make adjustment to tcp_iss_incr_extra.
522 *
523 * The above calculation is ugly and is a
524 * waste of CPU cycles...
525 */
526 uint32_t new_iss = tcps->tcps_iss_incr_extra;
527 int32_t adj;
528 ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
529
530 switch (tcps->tcps_strong_iss) {
531 case 2: {
532 /* Add time and MD5 components. */
533 uint32_t answer[4];
534 struct {
535 uint32_t ports;
536 in6_addr_t src;
537 in6_addr_t dst;
538 } arg;
539 MD5_CTX context;
540
541 mutex_enter(&tcps->tcps_iss_key_lock);
542 context = tcps->tcps_iss_key;
543 mutex_exit(&tcps->tcps_iss_key_lock);
544 arg.ports = connp->conn_ports;
545 /* We use MAPPED addresses in tcp_iss_init */
546 arg.src = connp->conn_laddr_v6;
547 arg.dst = connp->conn_faddr_v6;
548 MD5Update(&context, (uchar_t *)&arg,
549 sizeof (arg));
550 MD5Final((uchar_t *)answer, &context);
551 answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
552 new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
553 break;
554 }
555 case 1:
556 /* Add time component and min random (i.e. 1). */
557 new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
558 break;
559 default:
560 /* Add only time component. */
561 new_iss += (uint32_t)gethrestime_sec() *
562 tcps->tcps_iss_incr;
563 break;
564 }
565 if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
566 /*
567 * New ISS not guaranteed to be tcp_iss_incr/2
568 * ahead of the current tcp_snxt, so add the
569 * difference to tcp_iss_incr_extra.
570 */
571 tcps->tcps_iss_incr_extra += adj;
572 }
573 /*
574 * If tcp_clean_death() can not perform the task now,
575 * drop the SYN packet and let the other side re-xmit.
576 * Otherwise pass the SYN packet back in, since the
577 * old tcp state has been cleaned up or freed.
578 */
579 if (tcp_clean_death(tcp, 0) == -1)
580 goto done;
581 nconnp = ipcl_classify(mp, ira, ipst);
582 if (nconnp != NULL) {
583 TCP_STAT(tcps, tcp_time_wait_syn_success);
584 /* Drops ref on nconnp */
585 tcp_reinput(nconnp, mp, ira, ipst);
586 return;
587 }
588 goto done;
589 }
590
591 /*
592 * rgap is the amount of stuff received out of window. A negative
593 * value is the amount out of window.
594 */
595 if (rgap < 0) {
596 TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
597 TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
598 /* Fix seg_len and make sure there is something left. */
599 seg_len += rgap;
600 if (seg_len <= 0) {
601 if (flags & TH_RST) {
602 goto done;
603 }
604 flags |= TH_ACK_NEEDED;
605 seg_len = 0;
606 goto process_ack;
607 }
608 }
609 /*
610 * Check whether we can update tcp_ts_recent. This test is
611 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
612 * Extensions for High Performance: An Update", Internet Draft.
613 */
614 if (tcp->tcp_snd_ts_ok &&
615 TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
616 SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
617 tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
618 tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
619 }
620
621 if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
622 /* Always ack out of order packets */
623 flags |= TH_ACK_NEEDED;
624 seg_len = 0;
625 } else if (seg_len > 0) {
626 TCPS_BUMP_MIB(tcps, tcpInClosed);
627 TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
628 TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
629 }
630 if (flags & TH_RST) {
631 (void) tcp_clean_death(tcp, 0);
632 goto done;
633 }
634 if (flags & TH_SYN) {
635 tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
636 TH_RST|TH_ACK);
637 /*
638 * Do not delete the TCP structure if it is in
639 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
640 */
641 goto done;
642 }
643 process_ack:
644 if (flags & TH_ACK) {
645 bytes_acked = (int)(seg_ack - tcp->tcp_suna);
646 if (bytes_acked <= 0) {
647 if (bytes_acked == 0 && seg_len == 0 &&
648 new_swnd == tcp->tcp_swnd)
649 TCPS_BUMP_MIB(tcps, tcpInDupAck);
650 } else {
651 /* Acks something not sent */
652 flags |= TH_ACK_NEEDED;
653 }
654 }
655 if (flags & TH_ACK_NEEDED) {
656 /*
657 * Time to send an ack for some reason.
658 */
659 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
660 tcp->tcp_rnxt, TH_ACK);
661 }
662 done:
663 freemsg(mp);
664 }