6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2011 Joyent, Inc. All rights reserved.
26 */
27
28 /* This file contains all TCP input processing functions. */
29
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/strsun.h>
33 #include <sys/strsubr.h>
34 #include <sys/stropts.h>
35 #include <sys/strlog.h>
36 #define _SUN_TPI_VERSION 2
37 #include <sys/tihdr.h>
38 #include <sys/suntpi.h>
39 #include <sys/xti_inet.h>
40 #include <sys/squeue_impl.h>
41 #include <sys/squeue.h>
42 #include <sys/tsol/tnet.h>
43
44 #include <inet/common.h>
45 #include <inet/ip.h>
2627 freemsg(mp);
2628 return;
2629 }
2630 TCPS_CONN_INC(tcps);
2631 /* SYN was acked - making progress */
2632 tcp->tcp_ip_forward_progress = B_TRUE;
2633
2634 /* One for the SYN */
2635 tcp->tcp_suna = tcp->tcp_iss + 1;
2636 tcp->tcp_valid_bits &= ~TCP_ISS_VALID;
2637
2638 /*
2639 * If SYN was retransmitted, need to reset all
2640 * retransmission info. This is because this
2641 * segment will be treated as a dup ACK.
2642 */
2643 if (tcp->tcp_rexmit) {
2644 tcp->tcp_rexmit = B_FALSE;
2645 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
2646 tcp->tcp_rexmit_max = tcp->tcp_snxt;
2647 tcp->tcp_snd_burst = tcp->tcp_localnet ?
2648 TCP_CWND_INFINITE : TCP_CWND_NORMAL;
2649 tcp->tcp_ms_we_have_waited = 0;
2650
2651 /*
2652 * Set tcp_cwnd back to 1 MSS, per
2653 * recommendation from
2654 * draft-floyd-incr-init-win-01.txt,
2655 * Increasing TCP's Initial Window.
2656 */
2657 tcp->tcp_cwnd = tcp->tcp_mss;
2658 }
2659
2660 tcp->tcp_swl1 = seg_seq;
2661 tcp->tcp_swl2 = seg_ack;
2662
2663 new_swnd = ntohs(tcpha->tha_win);
2664 tcp->tcp_swnd = new_swnd;
2665 if (new_swnd > tcp->tcp_max_swnd)
2666 tcp->tcp_max_swnd = new_swnd;
2667
2668 /*
3823 DTRACE_TCP5(accept__established, mlbk_t *, NULL,
3824 ip_xmit_attr_t *, connp->conn_ixa, void_ip_t *,
3825 iphdr, tcp_t *, tcp, tcph_t *, tcpha);
3826 }
3827 TCPS_CONN_INC(tcps);
3828
3829 tcp->tcp_suna = tcp->tcp_iss + 1; /* One for the SYN */
3830 bytes_acked--;
3831 /* SYN was acked - making progress */
3832 tcp->tcp_ip_forward_progress = B_TRUE;
3833
3834 /*
3835 * If SYN was retransmitted, need to reset all
3836 * retransmission info as this segment will be
3837 * treated as a dup ACK.
3838 */
3839 if (tcp->tcp_rexmit) {
3840 tcp->tcp_rexmit = B_FALSE;
3841 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
3842 tcp->tcp_rexmit_max = tcp->tcp_snxt;
3843 tcp->tcp_snd_burst = tcp->tcp_localnet ?
3844 TCP_CWND_INFINITE : TCP_CWND_NORMAL;
3845 tcp->tcp_ms_we_have_waited = 0;
3846 tcp->tcp_cwnd = mss;
3847 }
3848
3849 /*
3850 * We set the send window to zero here.
3851 * This is needed if there is data to be
3852 * processed already on the queue.
3853 * Later (at swnd_update label), the
3854 * "new_swnd > tcp_swnd" condition is satisfied, so
3855 * the XMIT_NEEDED flag is set in the current
3856 * (SYN_RCVD) state. This ensures tcp_wput_data() is
3857 * called if there is already data on queue in
3858 * this state.
3859 */
3860 tcp->tcp_swnd = 0;
3861
3862 if (new_swnd > tcp->tcp_max_swnd)
3863 tcp->tcp_max_swnd = new_swnd;
3864 tcp->tcp_swl1 = seg_seq;
3993 tcp->tcp_ecn_cwr_sent = B_FALSE;
3994 }
3995
3996 /*
3997 * We do Hoe's algorithm. Refer to her
3998 * paper "Improving the Start-up Behavior
3999 * of a Congestion Control Scheme for TCP,"
4000 * appeared in SIGCOMM'96.
4001 *
4002 * Save highest seq no we have sent so far.
4003 * Be careful about the invisible FIN byte.
4004 */
4005 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
4006 (tcp->tcp_unsent == 0)) {
4007 tcp->tcp_rexmit_max = tcp->tcp_fss;
4008 } else {
4009 tcp->tcp_rexmit_max = tcp->tcp_snxt;
4010 }
4011
4012 /*
4013 * Do not allow bursty traffic during
4014 * fast recovery. Refer to Fall and Floyd's
4015 * paper "Simulation-based Comparisons of
4016 * Tahoe, Reno and SACK TCP" (in CCR?)
4017 * This is a best current practise.
4018 */
4019 tcp->tcp_snd_burst = TCP_CWND_SS;
4020
4021 /*
4022 * For SACK:
4023 * Calculate tcp_pipe, which is the
4024 * estimated number of bytes in
4025 * network.
4026 *
4027 * tcp_fack is the highest sack'ed seq num
4028 * TCP has received.
4029 *
4030 * tcp_pipe is explained in the above quoted
4031 * Fall and Floyd's paper. tcp_fack is
4032 * explained in Mathis and Mahdavi's
4033 * "Forward Acknowledgment: Refining TCP
4034 * Congestion Control" in SIGCOMM '96.
4035 */
4036 if (tcp->tcp_snd_sack_ok) {
4037 if (tcp->tcp_notsack_list != NULL) {
4038 tcp->tcp_pipe = tcp->tcp_snxt -
4039 tcp->tcp_fack;
4040 tcp->tcp_sack_snxt = seg_ack;
4041 flags |= TH_NEED_SACK_REXMIT;
4201
4202 /*
4203 * If we got an ACK after fast retransmit, check to see
4204 * if it is a partial ACK. If it is not and the congestion
4205 * window was inflated to account for the other side's
4206 * cached packets, retract it. If it is, do Hoe's algorithm.
4207 */
4208 if (tcp->tcp_dupack_cnt >= tcps->tcps_dupack_fast_retransmit) {
4209 ASSERT(tcp->tcp_rexmit == B_FALSE);
4210 if (SEQ_GEQ(seg_ack, tcp->tcp_rexmit_max)) {
4211 tcp->tcp_dupack_cnt = 0;
4212 /*
4213 * Restore the orig tcp_cwnd_ssthresh after
4214 * fast retransmit phase.
4215 */
4216 if (tcp->tcp_cwnd > tcp->tcp_cwnd_ssthresh) {
4217 tcp->tcp_cwnd = tcp->tcp_cwnd_ssthresh;
4218 }
4219 tcp->tcp_rexmit_max = seg_ack;
4220 tcp->tcp_cwnd_cnt = 0;
4221 tcp->tcp_snd_burst = tcp->tcp_localnet ?
4222 TCP_CWND_INFINITE : TCP_CWND_NORMAL;
4223
4224 /*
4225 * Remove all notsack info to avoid confusion with
4226 * the next fast retransmit/recovery phase.
4227 */
4228 if (tcp->tcp_snd_sack_ok) {
4229 TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list,
4230 tcp);
4231 }
4232 } else {
4233 if (tcp->tcp_snd_sack_ok &&
4234 tcp->tcp_notsack_list != NULL) {
4235 flags |= TH_NEED_SACK_REXMIT;
4236 tcp->tcp_pipe -= mss;
4237 if (tcp->tcp_pipe < 0)
4238 tcp->tcp_pipe = 0;
4239 } else {
4240 /*
4241 * Hoe's algorithm:
4242 *
4263 * tcp_rexmit_nxt. Otherwise, update tcp_rexmit_nxt
4264 * to the correct value.
4265 *
4266 * Note that SEQ_LEQ() is used. This is to avoid
4267 * unnecessary fast retransmit caused by dup ACKs
4268 * received when TCP does slow start retransmission
4269 * after a time out. During this phase, TCP may
4270 * send out segments which are already received.
4271 * This causes dup ACKs to be sent back.
4272 */
4273 if (SEQ_LEQ(seg_ack, tcp->tcp_rexmit_max)) {
4274 if (SEQ_GT(seg_ack, tcp->tcp_rexmit_nxt)) {
4275 tcp->tcp_rexmit_nxt = seg_ack;
4276 }
4277 if (seg_ack != tcp->tcp_rexmit_max) {
4278 flags |= TH_XMIT_NEEDED;
4279 }
4280 } else {
4281 tcp->tcp_rexmit = B_FALSE;
4282 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
4283 tcp->tcp_snd_burst = tcp->tcp_localnet ?
4284 TCP_CWND_INFINITE : TCP_CWND_NORMAL;
4285 }
4286 tcp->tcp_ms_we_have_waited = 0;
4287 }
4288 }
4289
4290 TCPS_BUMP_MIB(tcps, tcpInAckSegs);
4291 TCPS_UPDATE_MIB(tcps, tcpInAckBytes, bytes_acked);
4292 tcp->tcp_suna = seg_ack;
4293 if (tcp->tcp_zero_win_probe != 0) {
4294 tcp->tcp_zero_win_probe = 0;
4295 tcp->tcp_timer_backoff = 0;
4296 }
4297
4298 /*
4299 * If tcp_xmit_head is NULL, then it must be the FIN being ack'ed.
4300 * Note that it cannot be the SYN being ack'ed. The code flow
4301 * will not reach here.
4302 */
4303 if (mp1 == NULL) {
4304 goto fin_acked;
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2011 Joyent, Inc. All rights reserved.
26 * Copyright (c) 2014 by Delphix. All rights reserved.
27 */
28
29 /* This file contains all TCP input processing functions. */
30
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/stropts.h>
36 #include <sys/strlog.h>
37 #define _SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/suntpi.h>
40 #include <sys/xti_inet.h>
41 #include <sys/squeue_impl.h>
42 #include <sys/squeue.h>
43 #include <sys/tsol/tnet.h>
44
45 #include <inet/common.h>
46 #include <inet/ip.h>
2628 freemsg(mp);
2629 return;
2630 }
2631 TCPS_CONN_INC(tcps);
2632 /* SYN was acked - making progress */
2633 tcp->tcp_ip_forward_progress = B_TRUE;
2634
2635 /* One for the SYN */
2636 tcp->tcp_suna = tcp->tcp_iss + 1;
2637 tcp->tcp_valid_bits &= ~TCP_ISS_VALID;
2638
2639 /*
2640 * If SYN was retransmitted, need to reset all
2641 * retransmission info. This is because this
2642 * segment will be treated as a dup ACK.
2643 */
2644 if (tcp->tcp_rexmit) {
2645 tcp->tcp_rexmit = B_FALSE;
2646 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
2647 tcp->tcp_rexmit_max = tcp->tcp_snxt;
2648 tcp->tcp_ms_we_have_waited = 0;
2649
2650 /*
2651 * Set tcp_cwnd back to 1 MSS, per
2652 * recommendation from
2653 * draft-floyd-incr-init-win-01.txt,
2654 * Increasing TCP's Initial Window.
2655 */
2656 tcp->tcp_cwnd = tcp->tcp_mss;
2657 }
2658
2659 tcp->tcp_swl1 = seg_seq;
2660 tcp->tcp_swl2 = seg_ack;
2661
2662 new_swnd = ntohs(tcpha->tha_win);
2663 tcp->tcp_swnd = new_swnd;
2664 if (new_swnd > tcp->tcp_max_swnd)
2665 tcp->tcp_max_swnd = new_swnd;
2666
2667 /*
3822 DTRACE_TCP5(accept__established, mlbk_t *, NULL,
3823 ip_xmit_attr_t *, connp->conn_ixa, void_ip_t *,
3824 iphdr, tcp_t *, tcp, tcph_t *, tcpha);
3825 }
3826 TCPS_CONN_INC(tcps);
3827
3828 tcp->tcp_suna = tcp->tcp_iss + 1; /* One for the SYN */
3829 bytes_acked--;
3830 /* SYN was acked - making progress */
3831 tcp->tcp_ip_forward_progress = B_TRUE;
3832
3833 /*
3834 * If SYN was retransmitted, need to reset all
3835 * retransmission info as this segment will be
3836 * treated as a dup ACK.
3837 */
3838 if (tcp->tcp_rexmit) {
3839 tcp->tcp_rexmit = B_FALSE;
3840 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
3841 tcp->tcp_rexmit_max = tcp->tcp_snxt;
3842 tcp->tcp_ms_we_have_waited = 0;
3843 tcp->tcp_cwnd = mss;
3844 }
3845
3846 /*
3847 * We set the send window to zero here.
3848 * This is needed if there is data to be
3849 * processed already on the queue.
3850 * Later (at swnd_update label), the
3851 * "new_swnd > tcp_swnd" condition is satisfied, so
3852 * the XMIT_NEEDED flag is set in the current
3853 * (SYN_RCVD) state. This ensures tcp_wput_data() is
3854 * called if there is already data on queue in
3855 * this state.
3856 */
3857 tcp->tcp_swnd = 0;
3858
3859 if (new_swnd > tcp->tcp_max_swnd)
3860 tcp->tcp_max_swnd = new_swnd;
3861 tcp->tcp_swl1 = seg_seq;
3990 tcp->tcp_ecn_cwr_sent = B_FALSE;
3991 }
3992
3993 /*
3994 * We do Hoe's algorithm. Refer to her
3995 * paper "Improving the Start-up Behavior
3996 * of a Congestion Control Scheme for TCP,"
3997 * appeared in SIGCOMM'96.
3998 *
3999 * Save highest seq no we have sent so far.
4000 * Be careful about the invisible FIN byte.
4001 */
4002 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
4003 (tcp->tcp_unsent == 0)) {
4004 tcp->tcp_rexmit_max = tcp->tcp_fss;
4005 } else {
4006 tcp->tcp_rexmit_max = tcp->tcp_snxt;
4007 }
4008
4009 /*
4010 * For SACK:
4011 * Calculate tcp_pipe, which is the
4012 * estimated number of bytes in
4013 * network.
4014 *
4015 * tcp_fack is the highest sack'ed seq num
4016 * TCP has received.
4017 *
4018 * tcp_pipe is explained in the above quoted
4019 * Fall and Floyd's paper. tcp_fack is
4020 * explained in Mathis and Mahdavi's
4021 * "Forward Acknowledgment: Refining TCP
4022 * Congestion Control" in SIGCOMM '96.
4023 */
4024 if (tcp->tcp_snd_sack_ok) {
4025 if (tcp->tcp_notsack_list != NULL) {
4026 tcp->tcp_pipe = tcp->tcp_snxt -
4027 tcp->tcp_fack;
4028 tcp->tcp_sack_snxt = seg_ack;
4029 flags |= TH_NEED_SACK_REXMIT;
4189
4190 /*
4191 * If we got an ACK after fast retransmit, check to see
4192 * if it is a partial ACK. If it is not and the congestion
4193 * window was inflated to account for the other side's
4194 * cached packets, retract it. If it is, do Hoe's algorithm.
4195 */
4196 if (tcp->tcp_dupack_cnt >= tcps->tcps_dupack_fast_retransmit) {
4197 ASSERT(tcp->tcp_rexmit == B_FALSE);
4198 if (SEQ_GEQ(seg_ack, tcp->tcp_rexmit_max)) {
4199 tcp->tcp_dupack_cnt = 0;
4200 /*
4201 * Restore the orig tcp_cwnd_ssthresh after
4202 * fast retransmit phase.
4203 */
4204 if (tcp->tcp_cwnd > tcp->tcp_cwnd_ssthresh) {
4205 tcp->tcp_cwnd = tcp->tcp_cwnd_ssthresh;
4206 }
4207 tcp->tcp_rexmit_max = seg_ack;
4208 tcp->tcp_cwnd_cnt = 0;
4209
4210 /*
4211 * Remove all notsack info to avoid confusion with
4211 * the next fast retransmit/recovery phase.
4213 */
4214 if (tcp->tcp_snd_sack_ok) {
4215 TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list,
4216 tcp);
4217 }
4218 } else {
4219 if (tcp->tcp_snd_sack_ok &&
4220 tcp->tcp_notsack_list != NULL) {
4221 flags |= TH_NEED_SACK_REXMIT;
4222 tcp->tcp_pipe -= mss;
4223 if (tcp->tcp_pipe < 0)
4224 tcp->tcp_pipe = 0;
4225 } else {
4226 /*
4227 * Hoe's algorithm:
4228 *
4249 * tcp_rexmit_nxt. Otherwise, update tcp_rexmit_nxt
4250 * to the correct value.
4251 *
4252 * Note that SEQ_LEQ() is used. This is to avoid
4253 * unnecessary fast retransmit caused by dup ACKs
4254 * received when TCP does slow start retransmission
4255 * after a time out. During this phase, TCP may
4256 * send out segments which are already received.
4257 * This causes dup ACKs to be sent back.
4258 */
4259 if (SEQ_LEQ(seg_ack, tcp->tcp_rexmit_max)) {
4260 if (SEQ_GT(seg_ack, tcp->tcp_rexmit_nxt)) {
4261 tcp->tcp_rexmit_nxt = seg_ack;
4262 }
4263 if (seg_ack != tcp->tcp_rexmit_max) {
4264 flags |= TH_XMIT_NEEDED;
4265 }
4266 } else {
4267 tcp->tcp_rexmit = B_FALSE;
4268 tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
4269 }
4270 tcp->tcp_ms_we_have_waited = 0;
4271 }
4272 }
4273
4274 TCPS_BUMP_MIB(tcps, tcpInAckSegs);
4275 TCPS_UPDATE_MIB(tcps, tcpInAckBytes, bytes_acked);
4276 tcp->tcp_suna = seg_ack;
4277 if (tcp->tcp_zero_win_probe != 0) {
4278 tcp->tcp_zero_win_probe = 0;
4279 tcp->tcp_timer_backoff = 0;
4280 }
4281
4282 /*
4283 * If tcp_xmit_head is NULL, then it must be the FIN being ack'ed.
4284 * Note that it cannot be the SYN being ack'ed. The code flow
4285 * will not reach here.
4286 */
4287 if (mp1 == NULL) {
4288 goto fin_acked;
|