Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-42721 Create inline function for TCP RTO calculation
DLPX-37540 TCP per-connection kernel statistics DLPX-37544 connstat command to display per-connection TCP statistics


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2011 Joyent, Inc.  All rights reserved.
  26  * Copyright (c) 2014 by Delphix. All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/strlog.h>
  31 #include <sys/strsun.h>
  32 #include <sys/squeue_impl.h>
  33 #include <sys/squeue.h>
  34 #include <sys/callo.h>
  35 #include <sys/strsubr.h>
  36 
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip_ire.h>
  40 #include <inet/ip_rts.h>
  41 #include <inet/tcp.h>
  42 #include <inet/tcp_impl.h>
  43 
  44 /*
  45  * Implementation of TCP Timers.
  46  * =============================


 577         }
 578 
 579         if ((tcp->tcp_rnxt - tcp->tcp_rack) > tcp->tcp_mss) {
 580                 /*
 581                  * Make sure we don't allow deferred ACKs to result in
 582                  * timer-based ACKing.  If we have held off an ACK
 583                  * when there was more than an mss here, and the timer
 584                  * goes off, we have to worry about the possibility
 585                  * that the sender isn't doing slow-start, or is out
 586                  * of step with us for some other reason.  We fall
 587                  * permanently back in the direction of
 588                  * ACK-every-other-packet as suggested in RFC 1122.
 589                  */
 590                 if (tcp->tcp_rack_abs_max > 2)
 591                         tcp->tcp_rack_abs_max--;
 592                 tcp->tcp_rack_cur_max = 2;
 593         }
 594         mp = tcp_ack_mp(tcp);
 595 
 596         if (mp != NULL) {
 597                 BUMP_LOCAL(tcp->tcp_obsegs);
 598                 TCPS_BUMP_MIB(tcps, tcpOutAck);
 599                 TCPS_BUMP_MIB(tcps, tcpOutAckDelayed);
 600                 tcp_send_data(tcp, mp);
 601         }
 602 }
 603 
 604 /*
 605  * Notify IP that we are having trouble with this connection.  IP should
 606  * make note so it can potentially use a different IRE.
 607  */
 608 static void
 609 tcp_ip_notify(tcp_t *tcp)
 610 {
 611         conn_t          *connp = tcp->tcp_connp;
 612         ire_t           *ire;
 613 
 614         /*
 615          * Note: in the case of source routing we want to blow away the
 616          * route to the first source route hop.
 617          */


 739                 /*
 740                  * If the end point has not been closed, TCP can retransmit
 741                  * forever.  But if the end point is closed, the normal
 742                  * timeout applies.
 743                  */
 744                 if (second_threshold == 0) {
 745                         second_threshold = tcps->tcps_ip_abort_linterval;
 746                         dont_timeout = B_TRUE;
 747                 }
 748                 /* FALLTHRU */
 749         case TCPS_FIN_WAIT_1:
 750         case TCPS_CLOSING:
 751         case TCPS_LAST_ACK:
 752                 /* If we have data to rexmit */
 753                 if (tcp->tcp_suna != tcp->tcp_snxt) {
 754                         clock_t time_to_wait;
 755 
 756                         TCPS_BUMP_MIB(tcps, tcpTimRetrans);
 757                         if (!tcp->tcp_xmit_head)
 758                                 break;
 759                         time_to_wait = ddi_get_lbolt() -
 760                             (clock_t)tcp->tcp_xmit_head->b_prev;
 761                         time_to_wait = tcp->tcp_rto -
 762                             TICK_TO_MSEC(time_to_wait);




 763                         /*
 764                          * If the timer fires too early, 1 clock tick earlier,
 765                          * restart the timer.
 766                          */
 767                         if (time_to_wait > msec_per_tick) {
 768                                 TCP_STAT(tcps, tcp_timer_fire_early);
 769                                 TCP_TIMER_RESTART(tcp, time_to_wait);
 770                                 return;
 771                         }
 772                         /*
 773                          * When we probe zero windows, we force the swnd open.
 774                          * If our peer acks with a closed window swnd will be
 775                          * set to zero by tcp_rput(). As long as we are
 776                          * receiving acks tcp_rput will
 777                          * reset 'tcp_ms_we_have_waited' so as not to trip the
 778                          * first and second interval actions.  NOTE: the timer
 779                          * interval is allowed to continue its exponential
 780                          * backoff.
 781                          */
 782                         if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
 783                                 if (connp->conn_debug) {
 784                                         (void) strlog(TCP_MOD_ID, 0, 1,
 785                                             SL_TRACE, "tcp_timer: zero win");
 786                                 }
 787                         } else {
 788                                 /*
 789                                  * After retransmission, we need to do
 790                                  * slow start.  Set the ssthresh to one
 791                                  * half of current effective window and
 792                                  * cwnd to one MSS.  Also reset
 793                                  * tcp_cwnd_cnt.
 794                                  *
 795                                  * Note that if tcp_ssthresh is reduced because
 796                                  * of ECN, do not reduce it again unless it is
 797                                  * already one window of data away (tcp_cwr
 798                                  * should then be cleared) or this is a
 799                                  * timeout for a retransmitted segment.
 800                                  */
 801                                 uint32_t npkt;
 802 
 803                                 if (!tcp->tcp_cwr || tcp->tcp_rexmit) {
 804                                         npkt = ((tcp->tcp_timer_backoff ?
 805                                             tcp->tcp_cwnd_ssthresh :
 806                                             tcp->tcp_snxt -
 807                                             tcp->tcp_suna) >> 1) / tcp->tcp_mss;
 808                                         tcp->tcp_cwnd_ssthresh = MAX(npkt, 2) *
 809                                             tcp->tcp_mss;
 810                                 }
 811                                 tcp->tcp_cwnd = tcp->tcp_mss;
 812                                 tcp->tcp_cwnd_cnt = 0;
 813                                 if (tcp->tcp_ecn_ok) {
 814                                         tcp->tcp_cwr = B_TRUE;
 815                                         tcp->tcp_cwr_snd_max = tcp->tcp_snxt;
 816                                         tcp->tcp_ecn_cwr_sent = B_FALSE;
 817                                 }
 818                         }
 819                         break;
 820                 }
 821                 /*
 822                  * We have something to send yet we cannot send.  The
 823                  * reason can be:
 824                  *
 825                  * 1. Zero send window: we need to do zero window probe.
 826                  * 2. Zero cwnd: because of ECN, we need to "clock out"
 827                  * segments.
 828                  * 3. SWS avoidance: receiver may have shrunk window,
 829                  * reset our knowledge.
 830                  *
 831                  * Note that condition 2 can happen with either 1 or
 832                  * 3.  But 1 and 3 are exclusive.
 833                  */
 834                 if (tcp->tcp_unsent != 0) {
 835                         /*
 836                          * Should not hold the zero-copy messages for too long.
 837                          */
 838                         if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
 839                                 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
 840                                     tcp->tcp_xmit_head, B_TRUE);
 841 
 842                         if (tcp->tcp_cwnd == 0) {
 843                                 /*
 844                                  * Set tcp_cwnd to 1 MSS so that a
 845                                  * new segment can be sent out.  We
 846                                  * are "clocking out" new data when
 847                                  * the network is really congested.
 848                                  */
 849                                 ASSERT(tcp->tcp_ecn_ok);
 850                                 tcp->tcp_cwnd = tcp->tcp_mss;
 851                         }
 852                         if (tcp->tcp_swnd == 0) {
 853                                 /* Extend window for zero window probe */
 854                                 tcp->tcp_swnd++;
 855                                 tcp->tcp_zero_win_probe = B_TRUE;
 856                                 TCPS_BUMP_MIB(tcps, tcpOutWinProbe);

 857                         } else {
 858                                 /*
 859                                  * Handle timeout from sender SWS avoidance.
 860                                  * Reset our knowledge of the max send window
 861                                  * since the receiver might have reduced its
 862                                  * receive buffer.  Avoid setting tcp_max_swnd
 863                                  * to one since that will essentially disable
 864                                  * the SWS checks.
 865                                  *
 866                                  * Note that since we don't have a SWS
 867                                  * state variable, if the timeout is set
 868                                  * for ECN but not for SWS, this
 869                                  * code will also be executed.  This is
 870                                  * fine as tcp_max_swnd is updated
 871                                  * constantly and it will not affect
 872                                  * anything.
 873                                  */
 874                                 tcp->tcp_max_swnd = MAX(tcp->tcp_swnd, 2);
 875                         }
 876                         tcp_wput_data(tcp, NULL, B_FALSE);


 995                          */
 996                         tcp->tcp_ms_we_have_waited = second_threshold;
 997                 }
 998         } else if (ms > first_threshold) {
 999                 /*
1000                  * Should not hold the zero-copy messages for too long.
1001                  */
1002                 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
1003                         tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
1004                             tcp->tcp_xmit_head, B_TRUE);
1005 
1006                 /*
1007                  * We have been retransmitting for too long...  The RTT
1008                  * we calculated is probably incorrect.  Reinitialize it.
1009                  * Need to compensate for 0 tcp_rtt_sa.  Reset
1010                  * tcp_rtt_update so that we won't accidentally cache a
1011                  * bad value.  But only do this if this is not a zero
1012                  * window probe.
1013                  */
1014                 if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
1015                         tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
1016                             (tcp->tcp_rtt_sa >> 5);
1017                         tcp->tcp_rtt_sa = 0;
1018                         tcp_ip_notify(tcp);
1019                         tcp->tcp_rtt_update = 0;
1020                 }
1021         }
1022 
1023 timer_rexmit:
1024         tcp->tcp_timer_backoff++;
1025         if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1026             tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) <
1027             tcp->tcp_rto_min) {
1028                 /*
1029                  * This means the original RTO is tcp_rexmit_interval_min.
1030                  * So we will use tcp_rexmit_interval_min as the RTO value
1031                  * and do the backoff.
1032                  */
1033                 ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
1034         } else {
1035                 ms <<= tcp->tcp_timer_backoff;
1036         }
1037         if (ms > tcp->tcp_rto_max) {
1038                 ms = tcp->tcp_rto_max;
1039                 /*
1040                  * ms is at max, decrement tcp_timer_backoff to avoid
1041                  * overflow.
1042                  */
1043                 tcp->tcp_timer_backoff--;
1044         }
1045         tcp->tcp_ms_we_have_waited += ms;
1046         if (tcp->tcp_zero_win_probe == 0) {
1047                 tcp->tcp_rto = ms;
1048         }
1049         TCP_TIMER_RESTART(tcp, ms);
1050         /*
1051          * This is after a timeout and tcp_rto is backed off.  Set
1052          * tcp_set_timer to 1 so that next time RTO is updated, we will
1053          * restart the timer with a correct value.
1054          */
1055         tcp->tcp_set_timer = 1;
1056         mss = tcp->tcp_snxt - tcp->tcp_suna;
1057         if (mss > tcp->tcp_mss)
1058                 mss = tcp->tcp_mss;
1059         if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1060                 mss = tcp->tcp_swnd;
1061 
1062         if ((mp = tcp->tcp_xmit_head) != NULL)

1063                 mp->b_prev = (mblk_t *)ddi_get_lbolt();




1064         mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1065             B_TRUE);
1066 
1067         /*
1068          * When slow start after retransmission begins, start with
1069          * this seq no.  tcp_rexmit_max marks the end of special slow
1070          * start phase.
1071          */
1072         tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1073         if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1074             (tcp->tcp_unsent == 0)) {
1075                 tcp->tcp_rexmit_max = tcp->tcp_fss;
1076         } else {
1077                 tcp->tcp_rexmit_max = tcp->tcp_snxt;
1078         }
1079         tcp->tcp_rexmit = B_TRUE;
1080         tcp->tcp_dupack_cnt = 0;
1081 
1082         /*
1083          * Remove all rexmit SACK blk to start from fresh.
1084          */
1085         if (tcp->tcp_snd_sack_ok)
1086                 TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
1087         if (mp == NULL) {
1088                 return;
1089         }
1090 
1091         tcp->tcp_csuna = tcp->tcp_snxt;
1092         TCPS_BUMP_MIB(tcps, tcpRetransSegs);
1093         TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);


1094         tcp_send_data(tcp, mp);
1095 
1096 }
1097 
1098 /*
1099  * SO_LINGER expiry handler.  arg is the conn_t of the lingering connection;
1100  * its TCP instance is marked with ETIMEDOUT and told to stop lingering.
1101  */
1102 void
1103 tcp_close_linger_timeout(void *arg)
1104 {
1105         tcp_t   *ltcp = ((conn_t *)arg)->conn_tcp;
1106 
1107         /* Record the error before lingering is stopped. */
1108         ltcp->tcp_client_errno = ETIMEDOUT;
1109         tcp_stop_lingering(ltcp);
1110 }


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2011 Joyent, Inc.  All rights reserved.
  26  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/strlog.h>
  31 #include <sys/strsun.h>
  32 #include <sys/squeue_impl.h>
  33 #include <sys/squeue.h>
  34 #include <sys/callo.h>
  35 #include <sys/strsubr.h>
  36 
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/ip_ire.h>
  40 #include <inet/ip_rts.h>
  41 #include <inet/tcp.h>
  42 #include <inet/tcp_impl.h>
  43 
  44 /*
  45  * Implementation of TCP Timers.
  46  * =============================


 577         }
 578 
 579         if ((tcp->tcp_rnxt - tcp->tcp_rack) > tcp->tcp_mss) {
 580                 /*
 581                  * Make sure we don't allow deferred ACKs to result in
 582                  * timer-based ACKing.  If we have held off an ACK
 583                  * when there was more than an mss here, and the timer
 584                  * goes off, we have to worry about the possibility
 585                  * that the sender isn't doing slow-start, or is out
 586                  * of step with us for some other reason.  We fall
 587                  * permanently back in the direction of
 588                  * ACK-every-other-packet as suggested in RFC 1122.
 589                  */
 590                 if (tcp->tcp_rack_abs_max > 2)
 591                         tcp->tcp_rack_abs_max--;
 592                 tcp->tcp_rack_cur_max = 2;
 593         }
 594         mp = tcp_ack_mp(tcp);
 595 
 596         if (mp != NULL) {
 597                 TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
 598                 TCPS_BUMP_MIB(tcps, tcpOutAck);
 599                 TCPS_BUMP_MIB(tcps, tcpOutAckDelayed);
 600                 tcp_send_data(tcp, mp);
 601         }
 602 }
 603 
 604 /*
 605  * Notify IP that we are having trouble with this connection.  IP should
 606  * make note so it can potentially use a different IRE.
 607  */
 608 static void
 609 tcp_ip_notify(tcp_t *tcp)
 610 {
 611         conn_t          *connp = tcp->tcp_connp;
 612         ire_t           *ire;
 613 
 614         /*
 615          * Note: in the case of source routing we want to blow away the
 616          * route to the first source route hop.
 617          */


 739                 /*
 740                  * If the end point has not been closed, TCP can retransmit
 741                  * forever.  But if the end point is closed, the normal
 742                  * timeout applies.
 743                  */
 744                 if (second_threshold == 0) {
 745                         second_threshold = tcps->tcps_ip_abort_linterval;
 746                         dont_timeout = B_TRUE;
 747                 }
 748                 /* FALLTHRU */
 749         case TCPS_FIN_WAIT_1:
 750         case TCPS_CLOSING:
 751         case TCPS_LAST_ACK:
 752                 /* If we have data to rexmit */
 753                 if (tcp->tcp_suna != tcp->tcp_snxt) {
 754                         clock_t time_to_wait;
 755 
 756                         TCPS_BUMP_MIB(tcps, tcpTimRetrans);
 757                         if (!tcp->tcp_xmit_head)
 758                                 break;
 759 #ifdef KERNEL_32
 760                         time_to_wait = TICK_TO_MSEC(ddi_get_lbolt() -
 761                             (clock_t)tcp->tcp_xmit_head->b_prev);
 762 #else
 763                         time_to_wait = NSEC2MSEC(gethrtime() -
 764                             (hrtime_t)(intptr_t)tcp->tcp_xmit_head->b_prev);
 765 #endif
 766                         time_to_wait = tcp->tcp_rto - time_to_wait;
 767                         /*
 768                          * If the timer fires too early, 1 clock tick earlier,
 769                          * restart the timer.
 770                          */
 771                         if (time_to_wait > msec_per_tick) {
 772                                 TCP_STAT(tcps, tcp_timer_fire_early);
 773                                 TCP_TIMER_RESTART(tcp, time_to_wait);
 774                                 return;
 775                         }
 776                         /*
 777                          * When we probe zero windows, we force the swnd open.
 778                          * If our peer acks with a closed window swnd will be
 779                          * set to zero by tcp_rput(). As long as we are
 780                          * receiving acks tcp_rput will
 781                          * reset 'tcp_ms_we_have_waited' so as not to trip the
 782                          * first and second interval actions.  NOTE: the timer
 783                          * interval is allowed to continue its exponential
 784                          * backoff.
 785                          */
 786                         if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
 787                                 if (connp->conn_debug) {
 788                                         (void) strlog(TCP_MOD_ID, 0, 1,
 789                                             SL_TRACE, "tcp_timer: zero win");
 790                                 }
 791                         } else {
 792                                 cc_cong_signal(tcp, NULL, CC_RTO);





















 793                         }








 794                         break;
 795                 }
 796                 /*
 797                  * We have something to send yet we cannot send.  The
 798                  * reason can be:
 799                  *
 800                  * 1. Zero send window: we need to do zero window probe.
 801                  * 2. Zero cwnd: because of ECN, we need to "clock out"
 802                  * segments.
 803                  * 3. SWS avoidance: receiver may have shrunk window,
 804                  * reset our knowledge.
 805                  *
 806                  * Note that condition 2 can happen with either 1 or
 807                  * 3.  But 1 and 3 are exclusive.
 808                  */
 809                 if (tcp->tcp_unsent != 0) {
 810                         /*
 811                          * Should not hold the zero-copy messages for too long.
 812                          */
 813                         if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
 814                                 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
 815                                     tcp->tcp_xmit_head, B_TRUE);
 816 
 817                         if (tcp->tcp_cwnd == 0) {
 818                                 /*
 819                                  * Set tcp_cwnd to 1 MSS so that a
 820                                  * new segment can be sent out.  We
 821                                  * are "clocking out" new data when
 822                                  * the network is really congested.
 823                                  */
 824                                 ASSERT(tcp->tcp_ecn_ok);
 825                                 tcp->tcp_cwnd = tcp->tcp_mss;
 826                         }
 827                         if (tcp->tcp_swnd == 0) {
 828                                 /* Extend window for zero window probe */
 829                                 tcp->tcp_swnd++;
 830                                 tcp->tcp_zero_win_probe = B_TRUE;
 831                                 TCPS_BUMP_MIB(tcps, tcpOutWinProbe);
 832                                 tcp->tcp_cs.tcp_out_zwnd_probes++;
 833                         } else {
 834                                 /*
 835                                  * Handle timeout from sender SWS avoidance.
 836                                  * Reset our knowledge of the max send window
 837                                  * since the receiver might have reduced its
 838                                  * receive buffer.  Avoid setting tcp_max_swnd
 839                                  * to one since that will essentially disable
 840                                  * the SWS checks.
 841                                  *
 842                                  * Note that since we don't have a SWS
 843                                  * state variable, if the timeout is set
 844                                  * for ECN but not for SWS, this
 845                                  * code will also be executed.  This is
 846                                  * fine as tcp_max_swnd is updated
 847                                  * constantly and it will not affect
 848                                  * anything.
 849                                  */
 850                                 tcp->tcp_max_swnd = MAX(tcp->tcp_swnd, 2);
 851                         }
 852                         tcp_wput_data(tcp, NULL, B_FALSE);


 971                          */
 972                         tcp->tcp_ms_we_have_waited = second_threshold;
 973                 }
 974         } else if (ms > first_threshold) {
 975                 /*
 976                  * Should not hold the zero-copy messages for too long.
 977                  */
 978                 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
 979                         tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
 980                             tcp->tcp_xmit_head, B_TRUE);
 981 
 982                 /*
 983                  * We have been retransmitting for too long...  The RTT
 984                  * we calculated is probably incorrect.  Reinitialize it,
 985                  * adding (sa >> 3) + (sa >> 5) to tcp_rtt_sd to compensate
 986                  * for 0 tcp_rtt_sa ('+' binds tighter than '>>', hence the
 987                  * parentheses).  Reset tcp_rtt_update so that we won't
 988                  * cache a bad value.  Not done for a zero window probe.
 989                  */
 990                 if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
 991                         tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
 992                             (tcp->tcp_rtt_sa >> 5);
 993                         tcp->tcp_rtt_sa = 0;
 994                         tcp_ip_notify(tcp);
 995                         tcp->tcp_rtt_update = 0;
 996                 }
 997         }
 998 
 999 timer_rexmit:
1000         tcp->tcp_timer_backoff++;
1001         if ((ms = tcp_calculate_rto(tcp, tcps)) < tcp->tcp_rto_min) {


1002                 /*
1003                  * This means the original RTO is tcp_rexmit_interval_min.
1004                  * So we will use tcp_rexmit_interval_min as the RTO value
1005                  * and do the backoff.
1006                  */
1007                 ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
1008         } else {
1009                 ms <<= tcp->tcp_timer_backoff;
1010         }
1011         if (ms > tcp->tcp_rto_max) {
1012                 ms = tcp->tcp_rto_max;
1013                 /*
1014                  * ms is at max, decrement tcp_timer_backoff to avoid
1015                  * overflow.
1016                  */
1017                 tcp->tcp_timer_backoff--;
1018         }
1019         tcp->tcp_ms_we_have_waited += ms;
1020         if (tcp->tcp_zero_win_probe == 0) {
1021                 tcp->tcp_rto = ms;
1022         }
1023         TCP_TIMER_RESTART(tcp, ms);
1024         /*
1025          * This is after a timeout and tcp_rto is backed off.  Set
1026          * tcp_set_timer to 1 so that next time RTO is updated, we will
1027          * restart the timer with a correct value.
1028          */
1029         tcp->tcp_set_timer = 1;
1030         mss = tcp->tcp_snxt - tcp->tcp_suna;
1031         if (mss > tcp->tcp_mss)
1032                 mss = tcp->tcp_mss;
1033         if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1034                 mss = tcp->tcp_swnd;
1035 
1036         if ((mp = tcp->tcp_xmit_head) != NULL) {
1037 #ifdef KERNEL_32
1038                 mp->b_prev = (mblk_t *)ddi_get_lbolt();
1039 #else
1040                 mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
1041 #endif
1042         }
1043         mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1044             B_TRUE);
1045 
1046         /*
1047          * When slow start after retransmission begins, start with
1048          * this seq no.  tcp_rexmit_max marks the end of special slow
1049          * start phase.
1050          */
1051         tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1052         if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1053             (tcp->tcp_unsent == 0)) {
1054                 tcp->tcp_rexmit_max = tcp->tcp_fss;
1055         } else {
1056                 tcp->tcp_rexmit_max = tcp->tcp_snxt;
1057         }
1058         tcp->tcp_rexmit = B_TRUE;
1059         tcp->tcp_dupack_cnt = 0;
1060 
1061         /*
1062          * Remove all rexmit SACK blk to start from fresh.
1063          */
1064         if (tcp->tcp_snd_sack_ok)
1065                 TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
1066         if (mp == NULL) {
1067                 return;
1068         }
1069 
1070         tcp->tcp_csuna = tcp->tcp_snxt;
1071         TCPS_BUMP_MIB(tcps, tcpRetransSegs);
1072         TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);
1073         tcp->tcp_cs.tcp_out_retrans_segs++;
1074         tcp->tcp_cs.tcp_out_retrans_bytes += mss;
1075         tcp_send_data(tcp, mp);
1076 
1077 }
1078 
1079 /*
1080  * SO_LINGER expiry handler.  arg is the conn_t of the lingering connection;
1081  * its TCP instance is marked with ETIMEDOUT and told to stop lingering.
1082  */
1083 void
1084 tcp_close_linger_timeout(void *arg)
1085 {
1086         tcp_t   *ltcp = ((conn_t *)arg)->conn_tcp;
1087 
1088         /* Record the error before lingering is stopped. */
1089         ltcp->tcp_client_errno = ETIMEDOUT;
1090         tcp_stop_lingering(ltcp);
1091 }