1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2017 by Delphix. All rights reserved.
  24  */
  25 
  26 /*
  27  * The TCP congestion control algorithm extracted from the pre-framework
  28  * implementation of TCP congestion control.
  29  */
  30 
  31 #include <sys/errno.h>
  32 #include <inet/tcp.h>
  33 #include <inet/tcp_impl.h>
  34 #include <inet/cc.h>
  35 #include <inet/cc/cc_module.h>
  36 
  37 
/* Algorithm name string; used as the .name member of sunreno_cc_algo. */
#define CC_SUNRENO_ALGO_NAME "sunreno"
  39 
/*
 * Linkage element for this module: it points at mod_miscops and carries the
 * module's descriptive string.  It is referenced from
 * cc_sunreno_modlinkage.
 */
static struct modlmisc cc_sunreno_modlmisc = {
        &mod_miscops,
        "SUNReno Congestion Control"
};
  44 
/*
 * Module linkage passed to mod_install() in _init() and to mod_info() in
 * _info().  Its element list holds only cc_sunreno_modlmisc, followed by a
 * terminating NULL.
 */
static struct modlinkage cc_sunreno_modlinkage = {
        MODREV_1,
        &cc_sunreno_modlmisc,
        NULL
};
  50 
/*
 * Forward declarations of the callbacks installed in sunreno_cc_algo; the
 * definitions appear later in this file.
 */
static void     sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void     sunreno_after_idle(struct cc_var *ccv);
static void     sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void     sunreno_post_recovery(struct cc_var *ccv);
  55 
/*
 * Algorithm descriptor that _init() hands to cc_register_algo().  Only the
 * name and the four callbacks below are set explicitly; every other member
 * of struct cc_algo is left zero-initialized.
 */
struct cc_algo sunreno_cc_algo = {
        .name = CC_SUNRENO_ALGO_NAME,
        .ack_received = sunreno_ack_received,
        .after_idle = sunreno_after_idle,
        .cong_signal = sunreno_cong_signal,
        .post_recovery = sunreno_post_recovery,
};
  63 
  64 int
  65 _init(void)
  66 {
  67         int err;
  68 
  69         if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
  70                 if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
  71                         (void) cc_deregister_algo(&sunreno_cc_algo);
  72         }
  73         return (err);
  74 }
  75 
/*
 * Unconditionally returns EBUSY; nothing is deregistered or removed here.
 *
 * NOTE(review): presumably this is what keeps the module, and the algorithm
 * registered in _init(), resident once loaded -- confirm against the
 * _fini(9E) contract.
 */
int
_fini(void)
{
        return (EBUSY);
}
  81 
/*
 * Passes this module's linkage and the caller's modinfop to mod_info() and
 * returns whatever mod_info() returns.
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&cc_sunreno_modlinkage, modinfop));
}
  87 
  88 static void
  89 sunreno_ack_received(struct cc_var *ccv, uint16_t type)
  90 {
  91         uint32_t add;
  92         uint32_t cwnd;
  93         int mss;
  94 
  95         if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
  96                 mss = CCV(ccv, tcp_mss);
  97                 cwnd = CCV(ccv, tcp_cwnd);
  98                 add = mss;
  99 
 100                 if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
 101                         /*
 102                          * This is to prevent an increase of less than 1 MSS of
 103                          * tcp_cwnd.  With partial increase, tcp_wput_data()
 104                          * may send out tinygrams in order to preserve mblk
 105                          * boundaries.
 106                          *
 107                          * By initializing tcp_cwnd_cnt to new tcp_cwnd and
 108                          * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
 109                          * increased by 1 MSS for every RTTs.
 110                          */
 111                         if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
 112                                 CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
 113                         } else {
 114                                 CCV(ccv, tcp_cwnd_cnt) -= add;
 115                                 add = 0;
 116                         }
 117                 }
 118                 CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
 119         }
 120 }
 121 
 122 static void
 123 sunreno_after_idle(struct cc_var *ccv)
 124 {
 125         int32_t num_sack_blk = 0;
 126         int mss;
 127 
 128         if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
 129                 int32_t opt_len;
 130 
 131                 num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
 132                     CCV(ccv, tcp_num_sack_blk));
 133                 opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
 134                     2 + TCPOPT_HEADER_LEN;
 135                 mss = CCV(ccv, tcp_mss) - opt_len;
 136         } else {
 137                 mss = CCV(ccv, tcp_mss);
 138         }
 139 
 140         TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
 141             CCSV(ccv, tcps_slow_start_after_idle));
 142 }
 143 
 144 /*
 145  * Perform any necessary tasks before we enter congestion recovery.
 146  */
 147 static void
 148 sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
 149 {
 150         int npkt;
 151         int mss;
 152 
 153         /* Catch algos which mistakenly leak private signal types. */
 154         ASSERT((type & CC_SIGPRIVMASK) == 0);
 155 
 156         mss = CCV(ccv, tcp_mss);
 157         npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
 158 
 159         switch (type) {
 160         case CC_NDUPACK:
 161                 if (!IN_FASTRECOVERY(ccv->flags)) {
 162                         if (!IN_CONGRECOVERY(ccv->flags)) {
 163                                 CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
 164                                     mss;
 165                                 CCV(ccv, tcp_cwnd) = (npkt +
 166                                     CCV(ccv, tcp_dupack_cnt)) * mss;
 167                         }
 168                         ENTER_RECOVERY(ccv->flags);
 169                 }
 170                 break;
 171         case CC_ECN:
 172                 if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
 173                         CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
 174                         CCV(ccv, tcp_cwnd) = npkt * mss;
 175                         if (CCV(ccv, tcp_cwnd) == 0) {
 176                                 /*
 177                                  * This makes sure that when the ACK comes
 178                                  * back, we will increase tcp_cwnd by 1 MSS.
 179                                  */
 180                                 CCV(ccv, tcp_cwnd_cnt) = 0;
 181                         }
 182                         ENTER_CONGRECOVERY(ccv->flags);
 183                 }
 184                 break;
 185         case CC_RTO:
 186                 /*
 187                  * After retransmission, we need to do slow start.  Set the
 188                  * ssthresh to one half of current effective window and cwnd to
 189                  * one MSS.  Also reset tcp_cwnd_cnt.
 190                  *
 191                  * Note that if tcp_ssthresh is reduced because of ECN, do not
 192                  * reduce it again unless it is already one window of data away
 193                  * (tcp_cwr should then be cleared) or this is a timeout for a
 194                  * retransmitted segment.
 195                  */
 196                 if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
 197                         if (CCV(ccv, tcp_timer_backoff) != 0)
 198                                 npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
 199                         CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
 200                 }
 201                 CCV(ccv, tcp_cwnd) = mss;
 202                 CCV(ccv, tcp_cwnd_cnt) = 0;
 203                 break;
 204         }
 205 }
 206 
 207 /*
 208  * Perform any necessary tasks before we exit congestion recovery.
 209  */
 210 static void
 211 sunreno_post_recovery(struct cc_var *ccv)
 212 {
 213         /*
 214          * Restore the congestion window back to ssthresh as per RFC 5681
 215          * section 3.2.
 216          */
 217         if (IN_FASTRECOVERY(ccv->flags)) {
 218                 if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
 219                         CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
 220                 }
 221         }
 222         CCV(ccv, tcp_cwnd_cnt) = 0;
 223 }