Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_tunables.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_tunables.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2016 Joyent, Inc.
  24   24   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25      - * Copyright (c) 2013 by Delphix. All rights reserved.
       25 + * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  26   26   */
  27   27  /* Copyright (c) 1990 Mentat Inc. */
  28   28  
  29   29  #include <inet/ip.h>
  30   30  #include <inet/tcp_impl.h>
       31 +#include <inet/cc.h>
  31   32  #include <sys/multidata.h>
  32   33  #include <sys/sunddi.h>
  33   34  
  34   35  /* Max size IP datagram is 64k - 1 */
  35   36  #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
  36   37  #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
  37   38  
  38   39  /* Max of the above */
  39   40  #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4
  40   41  
↓ open down ↓ 191 lines elided ↑ open up ↑
 232  233  
 233  234          if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 234  235                  return (err);
 235  236          /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
 236  237          if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 237  238                  return (ERANGE);
 238  239          pinfo->prop_cur_uval = (uint32_t)new_value;
 239  240          return (0);
 240  241  }
 241  242  
      243 +/* ARGSUSED */
           /*
            * Property setter registered for "congestion_control" in
            * tcp_propinfo_tbl[].  Looks up a congestion control algorithm by
            * name via cc_load_algo() and stores it in the TCP stack's
            * tcps_default_cc_algo.
            *
            * When MOD_PROP_DEFAULT is set in flags, pval is ignored and
            * CC_DEFAULT_ALGO_NAME is used; otherwise pval is treated as the
            * algorithm name string.  cr, pinfo and ifname are not used.
            *
            * Returns 0 on success, or EINVAL if cc_load_algo() returns NULL
            * for the requested name (in which case tcps_default_cc_algo is
            * left unchanged).
            */
      244 +static int
      245 +tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
      246 +    const char *ifname, const void *pval, uint_t flags)
      247 +{
      248 +        tcp_stack_t *tcps = stack->netstack_tcp;
                   /*
                    * NOTE(review): pval is not checked for NULL before being
                    * handed to cc_load_algo() -- confirm callers always supply a
                    * value when MOD_PROP_DEFAULT is not set.
                    */
      249 +        char *name = (flags & MOD_PROP_DEFAULT) ?
      250 +            CC_DEFAULT_ALGO_NAME : (char *)pval;
      251 +        struct cc_algo *algo = cc_load_algo(name);
      252 +
      253 +        if (algo == NULL)
      254 +                return (EINVAL);
      255 +
      256 +        tcps->tcps_default_cc_algo = algo;
      257 +        return (0);
      258 +}
      259 +
      260 +/* ARGSUSED */
           /*
            * Property getter registered for "congestion_control" in
            * tcp_propinfo_tbl[].  Formats the requested attribute of the
            * property into the caller's buffer pval (of size psize), selected
            * by flags:
            *
            *   MOD_PROP_POSSIBLE - not supported; returns ENOTSUP.
            *   MOD_PROP_PERM     - MOD_PROP_PERM_RW, printed with "%u".
            *   MOD_PROP_DEFAULT  - CC_DEFAULT_ALGO_NAME.
            *   otherwise         - the name of the stack's current
            *                       tcps_default_cc_algo.
            *
            * pinfo and ifname are not used.
            *
            * Returns 0 on success, or ENOBUFS if the formatted value did not
            * fit in psize bytes.
            */
      261 +static int
      262 +tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
      263 +    const char *ifname, void *pval, uint_t psize, uint_t flags)
      264 +{
      265 +        size_t nbytes;
      266 +
      267 +        if (flags & MOD_PROP_POSSIBLE) {
      268 +                /* libipadm doesn't call down for possible values. */
      269 +                return (ENOTSUP);
      270 +        }
      271 +
      272 +        if (flags & MOD_PROP_PERM) {
      273 +                nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
      274 +        } else if (flags & MOD_PROP_DEFAULT) {
      275 +                nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME);
      276 +        } else {
      277 +                nbytes = snprintf(pval, psize, "%s",
      278 +                    stack->netstack_tcp->tcps_default_cc_algo->name);
      279 +        }
                   /*
                    * A result of psize or more means the value (plus its
                    * terminator) was too long for the caller's buffer.
                    */
      280 +        if (nbytes >= psize)
      281 +                return (ENOBUFS);
      282 +        return (0);
      283 +}
      284 +
 242  285  /*
 243  286   * All of these are alterable, within the min/max values given, at run time.
 244  287   *
 245  288   * Note: All those tunables which do not start with "_" are Committed and
 246  289   * therefore are public. See PSARC 2010/080.
 247  290   */
 248  291  mod_prop_info_t tcp_propinfo_tbl[] = {
 249  292          /* tunable - 0 */
 250  293          { "_time_wait_interval", MOD_PROTO_TCP,
 251  294              mod_set_uint32, mod_get_uint32,
↓ open down ↓ 268 lines elided ↑ open up ↑
 520  563              tcp_listener_conf_add, NULL, {0}, {0} },
 521  564  
 522  565          { "_listener_limit_conf_del", MOD_PROTO_TCP,
 523  566              tcp_listener_conf_del, NULL, {0}, {0} },
 524  567  
 525  568          { "_iss_incr", MOD_PROTO_TCP,
 526  569              mod_set_uint32, mod_get_uint32,
 527  570              {1, ISS_INCR, ISS_INCR},
 528  571              {ISS_INCR} },
 529  572  
      573 +        { "congestion_control", MOD_PROTO_TCP,
      574 +            tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
      575 +
      576 +        /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
      577 +        { "_abc", MOD_PROTO_TCP,
      578 +            mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
      579 +
      580 +        /* "L" value from RFC 3465 */
      581 +        { "_abc_l_var", MOD_PROTO_TCP,
      582 +            mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
      583 +
 530  584          { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 531  585  
 532  586          { NULL, 0, NULL, NULL, {0}, {0} }
 533  587  };
 534  588  
 535  589  int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX