Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 /* Copyright (c) 1990 Mentat Inc. */
  28 
  29 #include <inet/ip.h>
  30 #include <inet/tcp_impl.h>

  31 #include <sys/multidata.h>
  32 #include <sys/sunddi.h>
  33 
  34 /* Max size IP datagram is 64k - 1 */
  35 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
  36 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
  37 
  38 /* Max of the above */
  39 #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4
  40 
  41 /*
  42  * Set the RFC 1948 pass phrase
  43  */
  44 /* ARGSUSED */
  45 static int
  46 tcp_set_1948phrase(netstack_t *stack,  cred_t *cr, mod_prop_info_t *pinfo,
  47     const char *ifname, const void* pr_val, uint_t flags)
  48 {
  49         if (flags & MOD_PROP_DEFAULT)
  50                 return (ENOTSUP);


 222 }
 223 
 224 /* ARGSUSED */
 225 static int
 226 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 227     const char *ifname, const void *pval, uint_t flags)
 228 {
 229         unsigned long new_value;
 230         tcp_stack_t *tcps = stack->netstack_tcp;
 231         int err;
 232 
 233         if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 234                 return (err);
 235         /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
 236         if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 237                 return (ERANGE);
 238         pinfo->prop_cur_uval = (uint32_t)new_value;
 239         return (0);
 240 }
 241 










































 242 /*
 243  * All of these are alterable, within the min/max values given, at run time.
 244  *
 245  * Note: All those tunables which do not start with "_" are Committed and
 246  * therefore are public. See PSARC 2010/080.
 247  */
 248 mod_prop_info_t tcp_propinfo_tbl[] = {
 249         /* tunable - 0 */
 250         { "_time_wait_interval", MOD_PROTO_TCP,
 251             mod_set_uint32, mod_get_uint32,
 252             {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
 253 
 254         { "_conn_req_max_q", MOD_PROTO_TCP,
 255             mod_set_uint32, mod_get_uint32,
 256             {1, UINT32_MAX, 128}, {128} },
 257 
 258         { "_conn_req_max_q0", MOD_PROTO_TCP,
 259             mod_set_uint32, mod_get_uint32,
 260             {0, UINT32_MAX, 1024}, {1024} },
 261 


 510             mod_set_extra_privports, mod_get_extra_privports,
 511             {1, ULP_MAX_PORT, 0}, {0} },
 512 
 513         { "_1948_phrase", MOD_PROTO_TCP,
 514             tcp_set_1948phrase, NULL, {0}, {0} },
 515 
 516         { "_listener_limit_conf", MOD_PROTO_TCP,
 517             NULL, tcp_listener_conf_get, {0}, {0} },
 518 
 519         { "_listener_limit_conf_add", MOD_PROTO_TCP,
 520             tcp_listener_conf_add, NULL, {0}, {0} },
 521 
 522         { "_listener_limit_conf_del", MOD_PROTO_TCP,
 523             tcp_listener_conf_del, NULL, {0}, {0} },
 524 
 525         { "_iss_incr", MOD_PROTO_TCP,
 526             mod_set_uint32, mod_get_uint32,
 527             {1, ISS_INCR, ISS_INCR},
 528             {ISS_INCR} },
 529 











 530         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 531 
 532         { NULL, 0, NULL, NULL, {0}, {0} }
 533 };
 534 
 535 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  26  */
  27 /* Copyright (c) 1990 Mentat Inc. */
  28 
  29 #include <inet/ip.h>
  30 #include <inet/tcp_impl.h>
  31 #include <inet/cc.h>
  32 #include <sys/multidata.h>
  33 #include <sys/sunddi.h>
  34 
  35 /* Max size IP datagram is 64k - 1 */
  36 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
  37 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
  38 
  39 /* Max of the above */
  40 #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4
  41 
  42 /*
  43  * Set the RFC 1948 pass phrase
  44  */
  45 /* ARGSUSED */
  46 static int
  47 tcp_set_1948phrase(netstack_t *stack,  cred_t *cr, mod_prop_info_t *pinfo,
  48     const char *ifname, const void* pr_val, uint_t flags)
  49 {
  50         if (flags & MOD_PROP_DEFAULT)
  51                 return (ENOTSUP);


 223 }
 224 
 225 /* ARGSUSED */
 226 static int
 227 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 228     const char *ifname, const void *pval, uint_t flags)
 229 {
 230         unsigned long new_value;
 231         tcp_stack_t *tcps = stack->netstack_tcp;
 232         int err;
 233 
 234         if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 235                 return (err);
 236         /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
 237         if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 238                 return (ERANGE);
 239         pinfo->prop_cur_uval = (uint32_t)new_value;
 240         return (0);
 241 }
 242 
 243 /* ARGSUSED */
 244 static int
 245 tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 246     const char *ifname, const void *pval, uint_t flags)
 247 {
 248         tcp_stack_t *tcps = stack->netstack_tcp;
 249         char *name = (flags & MOD_PROP_DEFAULT) ?
 250             CC_DEFAULT_ALGO_NAME : (char *)pval;
 251         struct cc_algo *algo = cc_load_algo(name);
 252 
 253         if (algo == NULL)
 254                 return (EINVAL);
 255 
 256         tcps->tcps_default_cc_algo = algo;
 257         return (0);
 258 }
 259 
 260 /* ARGSUSED */
 261 static int
 262 tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
 263     const char *ifname, void *pval, uint_t psize, uint_t flags)
 264 {
 265         size_t nbytes;
 266 
 267         if (flags & MOD_PROP_POSSIBLE) {
 268                 /* libipadm doesn't call down for possible values. */
 269                 return (ENOTSUP);
 270         }
 271 
 272         if (flags & MOD_PROP_PERM) {
 273                 nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
 274         } else if (flags & MOD_PROP_DEFAULT) {
 275                 nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME);
 276         } else {
 277                 nbytes = snprintf(pval, psize, "%s",
 278                     stack->netstack_tcp->tcps_default_cc_algo->name);
 279         }
 280         if (nbytes >= psize)
 281                 return (ENOBUFS);
 282         return (0);
 283 }
 284 
 285 /*
 286  * All of these are alterable, within the min/max values given, at run time.
 287  *
 288  * Note: All those tunables which do not start with "_" are Committed and
 289  * therefore are public. See PSARC 2010/080.
 290  */
 291 mod_prop_info_t tcp_propinfo_tbl[] = {
 292         /* tunable - 0 */
 293         { "_time_wait_interval", MOD_PROTO_TCP,
 294             mod_set_uint32, mod_get_uint32,
 295             {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
 296 
 297         { "_conn_req_max_q", MOD_PROTO_TCP,
 298             mod_set_uint32, mod_get_uint32,
 299             {1, UINT32_MAX, 128}, {128} },
 300 
 301         { "_conn_req_max_q0", MOD_PROTO_TCP,
 302             mod_set_uint32, mod_get_uint32,
 303             {0, UINT32_MAX, 1024}, {1024} },
 304 


 553             mod_set_extra_privports, mod_get_extra_privports,
 554             {1, ULP_MAX_PORT, 0}, {0} },
 555 
 556         { "_1948_phrase", MOD_PROTO_TCP,
 557             tcp_set_1948phrase, NULL, {0}, {0} },
 558 
 559         { "_listener_limit_conf", MOD_PROTO_TCP,
 560             NULL, tcp_listener_conf_get, {0}, {0} },
 561 
 562         { "_listener_limit_conf_add", MOD_PROTO_TCP,
 563             tcp_listener_conf_add, NULL, {0}, {0} },
 564 
 565         { "_listener_limit_conf_del", MOD_PROTO_TCP,
 566             tcp_listener_conf_del, NULL, {0}, {0} },
 567 
 568         { "_iss_incr", MOD_PROTO_TCP,
 569             mod_set_uint32, mod_get_uint32,
 570             {1, ISS_INCR, ISS_INCR},
 571             {ISS_INCR} },
 572 
 573         { "congestion_control", MOD_PROTO_TCP,
 574             tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
 575 
 576         /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
 577         { "_abc", MOD_PROTO_TCP,
 578             mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
 579 
 580         /* "L" value from RFC 3465 */
 581         { "_abc_l_var", MOD_PROTO_TCP,
 582             mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
 583 
 584         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 585 
 586         { NULL, 0, NULL, NULL, {0}, {0} }
 587 };
 588 
 589 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);