Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/

@@ -20,16 +20,17 @@
  */
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2016 Joyent, Inc.
  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
 #include <inet/ip.h>
 #include <inet/tcp_impl.h>
+#include <inet/cc.h>
 #include <sys/multidata.h>
 #include <sys/sunddi.h>
 
 /* Max size IP datagram is 64k - 1 */
 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))

@@ -237,10 +238,52 @@
                 return (ERANGE);
         pinfo->prop_cur_uval = (uint32_t)new_value;
         return (0);
 }
 
+/*
+ * Point the stack's default CC algorithm at the one named by pval (or by
+ * CC_DEFAULT_ALGO_NAME on MOD_PROP_DEFAULT); EINVAL if cc_load_algo() fails.
+ */
+/* ARGSUSED */
+static int
+tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
+    const char *ifname, const void *pval, uint_t flags)
+{
+        struct cc_algo *cc = cc_load_algo((flags & MOD_PROP_DEFAULT) ?
+            CC_DEFAULT_ALGO_NAME : (char *)pval);
+        if (cc == NULL)
+                return (EINVAL);
+        stack->netstack_tcp->tcps_default_cc_algo = cc;
+        return (0);
+}
+
+/*
+ * Report the default TCP congestion control algorithm property.  Depending
+ * on flags, pval receives the permission bits, CC_DEFAULT_ALGO_NAME, or the
+ * name of the algorithm currently set as this stack's default.
+ */
+/* ARGSUSED */
+static int
+tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
+    const char *ifname, void *pval, uint_t psize, uint_t flags)
+{
+        size_t len;
+
+        /* libipadm doesn't call down for possible values. */
+        if (flags & MOD_PROP_POSSIBLE)
+                return (ENOTSUP);
+        if (flags & MOD_PROP_PERM)
+                len = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
+        else
+                len = snprintf(pval, psize, "%s",
+                    (flags & MOD_PROP_DEFAULT) ? CC_DEFAULT_ALGO_NAME :
+                    stack->netstack_tcp->tcps_default_cc_algo->name);
+        /* A result of psize or more means the text did not fit in pval. */
+        return ((len >= psize) ? ENOBUFS : 0);
+}
+
 /*
  * All of these are alterable, within the min/max values given, at run time.
  *
  * Note: All those tunables which do not start with "_" are Committed and
  * therefore are public. See PSARC 2010/080.

@@ -525,10 +568,21 @@
         { "_iss_incr", MOD_PROTO_TCP,
             mod_set_uint32, mod_get_uint32,
             {1, ISS_INCR, ISS_INCR},
             {ISS_INCR} },
 
+        { "congestion_control", MOD_PROTO_TCP,
+            tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
+
+        /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
+        { "_abc", MOD_PROTO_TCP,
+            mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
+
+        /* "L" value from RFC 3465 */
+        { "_abc_l_var", MOD_PROTO_TCP,
+            mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
+
         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 
         { NULL, 0, NULL, NULL, {0}, {0} }
 };