Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  */
  25 
  26 /*
  27  * This file contains routines that are used to modify/retrieve protocol or
  28  * interface property values. It also holds all the supported properties for
  29  * both IP interface and protocols in `ipadm_prop_desc_t'. Following protocols
  30  * are supported: IP, IPv4, IPv6, TCP, SCTP, UDP and ICMP.
  31  *
  32  * This file also contains walkers, which walk through the property table and
  33  * call the callback function, of the form `ipadm_prop_wfunc_t', for every
  34  * property in the table.
  35  */
  36 
  37 #include <unistd.h>
  38 #include <errno.h>
  39 #include <ctype.h>
  40 #include <fcntl.h>
  41 #include <strings.h>
  42 #include <stdlib.h>


  43 #include <netinet/in.h>
  44 #include <arpa/inet.h>
  45 #include <sys/sockio.h>
  46 #include <assert.h>
  47 #include <libdllink.h>
  48 #include <zone.h>
  49 #include "libipadm_impl.h"
  50 #include <inet/tunables.h>
  51 
  52 #define IPADM_NONESTR           "none"
  53 #define DEF_METRIC_VAL          0       /* default metric value */
  54 
  55 #define A_CNT(arr)      (sizeof (arr) / sizeof (arr[0]))
  56 
  57 static ipadm_status_t   i_ipadm_validate_if(ipadm_handle_t, const char *,
  58                             uint_t, uint_t);
  59 
  60 /*
  61  * Callback functions to retrieve property values from the kernel. These
  62  * functions, when required, translate the values from the kernel to a format
  63  * suitable for printing. For example: boolean values will be translated
  64  * to on/off. They also retrieve DEFAULT, PERM and POSSIBLE values for
  65  * a given property.
  66  */
  67 static ipadm_pd_getf_t  i_ipadm_get_prop, i_ipadm_get_ifprop_flags,
  68                         i_ipadm_get_mtu, i_ipadm_get_metric,
  69                         i_ipadm_get_usesrc, i_ipadm_get_forwarding,
  70                         i_ipadm_get_ecnsack, i_ipadm_get_hostmodel;

  71 
  72 /*
  73  * Callback functions to set property values. These functions translate the
  74  * values to a format suitable for kernel consumption, allocate the necessary
  75  * ioctl buffers and then invoke ioctl().
  76  */
  77 static ipadm_pd_setf_t  i_ipadm_set_prop, i_ipadm_set_mtu,
  78                         i_ipadm_set_ifprop_flags,
  79                         i_ipadm_set_metric, i_ipadm_set_usesrc,
  80                         i_ipadm_set_forwarding, i_ipadm_set_eprivport,
  81                         i_ipadm_set_ecnsack, i_ipadm_set_hostmodel;
  82 
  83 /* array of protocols we support */
  84 static int protocols[] = { MOD_PROTO_IP, MOD_PROTO_RAWIP,
  85                             MOD_PROTO_TCP, MOD_PROTO_UDP,
  86                             MOD_PROTO_SCTP };
  87 
  88 /*
  89  * Supported IP protocol properties.
  90  */


 136 
 137         { "usesrc", NULL, IPADMPROP_CLASS_IF, MOD_PROTO_IPV6, 0,
 138             i_ipadm_set_usesrc, NULL, i_ipadm_get_usesrc },
 139 
 140         { "hostmodel", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_IPV6, 0,
 141             i_ipadm_set_hostmodel, i_ipadm_get_hostmodel,
 142             i_ipadm_get_hostmodel },
 143 
 144         { "hostmodel", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_IPV4, 0,
 145             i_ipadm_set_hostmodel, i_ipadm_get_hostmodel,
 146             i_ipadm_get_hostmodel },
 147 
 148         { NULL, NULL, 0, 0, 0, NULL, NULL, NULL }
 149 };
 150 
 151 /* possible values for TCP properties `ecn' and `sack' */
 152 static const char *ecn_sack_vals[] = {"never", "passive", "active", NULL};
 153 
 154 /* Supported TCP protocol properties */
 155 static ipadm_prop_desc_t ipadm_tcp_prop_table[] = {



 156         { "ecn", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 157             i_ipadm_set_ecnsack, i_ipadm_get_ecnsack, i_ipadm_get_ecnsack },
 158 
 159         { "extra_priv_ports", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP,
 160             IPADMPROP_MULVAL, i_ipadm_set_eprivport, i_ipadm_get_prop,
 161             i_ipadm_get_prop },
 162 
 163         { "largest_anon_port", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 164             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 165 
 166         { "max_buf", "_max_buf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 167             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 168 
 169         { "recv_buf", "recv_maxbuf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 170             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 171 
 172         { "sack", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 173             i_ipadm_set_ecnsack, i_ipadm_get_ecnsack, i_ipadm_get_ecnsack },
 174 
 175         { "send_buf", "send_maxbuf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,


 795                 if (status == IPADM_SUCCESS && (valtype == MOD_PROP_ACTIVE ||
 796                     valtype == MOD_PROP_DEFAULT)) {
 797                         i = atoi(buf);
 798                         assert(i < 3);
 799                         nbytes = snprintf(buf, *bufsize, "%s",
 800                             ecn_sack_vals[i]);
 801                 }
 802                 break;
 803         default:
 804                 return (IPADM_INVALID_ARG);
 805         }
 806         if (nbytes >= *bufsize) {
 807                 /* insufficient buffer space */
 808                 *bufsize = nbytes + 1;
 809                 return (IPADM_NO_BUFS);
 810         }
 811 
 812         return (status);
 813 }
 814 




































 815 /* ARGSUSED */
 816 static ipadm_status_t
 817 i_ipadm_get_forwarding(ipadm_handle_t iph, const void *arg,
 818     ipadm_prop_desc_t *pdp, char *buf, uint_t *bufsize, uint_t proto,
 819     uint_t valtype)
 820 {
 821         const char      *ifname = arg;
 822         ipadm_status_t  status = IPADM_SUCCESS;
 823 
 824         /*
 825          * if interface name is provided, then get forwarding status using
 826          * SIOCGLIFFLAGS
 827          */
 828         if (ifname != NULL) {
 829                 status = i_ipadm_get_ifprop_flags(iph, ifname, pdp,
 830                     buf, bufsize, pdp->ipd_proto, valtype);
 831         } else {
 832                 status = i_ipadm_get_prop(iph, ifname, pdp, buf,
 833                     bufsize, proto, valtype);
 834                 /*




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
  24  */
  25 
  26 /*
  27  * This file contains routines that are used to modify/retrieve protocol or
  28  * interface property values. It also holds all the supported properties for
  29  * both IP interface and protocols in `ipadm_prop_desc_t'. Following protocols
  30  * are supported: IP, IPv4, IPv6, TCP, SCTP, UDP and ICMP.
  31  *
  32  * This file also contains walkers, which walk through the property table and
  33  * call the callback function, of the form `ipadm_prop_wfunc_t', for every
  34  * property in the table.
  35  */
  36 
  37 #include <unistd.h>
  38 #include <errno.h>
  39 #include <ctype.h>
  40 #include <fcntl.h>
  41 #include <strings.h>
  42 #include <stdlib.h>
  43 #include <sys/types.h>
  44 #include <dirent.h>
  45 #include <netinet/in.h>
  46 #include <arpa/inet.h>
  47 #include <sys/sockio.h>
  48 #include <assert.h>
  49 #include <libdllink.h>
  50 #include <zone.h>
  51 #include "libipadm_impl.h"
  52 #include <inet/tunables.h>
  53 
  54 #define IPADM_NONESTR           "none"
  55 #define DEF_METRIC_VAL          0       /* default metric value */
  56 
  57 #define A_CNT(arr)      (sizeof (arr) / sizeof (arr[0]))
  58 
  59 static ipadm_status_t   i_ipadm_validate_if(ipadm_handle_t, const char *,
  60                             uint_t, uint_t);
  61 
  62 /*
  63  * Callback functions to retrieve property values from the kernel. These
  64  * functions, when required, translate the values from the kernel to a format
  65  * suitable for printing. For example: boolean values will be translated
  66  * to on/off. They also retrieve DEFAULT, PERM and POSSIBLE values for
  67  * a given property.
  68  */
  69 static ipadm_pd_getf_t  i_ipadm_get_prop, i_ipadm_get_ifprop_flags,
  70                         i_ipadm_get_mtu, i_ipadm_get_metric,
  71                         i_ipadm_get_usesrc, i_ipadm_get_forwarding,
  72                         i_ipadm_get_ecnsack, i_ipadm_get_hostmodel,
  73                         i_ipadm_get_cc;
  74 
  75 /*
  76  * Callback functions to set property values. These functions translate the
  77  * values to a format suitable for kernel consumption, allocate the necessary
  78  * ioctl buffers and then invoke ioctl().
  79  */
  80 static ipadm_pd_setf_t  i_ipadm_set_prop, i_ipadm_set_mtu,
  81                         i_ipadm_set_ifprop_flags,
  82                         i_ipadm_set_metric, i_ipadm_set_usesrc,
  83                         i_ipadm_set_forwarding, i_ipadm_set_eprivport,
  84                         i_ipadm_set_ecnsack, i_ipadm_set_hostmodel;
  85 
  86 /* array of protocols we support */
  87 static int protocols[] = { MOD_PROTO_IP, MOD_PROTO_RAWIP,
  88                             MOD_PROTO_TCP, MOD_PROTO_UDP,
  89                             MOD_PROTO_SCTP };
  90 
  91 /*
  92  * Supported IP protocol properties.
  93  */


 139 
 140         { "usesrc", NULL, IPADMPROP_CLASS_IF, MOD_PROTO_IPV6, 0,
 141             i_ipadm_set_usesrc, NULL, i_ipadm_get_usesrc },
 142 
 143         { "hostmodel", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_IPV6, 0,
 144             i_ipadm_set_hostmodel, i_ipadm_get_hostmodel,
 145             i_ipadm_get_hostmodel },
 146 
 147         { "hostmodel", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_IPV4, 0,
 148             i_ipadm_set_hostmodel, i_ipadm_get_hostmodel,
 149             i_ipadm_get_hostmodel },
 150 
 151         { NULL, NULL, 0, 0, 0, NULL, NULL, NULL }
 152 };
 153 
 154 /* possible values for TCP properties `ecn' and `sack' */
 155 static const char *ecn_sack_vals[] = {"never", "passive", "active", NULL};
 156 
 157 /* Supported TCP protocol properties */
 158 static ipadm_prop_desc_t ipadm_tcp_prop_table[] = {
 159         { "congestion_control", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 160             i_ipadm_set_prop, i_ipadm_get_cc, i_ipadm_get_prop },
 161 
 162         { "ecn", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 163             i_ipadm_set_ecnsack, i_ipadm_get_ecnsack, i_ipadm_get_ecnsack },
 164 
 165         { "extra_priv_ports", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP,
 166             IPADMPROP_MULVAL, i_ipadm_set_eprivport, i_ipadm_get_prop,
 167             i_ipadm_get_prop },
 168 
 169         { "largest_anon_port", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 170             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 171 
 172         { "max_buf", "_max_buf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 173             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 174 
 175         { "recv_buf", "recv_maxbuf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 176             i_ipadm_set_prop, i_ipadm_get_prop, i_ipadm_get_prop },
 177 
 178         { "sack", NULL, IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,
 179             i_ipadm_set_ecnsack, i_ipadm_get_ecnsack, i_ipadm_get_ecnsack },
 180 
 181         { "send_buf", "send_maxbuf", IPADMPROP_CLASS_MODULE, MOD_PROTO_TCP, 0,


 801                 if (status == IPADM_SUCCESS && (valtype == MOD_PROP_ACTIVE ||
 802                     valtype == MOD_PROP_DEFAULT)) {
 803                         i = atoi(buf);
 804                         assert(i < 3);
 805                         nbytes = snprintf(buf, *bufsize, "%s",
 806                             ecn_sack_vals[i]);
 807                 }
 808                 break;
 809         default:
 810                 return (IPADM_INVALID_ARG);
 811         }
 812         if (nbytes >= *bufsize) {
 813                 /* insufficient buffer space */
 814                 *bufsize = nbytes + 1;
 815                 return (IPADM_NO_BUFS);
 816         }
 817 
 818         return (status);
 819 }
 820 
 821 /*
 822  * Retrieves the list of possible congestion control algorithms by enumerating
 823  * the modules in /kernel/cc.
 824  */
 825 /* ARGSUSED */
 826 ipadm_status_t
 827 i_ipadm_get_cc(ipadm_handle_t iph, const void *arg, ipadm_prop_desc_t *pdp,
 828     char *buf, uint_t *bufsize, uint_t proto, uint_t valtype)
 829 {
 830         DIR *dir;
 831         struct dirent *ent;
 832         boolean_t first = B_TRUE;
 833         uint_t bytes = 0;
 834 
 835         assert(valtype == MOD_PROP_POSSIBLE);
 836 
 837         /* We assume that all platforms have the same algorithms installed. */
 838         if ((dir = opendir("/kernel/cc")) != NULL) {
 839                 while ((ent = readdir(dir)) != NULL) {
 840                         /* By convention, modules are named cc_<algo>. */
 841                         if (strstr(ent->d_name, "cc_") != NULL) {
 842                                 bytes += snprintf(buf + bytes,
 843                                     bytes < *bufsize ? *bufsize - bytes : 0,
 844                                     "%s%s", first ? "" : ",", ent->d_name + 3);
 845                                 first = B_FALSE;
 846                         }
 847                 }
 848                 (void) closedir(dir);
 849         }
 850         if (bytes >= *bufsize) {
 851                 *bufsize = bytes + 1;
 852                 return (IPADM_NO_BUFS);
 853         }
 854         return (IPADM_SUCCESS);
 855 }
 856 
 857 /* ARGSUSED */
 858 static ipadm_status_t
 859 i_ipadm_get_forwarding(ipadm_handle_t iph, const void *arg,
 860     ipadm_prop_desc_t *pdp, char *buf, uint_t *bufsize, uint_t proto,
 861     uint_t valtype)
 862 {
 863         const char      *ifname = arg;
 864         ipadm_status_t  status = IPADM_SUCCESS;
 865 
 866         /*
 867          * if interface name is provided, then get forwarding status using
 868          * SIOCGLIFFLAGS
 869          */
 870         if (ifname != NULL) {
 871                 status = i_ipadm_get_ifprop_flags(iph, ifname, pdp,
 872                     buf, bufsize, pdp->ipd_proto, valtype);
 873         } else {
 874                 status = i_ipadm_get_prop(iph, ifname, pdp, buf,
 875                     bufsize, proto, valtype);
 876                 /*