1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /* Copyright (c) 1990 Mentat Inc. */
  25 
  26 #include <inet/ip.h>
  27 #include <inet/tcp_impl.h>
  28 #include <sys/multidata.h>
  29 #include <sys/sunddi.h>
  30 
/*
 * Largest MSS per IP version: the maximum IP datagram size (IP_MAXPACKET,
 * 64k - 1) less the size of the IP header structure and the TCP header
 * structure.
 */
#define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/*
 * Max of the above.
 * NOTE(review): picking the IPv4 value assumes sizeof (ipha_t) is smaller
 * than sizeof (ip6_t) -- confirm against the header definitions.
 */
#define TCP_MSS_MAX             TCP_MSS_MAX_IPV4

/*
 * Byte counts used in tcp_propinfo_tbl[] below as the minimum (LOWATER) and
 * the default (HIWATER, or LOWATER for "_xmit_lowat") of the send/receive
 * buffer tunables.
 */
#define TCP_XMIT_LOWATER        4096
#define TCP_XMIT_HIWATER        49152
#define TCP_RECV_LOWATER        2048
#define TCP_RECV_HIWATER        128000
  42 
  43 /*
  44  * Set the RFC 1948 pass phrase
  45  */
  46 /* ARGSUSED */
  47 static int
  48 tcp_set_1948phrase(void *cbarg,  cred_t *cr, mod_prop_info_t *pinfo,
  49     const char *ifname, const void* pr_val, uint_t flags)
  50 {
  51         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
  52 
  53         if (flags & MOD_PROP_DEFAULT)
  54                 return (ENOTSUP);
  55 
  56         /*
  57          * Basically, value contains a new pass phrase.  Pass it along!
  58          */
  59         tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
  60         return (0);
  61 }
  62 
  63 /*
  64  * returns the current list of listener limit configuration.
  65  */
  66 /* ARGSUSED */
  67 static int
  68 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
  69     void *val, uint_t psize, uint_t flags)
  70 {
  71         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
  72         tcp_listener_t  *tl;
  73         char            *pval = val;
  74         size_t          nbytes = 0, tbytes = 0;
  75         uint_t          size;
  76         int             err = 0;
  77 
  78         bzero(pval, psize);
  79         size = psize;
  80 
  81         if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
  82                 return (0);
  83 
  84         mutex_enter(&tcps->tcps_listener_conf_lock);
  85         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
  86             tl = list_next(&tcps->tcps_listener_conf, tl)) {
  87                 if (psize == size)
  88                         nbytes = snprintf(pval, size, "%d:%d",  tl->tl_port,
  89                             tl->tl_ratio);
  90                 else
  91                         nbytes = snprintf(pval, size, ",%d:%d",  tl->tl_port,
  92                             tl->tl_ratio);
  93                 size -= nbytes;
  94                 pval += nbytes;
  95                 tbytes += nbytes;
  96                 if (tbytes >= psize) {
  97                         /* Buffer overflow, stop copying information */
  98                         err = ENOBUFS;
  99                         break;
 100                 }
 101         }
 102 
 103         mutex_exit(&tcps->tcps_listener_conf_lock);
 104         return (err);
 105 }
 106 
 107 /*
 108  * add a new listener limit configuration.
 109  */
 110 /* ARGSUSED */
 111 static int
 112 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
 113     const char *ifname, const void* pval, uint_t flags)
 114 {
 115         tcp_listener_t  *new_tl;
 116         tcp_listener_t  *tl;
 117         long            lport;
 118         long            ratio;
 119         char            *colon;
 120         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
 121 
 122         if (flags & MOD_PROP_DEFAULT)
 123                 return (ENOTSUP);
 124 
 125         if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
 126             lport > USHRT_MAX || *colon != ':') {
 127                 return (EINVAL);
 128         }
 129         if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
 130                 return (EINVAL);
 131 
 132         mutex_enter(&tcps->tcps_listener_conf_lock);
 133         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
 134             tl = list_next(&tcps->tcps_listener_conf, tl)) {
 135                 /* There is an existing entry, so update its ratio value. */
 136                 if (tl->tl_port == lport) {
 137                         tl->tl_ratio = ratio;
 138                         mutex_exit(&tcps->tcps_listener_conf_lock);
 139                         return (0);
 140                 }
 141         }
 142 
 143         if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
 144             NULL) {
 145                 mutex_exit(&tcps->tcps_listener_conf_lock);
 146                 return (ENOMEM);
 147         }
 148 
 149         new_tl->tl_port = lport;
 150         new_tl->tl_ratio = ratio;
 151         list_insert_tail(&tcps->tcps_listener_conf, new_tl);
 152         mutex_exit(&tcps->tcps_listener_conf_lock);
 153         return (0);
 154 }
 155 
 156 /*
 157  * remove a listener limit configuration.
 158  */
 159 /* ARGSUSED */
 160 static int
 161 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
 162     const char *ifname, const void* pval, uint_t flags)
 163 {
 164         tcp_listener_t  *tl;
 165         long            lport;
 166         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
 167 
 168         if (flags & MOD_PROP_DEFAULT)
 169                 return (ENOTSUP);
 170 
 171         if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
 172             lport > USHRT_MAX) {
 173                 return (EINVAL);
 174         }
 175         mutex_enter(&tcps->tcps_listener_conf_lock);
 176         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
 177             tl = list_next(&tcps->tcps_listener_conf, tl)) {
 178                 if (tl->tl_port == lport) {
 179                         list_remove(&tcps->tcps_listener_conf, tl);
 180                         mutex_exit(&tcps->tcps_listener_conf_lock);
 181                         kmem_free(tl, sizeof (tcp_listener_t));
 182                         return (0);
 183                 }
 184         }
 185         mutex_exit(&tcps->tcps_listener_conf_lock);
 186         return (ESRCH);
 187 }
 188 
/*
 * Table of TCP properties (tunables).
 *
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Note: All those tunables which do not start with "_" are Committed and
 * therefore are public. See PSARC 2010/080.
 *
 * Each entry gives, in order: the property name, the protocol it belongs to
 * (always MOD_PROTO_TCP here), the "set" callback, the "get" callback, and
 * two brace-enclosed value groups.  A NULL callback means the entry has no
 * handler for that operation.
 * NOTE(review): judging by the values, the first group is {min, max, current}
 * for the uint32 style entries (just {current} for booleans) and the second
 * group is {default} -- confirm against the mod_prop_info_t definition.
 *
 * The "tunable - N" comments mark the zero-based index of the entry that
 * follows them.  Entries must not be reordered without checking for code
 * that refers to them by position (presumably accessor macros elsewhere --
 * TODO confirm).
 *
 * The table ends with an all-NULL/zero terminator entry.
 */
mod_prop_info_t tcp_propinfo_tbl[] = {
        /* tunable - 0 */
        { "_time_wait_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },

        { "_conn_req_max_q", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, UINT32_MAX, 128}, {128} },

        { "_conn_req_max_q0", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 1024}, {1024} },

        { "_conn_req_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 1024, 1}, {1} },

        { "_conn_grace_period", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0*MS, 20*SECONDS, 0*MS}, {0*MS} },

        { "_cwnd_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {128, (1<<30), 1024*1024}, {1024*1024} },

        { "_debug", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 10, 0}, {0} },

        { "smallest_nonpriv_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, (32*1024), 1024}, {1024} },

        { "_ip_abort_cinterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

        { "_ip_abort_linterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

        /* tunable - 10 */
        { "_ip_abort_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },

        { "_ip_notify_cinterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 10*SECONDS},
            {10*SECONDS} },

        { "_ip_notify_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },

        { "_ipv4_ttl", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 255, 64}, {64} },

        { "_keepalive_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },

        { "_maxpsz_multiplier", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 100, 10}, {10} },

        { "_mss_def_ipv4", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV4, 536}, {536} },

        { "_mss_max_ipv4", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
            {TCP_MSS_MAX_IPV4} },

        { "_mss_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX, 108}, {108} },

        { "_naglim_def", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },

        /* tunable - 20 */
        { "_rexmit_interval_initial", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },

        { "_rexmit_interval_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },

        { "_rexmit_interval_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 2*HOURS, 400*MS}, {400*MS} },

        { "_deferred_ack_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 1*MINUTES, 100*MS}, {100*MS} },

        { "_snd_lowat_fraction", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 0}, {0} },

        { "_dupack_fast_retransmit", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 10000, 3}, {3} },

        { "_ignore_path_mtu", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "smallest_anon_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },

        { "largest_anon_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, ULP_MAX_PORT, ULP_MAX_PORT},
            {ULP_MAX_PORT} },

        { "send_maxbuf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
            {TCP_XMIT_HIWATER} },

        /* tunable - 30 */
        { "_xmit_lowat", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
            {TCP_XMIT_LOWATER} },

        { "recv_maxbuf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
            {TCP_RECV_HIWATER} },

        { "_recv_hiwat_minmss", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 65536, 4}, {4} },

        { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, 2*HOURS, 60*SECONDS},
            {60*SECONDS} },

        { "_max_buf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {8192, (1<<30), 1024*1024}, {1024*1024} },

        /*
         * Question:  What default value should I set for tcp_strong_iss?
         */
        { "_strong_iss", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 1}, {1} },

        { "_rtt_updates", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 65536, 20}, {20} },

        { "_wscale_always", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        { "_tstamp_always", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_tstamp_if_wscale", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        /* tunable - 40 */
        { "_rexmit_interval_extra", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0*MS, 2*HOURS, 0*MS}, {0*MS} },

        { "_deferred_acks_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 2}, {2} },

        { "_slow_start_after_idle", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 16384, 4}, {4} },

        { "_slow_start_initial", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 4, 4}, {4} },

        { "sack", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 2}, {2} },

        { "_ipv6_hoplimit", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
            {IPV6_DEFAULT_HOPS} },

        { "_mss_def_ipv6", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },

        { "_mss_max_ipv6", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
            {TCP_MSS_MAX_IPV6} },

        { "_rev_src_routes", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_local_dack_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {10*MS, 500*MS, 50*MS}, {50*MS} },

        /* tunable - 50 */
        { "_local_dacks_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 8}, {8} },

        { "ecn", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 1}, {1} },

        { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        { "_rst_sent_rate", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 40}, {40} },

        { "_push_timer_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 100*MS, 50*MS}, {50*MS} },

        { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_keepalive_abort_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },

        /*
         * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
         * layer header.  It has to be a multiple of 8, which is why this
         * entry uses mod_set_aligned rather than mod_set_uint32 as its
         * "set" callback (presumably that callback enforces the alignment --
         * TODO confirm).
         */
        { "_wroff_xtra", MOD_PROTO_TCP,
            mod_set_aligned, mod_get_uint32,
            {0, 256, 32}, {32} },

        { "_dev_flow_ctl", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_reass_timeout", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },

        /* tunable - 60 */
        { "extra_priv_ports", MOD_PROTO_TCP,
            mod_set_extra_privports, mod_get_extra_privports,
            {1, ULP_MAX_PORT, 0}, {0} },

        /*
         * The remaining entries are served by the callbacks defined in this
         * file (or have only a "get" callback) and carry no value ranges.
         */
        { "_1948_phrase", MOD_PROTO_TCP,
            tcp_set_1948phrase, NULL, {0}, {0} },

        { "_listener_limit_conf", MOD_PROTO_TCP,
            NULL, tcp_listener_conf_get, {0}, {0} },

        { "_listener_limit_conf_add", MOD_PROTO_TCP,
            tcp_listener_conf_add, NULL, {0}, {0} },

        { "_listener_limit_conf_del", MOD_PROTO_TCP,
            tcp_listener_conf_del, NULL, {0}, {0} },

        { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },

        /* Terminator entry. */
        { NULL, 0, NULL, NULL, {0}, {0} }
};

/*
 * Number of elements in tcp_propinfo_tbl[].
 * NOTE(review): assuming A_CNT() is the usual array-element-count macro,
 * this count includes the terminator entry -- confirm.
 */
int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);