Print this page
917 Make TCP's iss_incr a tunable
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_tunables.c
+++ new/usr/src/uts/common/inet/tcp/tcp_tunables.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright (c) 2011, Joyent Inc. All rights reserved.
23 24 */
24 25 /* Copyright (c) 1990 Mentat Inc. */
25 26
26 27 #include <inet/ip.h>
27 28 #include <inet/tcp_impl.h>
28 29 #include <sys/multidata.h>
29 30 #include <sys/sunddi.h>
30 31
/*
 * Max size IP datagram is 64k - 1.  The per-family MSS ceiling is that
 * maximum (IP_MAXPACKET) less the size of the IP header structure and the
 * TCP header structure.
 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/* The larger of the two ceilings above. */
#define	TCP_MSS_MAX	TCP_MSS_MAX_IPV4

/*
 * Water marks referenced by the send_maxbuf, _xmit_lowat and recv_maxbuf
 * entries of tcp_propinfo_tbl.
 */
#define	TCP_XMIT_LOWATER	4096
#define	TCP_XMIT_HIWATER	49152
#define	TCP_RECV_LOWATER	2048
#define	TCP_RECV_HIWATER	128000
42 43
43 44 /*
44 45 * Set the RFC 1948 pass phrase
45 46 */
46 47 /* ARGSUSED */
47 48 static int
48 49 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
49 50 const char *ifname, const void* pr_val, uint_t flags)
50 51 {
51 52 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
52 53
53 54 if (flags & MOD_PROP_DEFAULT)
54 55 return (ENOTSUP);
55 56
56 57 /*
57 58 * Basically, value contains a new pass phrase. Pass it along!
58 59 */
59 60 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
60 61 return (0);
61 62 }
62 63
63 64 /*
64 65 * returns the current list of listener limit configuration.
65 66 */
66 67 /* ARGSUSED */
67 68 static int
68 69 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
69 70 void *val, uint_t psize, uint_t flags)
70 71 {
71 72 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
72 73 tcp_listener_t *tl;
73 74 char *pval = val;
74 75 size_t nbytes = 0, tbytes = 0;
75 76 uint_t size;
76 77 int err = 0;
77 78
78 79 bzero(pval, psize);
79 80 size = psize;
80 81
81 82 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
82 83 return (0);
83 84
84 85 mutex_enter(&tcps->tcps_listener_conf_lock);
85 86 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
86 87 tl = list_next(&tcps->tcps_listener_conf, tl)) {
87 88 if (psize == size)
88 89 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
89 90 tl->tl_ratio);
90 91 else
91 92 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
92 93 tl->tl_ratio);
93 94 size -= nbytes;
94 95 pval += nbytes;
95 96 tbytes += nbytes;
96 97 if (tbytes >= psize) {
97 98 /* Buffer overflow, stop copying information */
98 99 err = ENOBUFS;
99 100 break;
100 101 }
101 102 }
102 103
103 104 mutex_exit(&tcps->tcps_listener_conf_lock);
104 105 return (err);
105 106 }
106 107
107 108 /*
108 109 * add a new listener limit configuration.
109 110 */
110 111 /* ARGSUSED */
111 112 static int
112 113 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
113 114 const char *ifname, const void* pval, uint_t flags)
114 115 {
115 116 tcp_listener_t *new_tl;
116 117 tcp_listener_t *tl;
117 118 long lport;
118 119 long ratio;
119 120 char *colon;
120 121 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
121 122
122 123 if (flags & MOD_PROP_DEFAULT)
123 124 return (ENOTSUP);
124 125
125 126 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
126 127 lport > USHRT_MAX || *colon != ':') {
127 128 return (EINVAL);
128 129 }
129 130 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
130 131 return (EINVAL);
131 132
132 133 mutex_enter(&tcps->tcps_listener_conf_lock);
133 134 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
134 135 tl = list_next(&tcps->tcps_listener_conf, tl)) {
135 136 /* There is an existing entry, so update its ratio value. */
136 137 if (tl->tl_port == lport) {
137 138 tl->tl_ratio = ratio;
138 139 mutex_exit(&tcps->tcps_listener_conf_lock);
139 140 return (0);
140 141 }
141 142 }
142 143
143 144 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
144 145 NULL) {
145 146 mutex_exit(&tcps->tcps_listener_conf_lock);
146 147 return (ENOMEM);
147 148 }
148 149
149 150 new_tl->tl_port = lport;
150 151 new_tl->tl_ratio = ratio;
151 152 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
152 153 mutex_exit(&tcps->tcps_listener_conf_lock);
153 154 return (0);
154 155 }
155 156
156 157 /*
157 158 * remove a listener limit configuration.
158 159 */
159 160 /* ARGSUSED */
160 161 static int
161 162 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
162 163 const char *ifname, const void* pval, uint_t flags)
163 164 {
164 165 tcp_listener_t *tl;
165 166 long lport;
166 167 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
167 168
168 169 if (flags & MOD_PROP_DEFAULT)
169 170 return (ENOTSUP);
170 171
171 172 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
172 173 lport > USHRT_MAX) {
173 174 return (EINVAL);
174 175 }
175 176 mutex_enter(&tcps->tcps_listener_conf_lock);
176 177 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
177 178 tl = list_next(&tcps->tcps_listener_conf, tl)) {
178 179 if (tl->tl_port == lport) {
179 180 list_remove(&tcps->tcps_listener_conf, tl);
180 181 mutex_exit(&tcps->tcps_listener_conf_lock);
181 182 kmem_free(tl, sizeof (tcp_listener_t));
182 183 return (0);
183 184 }
184 185 }
185 186 mutex_exit(&tcps->tcps_listener_conf_lock);
186 187 return (ESRCH);
187 188 }
188 189
189 190 /*
190 191 * All of these are alterable, within the min/max values given, at run time.
191 192 *
192 193 * Note: All those tunables which do not start with "_" are Committed and
193 194 * therefore are public. See PSARC 2010/080.
194 195 */
195 196 mod_prop_info_t tcp_propinfo_tbl[] = {
196 197 /* tunable - 0 */
197 198 { "_time_wait_interval", MOD_PROTO_TCP,
198 199 mod_set_uint32, mod_get_uint32,
199 200 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },
200 201
201 202 { "_conn_req_max_q", MOD_PROTO_TCP,
202 203 mod_set_uint32, mod_get_uint32,
203 204 {1, UINT32_MAX, 128}, {128} },
204 205
205 206 { "_conn_req_max_q0", MOD_PROTO_TCP,
206 207 mod_set_uint32, mod_get_uint32,
207 208 {0, UINT32_MAX, 1024}, {1024} },
208 209
209 210 { "_conn_req_min", MOD_PROTO_TCP,
210 211 mod_set_uint32, mod_get_uint32,
211 212 {1, 1024, 1}, {1} },
212 213
213 214 { "_conn_grace_period", MOD_PROTO_TCP,
214 215 mod_set_uint32, mod_get_uint32,
215 216 {0*MS, 20*SECONDS, 0*MS}, {0*MS} },
216 217
217 218 { "_cwnd_max", MOD_PROTO_TCP,
218 219 mod_set_uint32, mod_get_uint32,
219 220 {128, (1<<30), 1024*1024}, {1024*1024} },
220 221
221 222 { "_debug", MOD_PROTO_TCP,
222 223 mod_set_uint32, mod_get_uint32,
223 224 {0, 10, 0}, {0} },
224 225
225 226 { "smallest_nonpriv_port", MOD_PROTO_TCP,
226 227 mod_set_uint32, mod_get_uint32,
227 228 {1024, (32*1024), 1024}, {1024} },
228 229
229 230 { "_ip_abort_cinterval", MOD_PROTO_TCP,
230 231 mod_set_uint32, mod_get_uint32,
231 232 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
232 233
233 234 { "_ip_abort_linterval", MOD_PROTO_TCP,
234 235 mod_set_uint32, mod_get_uint32,
235 236 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
236 237
237 238 /* tunable - 10 */
238 239 { "_ip_abort_interval", MOD_PROTO_TCP,
239 240 mod_set_uint32, mod_get_uint32,
240 241 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },
241 242
242 243 { "_ip_notify_cinterval", MOD_PROTO_TCP,
243 244 mod_set_uint32, mod_get_uint32,
244 245 {1*SECONDS, UINT32_MAX, 10*SECONDS},
245 246 {10*SECONDS} },
246 247
247 248 { "_ip_notify_interval", MOD_PROTO_TCP,
248 249 mod_set_uint32, mod_get_uint32,
249 250 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },
250 251
251 252 { "_ipv4_ttl", MOD_PROTO_TCP,
252 253 mod_set_uint32, mod_get_uint32,
253 254 {1, 255, 64}, {64} },
254 255
255 256 { "_keepalive_interval", MOD_PROTO_TCP,
256 257 mod_set_uint32, mod_get_uint32,
257 258 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
258 259
259 260 { "_maxpsz_multiplier", MOD_PROTO_TCP,
260 261 mod_set_uint32, mod_get_uint32,
261 262 {0, 100, 10}, {10} },
262 263
263 264 { "_mss_def_ipv4", MOD_PROTO_TCP,
264 265 mod_set_uint32, mod_get_uint32,
265 266 {1, TCP_MSS_MAX_IPV4, 536}, {536} },
266 267
267 268 { "_mss_max_ipv4", MOD_PROTO_TCP,
268 269 mod_set_uint32, mod_get_uint32,
269 270 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
270 271 {TCP_MSS_MAX_IPV4} },
271 272
272 273 { "_mss_min", MOD_PROTO_TCP,
273 274 mod_set_uint32, mod_get_uint32,
274 275 {1, TCP_MSS_MAX, 108}, {108} },
275 276
276 277 { "_naglim_def", MOD_PROTO_TCP,
277 278 mod_set_uint32, mod_get_uint32,
278 279 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
279 280
280 281 /* tunable - 20 */
281 282 { "_rexmit_interval_initial", MOD_PROTO_TCP,
282 283 mod_set_uint32, mod_get_uint32,
283 284 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },
284 285
285 286 { "_rexmit_interval_max", MOD_PROTO_TCP,
286 287 mod_set_uint32, mod_get_uint32,
287 288 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },
288 289
289 290 { "_rexmit_interval_min", MOD_PROTO_TCP,
290 291 mod_set_uint32, mod_get_uint32,
291 292 {1*MS, 2*HOURS, 400*MS}, {400*MS} },
292 293
293 294 { "_deferred_ack_interval", MOD_PROTO_TCP,
294 295 mod_set_uint32, mod_get_uint32,
295 296 {1*MS, 1*MINUTES, 100*MS}, {100*MS} },
296 297
297 298 { "_snd_lowat_fraction", MOD_PROTO_TCP,
298 299 mod_set_uint32, mod_get_uint32,
299 300 {0, 16, 0}, {0} },
300 301
301 302 { "_dupack_fast_retransmit", MOD_PROTO_TCP,
302 303 mod_set_uint32, mod_get_uint32,
303 304 {1, 10000, 3}, {3} },
304 305
305 306 { "_ignore_path_mtu", MOD_PROTO_TCP,
306 307 mod_set_boolean, mod_get_boolean,
307 308 {B_FALSE}, {B_FALSE} },
308 309
309 310 { "smallest_anon_port", MOD_PROTO_TCP,
310 311 mod_set_uint32, mod_get_uint32,
311 312 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },
312 313
313 314 { "largest_anon_port", MOD_PROTO_TCP,
314 315 mod_set_uint32, mod_get_uint32,
315 316 {1024, ULP_MAX_PORT, ULP_MAX_PORT},
316 317 {ULP_MAX_PORT} },
317 318
318 319 { "send_maxbuf", MOD_PROTO_TCP,
319 320 mod_set_uint32, mod_get_uint32,
320 321 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
321 322 {TCP_XMIT_HIWATER} },
322 323
323 324 /* tunable - 30 */
324 325 { "_xmit_lowat", MOD_PROTO_TCP,
325 326 mod_set_uint32, mod_get_uint32,
326 327 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
327 328 {TCP_XMIT_LOWATER} },
328 329
329 330 { "recv_maxbuf", MOD_PROTO_TCP,
330 331 mod_set_uint32, mod_get_uint32,
331 332 {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
332 333 {TCP_RECV_HIWATER} },
333 334
334 335 { "_recv_hiwat_minmss", MOD_PROTO_TCP,
335 336 mod_set_uint32, mod_get_uint32,
336 337 {1, 65536, 4}, {4} },
337 338
338 339 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
339 340 mod_set_uint32, mod_get_uint32,
340 341 {1*SECONDS, 2*HOURS, 60*SECONDS},
341 342 {60*SECONDS} },
342 343
343 344 { "_max_buf", MOD_PROTO_TCP,
344 345 mod_set_uint32, mod_get_uint32,
345 346 {8192, (1<<30), 1024*1024}, {1024*1024} },
346 347
347 348 /*
348 349 * Question: What default value should I set for tcp_strong_iss?
349 350 */
350 351 { "_strong_iss", MOD_PROTO_TCP,
351 352 mod_set_uint32, mod_get_uint32,
352 353 {0, 2, 1}, {1} },
353 354
354 355 { "_rtt_updates", MOD_PROTO_TCP,
355 356 mod_set_uint32, mod_get_uint32,
356 357 {0, 65536, 20}, {20} },
357 358
358 359 { "_wscale_always", MOD_PROTO_TCP,
359 360 mod_set_boolean, mod_get_boolean,
360 361 {B_TRUE}, {B_TRUE} },
361 362
362 363 { "_tstamp_always", MOD_PROTO_TCP,
363 364 mod_set_boolean, mod_get_boolean,
364 365 {B_FALSE}, {B_FALSE} },
365 366
366 367 { "_tstamp_if_wscale", MOD_PROTO_TCP,
367 368 mod_set_boolean, mod_get_boolean,
368 369 {B_TRUE}, {B_TRUE} },
369 370
370 371 /* tunable - 40 */
371 372 { "_rexmit_interval_extra", MOD_PROTO_TCP,
372 373 mod_set_uint32, mod_get_uint32,
373 374 {0*MS, 2*HOURS, 0*MS}, {0*MS} },
374 375
375 376 { "_deferred_acks_max", MOD_PROTO_TCP,
376 377 mod_set_uint32, mod_get_uint32,
377 378 {0, 16, 2}, {2} },
378 379
379 380 { "_slow_start_after_idle", MOD_PROTO_TCP,
380 381 mod_set_uint32, mod_get_uint32,
381 382 {1, 16384, 4}, {4} },
382 383
383 384 { "_slow_start_initial", MOD_PROTO_TCP,
384 385 mod_set_uint32, mod_get_uint32,
385 386 {1, 4, 4}, {4} },
386 387
387 388 { "sack", MOD_PROTO_TCP,
388 389 mod_set_uint32, mod_get_uint32,
389 390 {0, 2, 2}, {2} },
390 391
391 392 { "_ipv6_hoplimit", MOD_PROTO_TCP,
392 393 mod_set_uint32, mod_get_uint32,
393 394 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
394 395 {IPV6_DEFAULT_HOPS} },
395 396
396 397 { "_mss_def_ipv6", MOD_PROTO_TCP,
397 398 mod_set_uint32, mod_get_uint32,
398 399 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },
399 400
400 401 { "_mss_max_ipv6", MOD_PROTO_TCP,
401 402 mod_set_uint32, mod_get_uint32,
402 403 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
403 404 {TCP_MSS_MAX_IPV6} },
404 405
405 406 { "_rev_src_routes", MOD_PROTO_TCP,
406 407 mod_set_boolean, mod_get_boolean,
407 408 {B_FALSE}, {B_FALSE} },
408 409
409 410 { "_local_dack_interval", MOD_PROTO_TCP,
410 411 mod_set_uint32, mod_get_uint32,
411 412 {10*MS, 500*MS, 50*MS}, {50*MS} },
412 413
413 414 /* tunable - 50 */
414 415 { "_local_dacks_max", MOD_PROTO_TCP,
415 416 mod_set_uint32, mod_get_uint32,
416 417 {0, 16, 8}, {8} },
417 418
418 419 { "ecn", MOD_PROTO_TCP,
419 420 mod_set_uint32, mod_get_uint32,
420 421 {0, 2, 1}, {1} },
421 422
422 423 { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
423 424 mod_set_boolean, mod_get_boolean,
424 425 {B_TRUE}, {B_TRUE} },
425 426
426 427 { "_rst_sent_rate", MOD_PROTO_TCP,
427 428 mod_set_uint32, mod_get_uint32,
428 429 {0, UINT32_MAX, 40}, {40} },
429 430
430 431 { "_push_timer_interval", MOD_PROTO_TCP,
431 432 mod_set_uint32, mod_get_uint32,
432 433 {0, 100*MS, 50*MS}, {50*MS} },
433 434
434 435 { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
435 436 mod_set_boolean, mod_get_boolean,
436 437 {B_FALSE}, {B_FALSE} },
437 438
438 439 { "_keepalive_abort_interval", MOD_PROTO_TCP,
439 440 mod_set_uint32, mod_get_uint32,
440 441 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },
441 442
442 443 /*
443 444 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
444 445 * layer header. It has to be a multiple of 8.
445 446 */
446 447 { "_wroff_xtra", MOD_PROTO_TCP,
447 448 mod_set_aligned, mod_get_uint32,
448 449 {0, 256, 32}, {32} },
449 450
450 451 { "_dev_flow_ctl", MOD_PROTO_TCP,
451 452 mod_set_boolean, mod_get_boolean,
452 453 {B_FALSE}, {B_FALSE} },
453 454
454 455 { "_reass_timeout", MOD_PROTO_TCP,
455 456 mod_set_uint32, mod_get_uint32,
456 457 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },
457 458
458 459 /* tunable - 60 */
459 460 { "extra_priv_ports", MOD_PROTO_TCP,
460 461 mod_set_extra_privports, mod_get_extra_privports,
461 462 {1, ULP_MAX_PORT, 0}, {0} },
462 463
463 464 { "_1948_phrase", MOD_PROTO_TCP,
464 465 tcp_set_1948phrase, NULL, {0}, {0} },
|
↓ open down ↓ |
432 lines elided |
↑ open up ↑ |
465 466
466 467 { "_listener_limit_conf", MOD_PROTO_TCP,
467 468 NULL, tcp_listener_conf_get, {0}, {0} },
468 469
469 470 { "_listener_limit_conf_add", MOD_PROTO_TCP,
470 471 tcp_listener_conf_add, NULL, {0}, {0} },
471 472
472 473 { "_listener_limit_conf_del", MOD_PROTO_TCP,
473 474 tcp_listener_conf_del, NULL, {0}, {0} },
474 475
476 + { "_iss_incr", MOD_PROTO_TCP,
477 + mod_set_uint32, mod_get_uint32,
478 + {1, ISS_INCR, ISS_INCR},
479 + {ISS_INCR} },
480 +
475 481 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
476 482
477 483 { NULL, 0, NULL, NULL, {0}, {0} }
478 484 };
479 485
480 486 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX