Print this page
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-42721 Create inline function for TCP RTO calculation
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright 2016 Joyent, Inc.
25 + * Copyright (c) 2016 by Delphix. All rights reserved.
25 26 */
26 27
27 28 #include <sys/types.h>
28 29 #include <sys/stream.h>
29 30 #define _SUN_TPI_VERSION 2
30 31 #include <sys/tihdr.h>
31 32 #include <sys/socket.h>
32 33 #include <sys/xti_xtiopt.h>
33 34 #include <sys/xti_inet.h>
34 35 #include <sys/policy.h>
35 36
36 37 #include <inet/common.h>
37 38 #include <netinet/ip6.h>
38 39 #include <inet/ip.h>
39 40
40 41 #include <netinet/in.h>
41 42 #include <netinet/tcp.h>
42 43 #include <inet/optcom.h>
43 44 #include <inet/proto_set.h>
44 45 #include <inet/tcp_impl.h>
45 46
46 47 static int tcp_opt_default(queue_t *, int, int, uchar_t *);
47 48
48 49 /*
49 50 * Table of all known options handled on a TCP protocol stack.
50 51 *
51 52 * Note: This table contains options processed by both TCP and IP levels
52 53 * and is the superset of options that can be performed on a TCP over IP
53 54 * stack.
 *
 * Each initializer lists, in order: the option name, the option level,
 * two access masks (OA_R/OA_RW), a privilege class (OP_NP, OP_CONFIG,
 * OP_PRIVPORT or OP_RAW), property flags (0, OP_DEF_FN, OP_VARLEN,
 * OP_NODEFAULT), the option size and a default value.
 * NOTE(review): the field meanings above are inferred from the values
 * used in this table; confirm against the opdes_t definition.
 *
 * Entries flagged OP_DEF_FN carry a placeholder default of -1 ("not
 * initialized"); they are exactly the options that tcp_opt_default()
 * knows how to answer.
54 55 */
55 56 opdes_t tcp_opt_arr[] = {
56 57 /* SOL_SOCKET level options */
57 58 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
58 59 sizeof (struct linger), 0 },
59 60
60 61 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
61 62 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62 63 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63 64 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
64 65 },
65 66 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 67 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 68 { SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68 69 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 70 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
70 71 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
71 72 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72 73 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
73 74 sizeof (struct timeval), 0 },
74 75 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
75 76 sizeof (struct timeval), 0 },
76 77 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
77 78 },
78 79 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
79 80 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
80 81 0 },
81 82 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
82 83 0 },
83 84 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
84 85 0 },
85 86 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
86 87 0 },
87 88 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
88 89
89 90 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
90 91
91 92 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
92 93 /* IPPROTO_TCP level options */
93 94 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
94 95 },
95 96 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
96 97 536 },
97 98
98 99 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
99 100 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
100 101
101 102 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
102 103 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
103 104
104 105 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
105 106 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
106 107
107 108 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
108 109 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
109 110
110 111 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
111 112 0 },
112 113
113 114 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
114 115 sizeof (int), 0 },
115 116
116 117 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
117 118 },
118 119
119 120 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
120 121 sizeof (int), 0 },
121 122
122 123 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
123 124 sizeof (int), 0 },
124 125
125 126 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
126 127
127 128 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
128 129
129 130 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
130 131
131 132 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
132 133 sizeof (int), 0 },
133 134
134 135 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
135 136
136 137 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
137 138
138 139 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
139 140
140 141 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
141 142
142 143 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
143 144 /* IPPROTO_IP and IPPROTO_IPV6 level options */
144 145 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
145 146 (OP_VARLEN|OP_NODEFAULT),
146 147 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
147 148 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
148 149 (OP_VARLEN|OP_NODEFAULT),
149 150 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
150 151
151 152 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
152 153 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
153 154 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
154 155 sizeof (int), -1 /* not initialized */ },
155 156
156 157 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
157 158 sizeof (ipsec_req_t), -1 /* not initialized */ },
158 159
159 160 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
160 161 sizeof (int), 0 /* no ifindex */ },
161 162
162 163 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
163 164 sizeof (int), 0 },
164 165
165 166 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
166 167 sizeof (int), -1 /* not initialized */ },
167 168
168 169 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
169 170 sizeof (int), 0 /* no ifindex */ },
170 171
171 172 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
172 173
173 174 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
174 175 sizeof (in_addr_t), -1 /* not initialized */ },
175 176
176 177 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
177 178 sizeof (int), 0 },
178 179
179 180 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
180 181 (OP_NODEFAULT|OP_VARLEN),
181 182 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
182 183 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
183 184 OP_NODEFAULT,
184 185 sizeof (sin6_t), -1 /* not initialized */ },
185 186 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
186 187 (OP_VARLEN|OP_NODEFAULT), 255*8,
187 188 -1 /* not initialized */ },
188 189 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
189 190 (OP_VARLEN|OP_NODEFAULT), 255*8,
190 191 -1 /* not initialized */ },
191 192 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
192 193 (OP_VARLEN|OP_NODEFAULT), 255*8,
193 194 -1 /* not initialized */ },
194 195 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
195 196 (OP_VARLEN|OP_NODEFAULT), 255*8,
196 197 -1 /* not initialized */ },
197 198 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
198 199 OP_NODEFAULT,
199 200 sizeof (int), -1 /* not initialized */ },
200 201 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
201 202 OP_NODEFAULT,
202 203 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
203 204 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
204 205 sizeof (int), 0 },
205 206 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
206 207 sizeof (int), 0 },
207 208 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
208 209 sizeof (int), 0 },
209 210
210 211 /* Enable receipt of ancillary data */
211 212 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 213 sizeof (int), 0 },
213 214 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
214 215 sizeof (int), 0 },
215 216 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
216 217 sizeof (int), 0 },
217 218 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 219 sizeof (int), 0 },
219 220 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 221 sizeof (int), 0 },
221 222 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 223 sizeof (int), 0 },
223 224 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 225 sizeof (int), 0 },
225 226 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
226 227 sizeof (int), 0 },
227 228
228 229 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
229 230 sizeof (ipsec_req_t), -1 /* not initialized */ },
230 231 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
231 232 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
232 233 };
233 234
234 235 /*
235 236 * Table of all option levels supported by TCP.
236 237 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
237 238 * any supported options, so this information is kept separately from
 * tcp_opt_arr.
238 239 *
239 240 * This is needed only for topmost tpi providers and is used only by
240 241 * XTI interfaces.
 *
 * The table and its entry count (TCP_VALID_LEVELS_CNT) are published
 * through tcp_opt_obj.
241 242 */
242 243 optlevel_t tcp_valid_levels_arr[] = {
243 244 XTI_GENERIC,
244 245 SOL_SOCKET,
245 246 IPPROTO_TCP,
246 247 IPPROTO_IP,
247 248 IPPROTO_IPV6
248 249 };
249 250
250 251
/*
 * Entry counts for tcp_opt_arr and tcp_valid_levels_arr (computed with
 * A_CNT); both are stored in tcp_opt_obj next to the tables themselves.
 */
251 252 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
252 253 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
253 254
254 255 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
255 256
256 257 /*
257 258 * Option database object for TCP.
258 259 *
259 260 * This object describes the database of options to search. It is
260 261 * passed to the {sock,tpi}optcom_req() interface routines, which take
261 262 * care of option management using the methods and tables recorded here:
 * the default/get/set handlers, the option table with its entry count,
 * and the valid-level table with its entry count.
262 263 */
263 264
264 265 optdb_obj_t tcp_opt_obj = {
265 266 tcp_opt_default, /* TCP default value function pointer */
266 267 tcp_tpi_opt_get, /* TCP get function pointer */
267 268 tcp_tpi_opt_set, /* TCP set function pointer */
268 269 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
269 270 tcp_opt_arr, /* TCP option database */
270 271 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
271 272 tcp_valid_levels_arr /* TCP valid level array */
272 273 };
273 274
/*
 * Ceiling for TCP_INIT_CWND: once a request exceeds the RFC 3390
 * allowance and has passed the privilege check, tcp_opt_set() still
 * rejects it with EINVAL if it is larger than this value.
 */
274 275 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
275 276
276 277 /*
277 278 * Some TCP options can be "set" by requesting them in the option
278 279 * buffer. This is needed for XTI feature test though we do not
279 280 * allow it in general. We interpret that this mechanism is more
280 281 * applicable to OSI protocols and need not be allowed in general.
281 282 * This routine filters out options for which it is not allowed (most)
282 283 * and lets through those (few) for which it is. [ The XTI interface
283 284 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
284 285 * ever implemented will have to be allowed here ].
 *
 * Returns B_TRUE only for the (IPPROTO_TCP, TCP_NODELAY) pair; every
 * other level/name combination yields B_FALSE.
285 286 */
286 287 static boolean_t
287 288 tcp_allow_connopt_set(int level, int name)
288 289 {
289 290
290 291 switch (level) {
291 292 case IPPROTO_TCP:
292 293 switch (name) {
293 294 case TCP_NODELAY:
294 295 return (B_TRUE);
295 296 default:
296 297 return (B_FALSE);
297 298 }
298 299 /*NOTREACHED*/
299 300 default:
300 301 return (B_FALSE);
301 302 }
302 303 /*NOTREACHED*/
303 304 }
304 305
305 306 /*
306 307 * This routine gets default values of certain options whose default
307 308 * values are maintained by protocol specific code.
 *
 * The defaults are read from the tcp_stack_t of the tcp instance that
 * owns queue q and stored through ptr as a single 32-bit integer.
 * Options served: TCP_NOTIFY_THRESHOLD, TCP_ABORT_THRESHOLD,
 * TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD (IPPROTO_TCP),
 * IP_TTL (IPPROTO_IP) and IPV6_UNICAST_HOPS (IPPROTO_IPV6).
 *
 * Returns sizeof (int) when a default was stored, or -1 for any other
 * level/name combination (nothing is written in that case).
308 309 */
309 310 /* ARGSUSED */
310 311 static int
311 312 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
312 313 {
313 314 int32_t *i1 = (int32_t *)ptr; /* output buffer viewed as one integer */
314 315 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
315 316
316 317 switch (level) {
317 318 case IPPROTO_TCP:
318 319 switch (name) {
319 320 case TCP_NOTIFY_THRESHOLD:
320 321 *i1 = tcps->tcps_ip_notify_interval;
321 322 break;
322 323 case TCP_ABORT_THRESHOLD:
323 324 *i1 = tcps->tcps_ip_abort_interval;
324 325 break;
325 326 case TCP_CONN_NOTIFY_THRESHOLD:
326 327 *i1 = tcps->tcps_ip_notify_cinterval;
327 328 break;
328 329 case TCP_CONN_ABORT_THRESHOLD:
329 330 *i1 = tcps->tcps_ip_abort_cinterval;
330 331 break;
331 332 default:
332 333 return (-1);
333 334 }
334 335 break;
335 336 case IPPROTO_IP:
336 337 switch (name) {
337 338 case IP_TTL:
338 339 *i1 = tcps->tcps_ipv4_ttl;
339 340 break;
340 341 default:
341 342 return (-1);
342 343 }
343 344 break;
344 345 case IPPROTO_IPV6:
345 346 switch (name) {
346 347 case IPV6_UNICAST_HOPS:
347 348 *i1 = tcps->tcps_ipv6_hoplimit;
348 349 break;
349 350 default:
350 351 return (-1);
351 352 }
352 353 break;
353 354 default:
354 355 return (-1);
355 356 }
356 357 return (sizeof (int)); /* every default handled above is one int */
357 358 }
358 359
359 360 /*
360 361 * TCP routine to get the values of options.
 *
 * Options that TCP keeps in its own state (SO_SND_COPYAVOID,
 * SO_ACCEPTCONN and the IPPROTO_TCP options listed below) are answered
 * directly from the tcp_t and the size of the value written to ptr is
 * returned. IP_OPTIONS/T_IP_OPTIONS are answered by ip_opt_get_user().
 * Anything else falls through to conn_opt_get(), called with conn_lock
 * held, and its return value is passed back unchanged.
 *
 * -1 is returned without consulting conn_opt_get() when:
 *   - an IPPROTO_IP option is requested on a non-AF_INET connection;
 *   - an IPPROTO_IPV6 option is requested on a connection that is not
 *     using IPv6 on the wire;
 *   - IPV6_PATHMTU is requested before the connection has reached
 *     TCPS_ESTABLISHED.
361 362 */
362 363 int
363 364 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
364 365 {
365 366 int *i1 = (int *)ptr; /* output buffer viewed as one integer */
366 367 tcp_t *tcp = connp->conn_tcp;
367 368 conn_opt_arg_t coas;
368 369 int retval;
369 370
370 371 coas.coa_connp = connp;
371 372 coas.coa_ixa = connp->conn_ixa;
372 373 coas.coa_ipp = &connp->conn_xmit_ipp;
373 374 coas.coa_ancillary = B_FALSE;
374 375 coas.coa_changed = 0;
375 376
376 377 switch (level) {
377 378 case SOL_SOCKET:
378 379 switch (name) {
379 380 case SO_SND_COPYAVOID:
380 381 *i1 = tcp->tcp_snd_zcopy_on ?
381 382 SO_SND_COPYAVOID : 0;
382 383 return (sizeof (int));
383 384 case SO_ACCEPTCONN:
384 385 *i1 = (tcp->tcp_state == TCPS_LISTEN);
385 386 return (sizeof (int));
386 387 }
387 388 break;
388 389 case IPPROTO_TCP:
389 390 switch (name) {
390 391 case TCP_NODELAY:
391 392 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
392 393 return (sizeof (int));
393 394 case TCP_MAXSEG:
394 395 *i1 = tcp->tcp_mss;
395 396 return (sizeof (int));
396 397 case TCP_NOTIFY_THRESHOLD:
397 398 *i1 = (int)tcp->tcp_first_timer_threshold;
398 399 return (sizeof (int));
399 400 case TCP_ABORT_THRESHOLD:
400 401 *i1 = tcp->tcp_second_timer_threshold;
401 402 return (sizeof (int));
402 403 case TCP_CONN_NOTIFY_THRESHOLD:
403 404 *i1 = tcp->tcp_first_ctimer_threshold;
404 405 return (sizeof (int));
405 406 case TCP_CONN_ABORT_THRESHOLD:
406 407 *i1 = tcp->tcp_second_ctimer_threshold;
407 408 return (sizeof (int));
408 409 case TCP_INIT_CWND:
409 410 *i1 = tcp->tcp_init_cwnd;
410 411 return (sizeof (int));
411 412 case TCP_KEEPALIVE_THRESHOLD:
412 413 *i1 = tcp->tcp_ka_interval;
413 414 return (sizeof (int));
414 415
415 416 /*
416 417 * TCP_KEEPIDLE expects value in seconds, but
417 418 * tcp_ka_interval is in milliseconds.
 * The integer division discards any sub-second remainder.
418 419 */
419 420 case TCP_KEEPIDLE:
420 421 *i1 = tcp->tcp_ka_interval / 1000;
421 422 return (sizeof (int));
422 423 case TCP_KEEPCNT:
423 424 *i1 = tcp->tcp_ka_cnt;
424 425 return (sizeof (int));
425 426
426 427 /*
427 428 * TCP_KEEPINTVL expects value in seconds, but
428 429 * tcp_ka_rinterval is in milliseconds.
 * The integer division discards any sub-second remainder.
429 430 */
430 431 case TCP_KEEPINTVL:
431 432 *i1 = tcp->tcp_ka_rinterval / 1000;
432 433 return (sizeof (int));
433 434 case TCP_KEEPALIVE_ABORT_THRESHOLD:
434 435 *i1 = tcp->tcp_ka_abort_thres;
435 436 return (sizeof (int));
436 437 case TCP_CORK:
437 438 *i1 = tcp->tcp_cork;
438 439 return (sizeof (int));
439 440 case TCP_RTO_INITIAL:
440 441 *i1 = tcp->tcp_rto_initial;
441 442 return (sizeof (uint32_t));
442 443 case TCP_RTO_MIN:
443 444 *i1 = tcp->tcp_rto_min;
444 445 return (sizeof (uint32_t));
445 446 case TCP_RTO_MAX:
446 447 *i1 = tcp->tcp_rto_max;
447 448 return (sizeof (uint32_t));
448 449 case TCP_LINGER2:
449 450 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
450 451 return (sizeof (int));
451 452 }
452 453 break;
453 454 case IPPROTO_IP:
454 455 if (connp->conn_family != AF_INET) /* IPv4-level options need an AF_INET socket */
455 456 return (-1);
456 457 switch (name) {
457 458 case IP_OPTIONS:
458 459 case T_IP_OPTIONS:
459 460 /* Caller ensures enough space */
460 461 return (ip_opt_get_user(connp, ptr));
461 462 default:
462 463 break;
463 464 }
464 465 break;
465 466
466 467 case IPPROTO_IPV6:
467 468 /*
468 469 * IPPROTO_IPV6 options are only supported for sockets
469 470 * that are using IPv6 on the wire.
470 471 */
471 472 if (connp->conn_ipversion != IPV6_VERSION) {
472 473 return (-1);
473 474 }
474 475 switch (name) {
475 476 case IPV6_PATHMTU:
476 477 if (tcp->tcp_state < TCPS_ESTABLISHED)
477 478 return (-1);
478 479 break;
479 480 }
480 481 break;
481 482 }
/* Not answered above: defer to the generic conn option code. */
482 483 mutex_enter(&connp->conn_lock);
483 484 retval = conn_opt_get(&coas, level, name, ptr);
484 485 mutex_exit(&connp->conn_lock);
485 486 return (retval);
486 487 }
487 488
488 489 /*
489 490 * Set a TCP connection's participation in SO_REUSEPORT. This operation is
490 491 * performed under the protection of the squeue via tcp_setsockopt.
491 492 * The manipulation of tcp_rg_bind, as part of this operation, is subject to
492 493 * these constraints:
493 494 * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport
494 495 * under the protection of the squeue.
495 496 * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be
496 497 * altered until such time as tcp_free() cleans up the connection.
497 498 * 3. A connection undergoing bind, which matches to a connection participating
498 499 * in port-reuse, will switch its tcp_rg_bind pointer when it joins the
499 500 * group of an existing connection in tcp_bindi().
 *
 * do_enable selects the requested state (B_TRUE to enable, B_FALSE to
 * disable).
 *
 * Returns 0 on success, including the case where the connection is
 * already in the requested state; EINVAL when enabling is attempted on
 * a connection for which IPCL_IS_NONSTR() is false, or when tcp_state is
 * TCPS_CLOSED or lower; ENOMEM when tcp_rg_init() fails.
500 501 */
501 502 static int
502 503 tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
503 504 {
504 505 tcp_t *tcp = connp->conn_tcp;
505 506 struct tcp_rg_s *rg;
506 507
507 508 if (!IPCL_IS_NONSTR(connp)) {
508 509 if (do_enable) {
509 510 /*
510 511 * SO_REUSEPORT cannot be enabled on sockets which have
511 512 * fallen back to the STREAMS API.
512 513 */
513 514 return (EINVAL);
514 515 } else {
515 516 /*
516 517 * A connection with SO_REUSEPORT enabled should be
517 518 * prevented from falling back to STREAMS mode via
518 519 * logic in tcp_fallback. It is legal, however, for
519 520 * fallen-back connections to affirm the disabled state
520 521 * of SO_REUSEPORT.
521 522 */
522 523 ASSERT(connp->conn_reuseport == 0);
523 524 return (0);
524 525 }
525 526 }
526 527 if (tcp->tcp_state <= TCPS_CLOSED) {
527 528 return (EINVAL);
528 529 }
529 530 if (connp->conn_reuseport == 0 && do_enable) {
530 531 /* disabled -> enabled */
531 532 if (tcp->tcp_rg_bind != NULL) {
/* A group already exists: just mark it active again. */
532 533 tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
533 534 } else {
534 535 /*
535 536 * Connection state is not a concern when initially
536 537 * populating tcp_rg_bind. Setting it to non-NULL on a
537 538 * bound or listening connection would only mean that
538 539 * new reused-port binds become a possibility.
539 540 */
540 541 if ((rg = tcp_rg_init(tcp)) == NULL) {
541 542 return (ENOMEM);
542 543 }
543 544 tcp->tcp_rg_bind = rg;
544 545 }
545 546 connp->conn_reuseport = 1;
546 547 } else if (connp->conn_reuseport != 0 && !do_enable) {
547 548 /* enabled -> disabled */
548 549 ASSERT(tcp->tcp_rg_bind != NULL);
549 550 if (tcp->tcp_state == TCPS_IDLE) {
550 551 /*
551 552 * If the connection has not been bound yet, discard
552 553 * the reuse group state. Since disabling SO_REUSEPORT
553 554 * on a bound socket will _not_ prevent others from
554 555 * reusing the port, the presence of tcp_rg_bind is
555 556 * used to determine reuse availability, not
556 557 * conn_reuseport.
557 558 *
558 559 * This allows proper behavior for examples such as:
559 560 *
560 561 * setsockopt(fd1, ... SO_REUSEPORT, &on_val...);
561 562 * bind(fd1, &myaddr, ...);
562 563 * setsockopt(fd1, ... SO_REUSEPORT, &off_val...);
563 564 *
564 565 * setsockopt(fd2, ... SO_REUSEPORT, &on_val...);
565 566 * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED
566 567 *
567 568 */
568 569 rg = tcp->tcp_rg_bind;
569 570 tcp->tcp_rg_bind = NULL;
570 571 VERIFY(tcp_rg_remove(rg, tcp));
571 572 tcp_rg_destroy(rg);
572 573 } else {
573 574 /*
574 575 * If a connection has been bound, it's no longer safe
575 576 * to manipulate tcp_rg_bind until connection clean-up
576 577 * during tcp_free. Just mark the member status of the
577 578 * connection as inactive.
578 579 */
579 580 tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
580 581 }
581 582 connp->conn_reuseport = 0;
582 583 }
/* Either the transition succeeded or the state was already as requested. */
583 584 return (0);
584 585 }
585 586
586 587 /*
587 588 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
588 589 * Parameters are assumed to be verified by the caller.
589 590 */
590 591 /* ARGSUSED */
591 592 int
592 593 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
593 594 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
594 595 void *thisdg_attrs, cred_t *cr)
595 596 {
596 597 tcp_t *tcp = connp->conn_tcp;
597 598 int *i1 = (int *)invalp;
598 599 boolean_t onoff = (*i1 == 0) ? 0 : 1;
599 600 boolean_t checkonly;
600 601 int reterr;
601 602 tcp_stack_t *tcps = tcp->tcp_tcps;
602 603 conn_opt_arg_t coas;
603 604 uint32_t val = *((uint32_t *)invalp);
604 605
605 606 coas.coa_connp = connp;
606 607 coas.coa_ixa = connp->conn_ixa;
607 608 coas.coa_ipp = &connp->conn_xmit_ipp;
608 609 coas.coa_ancillary = B_FALSE;
609 610 coas.coa_changed = 0;
610 611
611 612 switch (optset_context) {
612 613 case SETFN_OPTCOM_CHECKONLY:
613 614 checkonly = B_TRUE;
614 615 /*
615 616 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
616 617 * inlen != 0 implies value supplied and
617 618 * we have to "pretend" to set it.
618 619 * inlen == 0 implies that there is no
619 620 * value part in T_CHECK request and just validation
620 621 * done elsewhere should be enough, we just return here.
621 622 */
622 623 if (inlen == 0) {
623 624 *outlenp = 0;
624 625 return (0);
625 626 }
626 627 break;
627 628 case SETFN_OPTCOM_NEGOTIATE:
628 629 checkonly = B_FALSE;
629 630 break;
630 631 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
631 632 case SETFN_CONN_NEGOTIATE:
632 633 checkonly = B_FALSE;
633 634 /*
634 635 * Negotiating local and "association-related" options
635 636 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
636 637 * primitives is allowed by XTI, but we choose
637 638 * to not implement this style negotiation for Internet
638 639 * protocols (We interpret it is a must for OSI world but
639 640 * optional for Internet protocols) for all options.
640 641 * [ Will do only for the few options that enable test
641 642 * suites that our XTI implementation of this feature
642 643 * works for transports that do allow it ]
643 644 */
644 645 if (!tcp_allow_connopt_set(level, name)) {
645 646 *outlenp = 0;
646 647 return (EINVAL);
647 648 }
648 649 break;
649 650 default:
650 651 /*
651 652 * We should never get here
652 653 */
653 654 *outlenp = 0;
654 655 return (EINVAL);
655 656 }
656 657
657 658 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
658 659 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
659 660
660 661 /*
661 662 * For TCP, we should have no ancillary data sent down
662 663 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
663 664 * has to be zero.
664 665 */
665 666 ASSERT(thisdg_attrs == NULL);
666 667
667 668 /*
668 669 * For fixed length options, no sanity check
669 670 * of passed in length is done. It is assumed *_optcom_req()
670 671 * routines do the right thing.
671 672 */
672 673 switch (level) {
673 674 case SOL_SOCKET:
674 675 switch (name) {
675 676 case SO_KEEPALIVE:
676 677 if (checkonly) {
677 678 /* check only case */
678 679 break;
679 680 }
680 681
681 682 if (!onoff) {
682 683 if (connp->conn_keepalive) {
683 684 if (tcp->tcp_ka_tid != 0) {
684 685 (void) TCP_TIMER_CANCEL(tcp,
685 686 tcp->tcp_ka_tid);
686 687 tcp->tcp_ka_tid = 0;
687 688 }
688 689 connp->conn_keepalive = 0;
689 690 }
690 691 break;
691 692 }
692 693 if (!connp->conn_keepalive) {
693 694 /* Crank up the keepalive timer */
694 695 tcp->tcp_ka_last_intrvl = 0;
695 696 tcp->tcp_ka_tid = TCP_TIMER(tcp,
696 697 tcp_keepalive_timer, tcp->tcp_ka_interval);
697 698 connp->conn_keepalive = 1;
698 699 }
699 700 break;
700 701 case SO_SNDBUF: {
701 702 if (*i1 > tcps->tcps_max_buf) {
702 703 *outlenp = 0;
703 704 return (ENOBUFS);
704 705 }
705 706 if (checkonly)
706 707 break;
707 708
708 709 connp->conn_sndbuf = *i1;
709 710 if (tcps->tcps_snd_lowat_fraction != 0) {
710 711 connp->conn_sndlowat = connp->conn_sndbuf /
711 712 tcps->tcps_snd_lowat_fraction;
712 713 }
713 714 (void) tcp_maxpsz_set(tcp, B_TRUE);
714 715 /*
715 716 * If we are flow-controlled, recheck the condition.
716 717 * There are apps that increase SO_SNDBUF size when
717 718 * flow-controlled (EWOULDBLOCK), and expect the flow
718 719 * control condition to be lifted right away.
719 720 */
720 721 mutex_enter(&tcp->tcp_non_sq_lock);
721 722 if (tcp->tcp_flow_stopped &&
722 723 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
723 724 tcp_clrqfull(tcp);
724 725 }
725 726 mutex_exit(&tcp->tcp_non_sq_lock);
726 727 *outlenp = inlen;
727 728 return (0);
728 729 }
729 730 case SO_RCVBUF:
730 731 if (*i1 > tcps->tcps_max_buf) {
731 732 *outlenp = 0;
732 733 return (ENOBUFS);
733 734 }
734 735 /* Silently ignore zero */
735 736 if (!checkonly && *i1 != 0) {
736 737 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
737 738 (void) tcp_rwnd_set(tcp, *i1);
738 739 }
739 740 /*
740 741 * XXX should we return the rwnd here
741 742 * and tcp_opt_get ?
742 743 */
743 744 *outlenp = inlen;
744 745 return (0);
745 746 case SO_SND_COPYAVOID:
746 747 if (!checkonly) {
747 748 if (tcp->tcp_loopback ||
748 749 (onoff != 1) || !tcp_zcopy_check(tcp)) {
749 750 *outlenp = 0;
750 751 return (EOPNOTSUPP);
751 752 }
752 753 tcp->tcp_snd_zcopy_aware = 1;
753 754 }
754 755 *outlenp = inlen;
755 756 return (0);
756 757 case SO_REUSEPORT:
757 758 if (!checkonly) {
758 759 return (tcp_set_reuseport(connp, *i1 != 0));
759 760 }
760 761 return (0);
761 762 }
762 763 break;
763 764 case IPPROTO_TCP:
764 765 switch (name) {
765 766 case TCP_NODELAY:
766 767 if (!checkonly)
767 768 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
768 769 break;
769 770 case TCP_NOTIFY_THRESHOLD:
770 771 if (!checkonly)
771 772 tcp->tcp_first_timer_threshold = *i1;
772 773 break;
773 774 case TCP_ABORT_THRESHOLD:
774 775 if (!checkonly)
775 776 tcp->tcp_second_timer_threshold = *i1;
776 777 break;
777 778 case TCP_CONN_NOTIFY_THRESHOLD:
778 779 if (!checkonly)
779 780 tcp->tcp_first_ctimer_threshold = *i1;
780 781 break;
781 782 case TCP_CONN_ABORT_THRESHOLD:
782 783 if (!checkonly)
783 784 tcp->tcp_second_ctimer_threshold = *i1;
784 785 break;
785 786 case TCP_RECVDSTADDR:
786 787 if (tcp->tcp_state > TCPS_LISTEN) {
787 788 *outlenp = 0;
788 789 return (EOPNOTSUPP);
789 790 }
790 791 /* Setting done in conn_opt_set */
791 792 break;
792 793 case TCP_INIT_CWND:
793 794 if (checkonly)
794 795 break;
795 796
796 797 /*
797 798 * Only allow socket with network configuration
798 799 * privilege to set the initial cwnd to be larger
799 800 * than allowed by RFC 3390.
800 801 */
801 802 if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
802 803 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
803 804 != 0) {
804 805 *outlenp = 0;
805 806 return (reterr);
806 807 }
807 808 if (val > tcp_max_init_cwnd) {
808 809 *outlenp = 0;
809 810 return (EINVAL);
810 811 }
811 812 }
812 813
813 814 tcp->tcp_init_cwnd = val;
814 815
815 816 /*
816 817 * If the socket is connected, AND no outbound data
817 818 * has been sent, reset the actual cwnd values.
818 819 */
819 820 if (tcp->tcp_state == TCPS_ESTABLISHED &&
820 821 tcp->tcp_iss == tcp->tcp_snxt - 1) {
821 822 tcp->tcp_cwnd =
822 823 MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
823 824 }
824 825 break;
825 826
826 827 /*
827 828 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
828 829 * is in milliseconds. TCP_KEEPIDLE is introduced for
829 830 * compatibility with other Unix flavors.
830 831 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
831 832 * converting the input to milliseconds.
832 833 */
833 834 case TCP_KEEPIDLE:
834 835 *i1 *= 1000;
835 836 /* FALLTHRU */
836 837
837 838 case TCP_KEEPALIVE_THRESHOLD:
838 839 if (checkonly)
839 840 break;
840 841
841 842 if (*i1 < tcps->tcps_keepalive_interval_low ||
842 843 *i1 > tcps->tcps_keepalive_interval_high) {
843 844 *outlenp = 0;
844 845 return (EINVAL);
845 846 }
846 847 if (*i1 != tcp->tcp_ka_interval) {
847 848 tcp->tcp_ka_interval = *i1;
848 849 /*
849 850 * Check if we need to restart the
850 851 * keepalive timer.
851 852 */
852 853 if (tcp->tcp_ka_tid != 0) {
853 854 ASSERT(connp->conn_keepalive);
854 855 (void) TCP_TIMER_CANCEL(tcp,
855 856 tcp->tcp_ka_tid);
856 857 tcp->tcp_ka_last_intrvl = 0;
857 858 tcp->tcp_ka_tid = TCP_TIMER(tcp,
858 859 tcp_keepalive_timer,
859 860 tcp->tcp_ka_interval);
860 861 }
861 862 }
862 863 break;
863 864
864 865 /*
865 866 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
866 867 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
867 868 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
868 869 * tcp_ka_cnt.
869 870 */
870 871 case TCP_KEEPCNT:
871 872 if (checkonly)
872 873 break;
873 874
874 875 if (*i1 == 0) {
875 876 return (EINVAL);
876 877 } else if (tcp->tcp_ka_rinterval == 0) {
877 878 /*
878 879 * When TCP_KEEPCNT is specified without first
879 880 * specifying a TCP_KEEPINTVL, we infer an
880 881 * interval based on a tunable specific to our
881 882 * stack: the tcp_keepalive_abort_interval.
882 883 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
883 884 * the unlikely event that that has been set.)
884 885 * Given the abort interval's default value of
885 886 * 480 seconds, low TCP_KEEPCNT values can
886 887 * result in intervals that exceed the default
887 888 * maximum RTO of 60 seconds. Rather than
888 889 * fail in these cases, we (implicitly) clamp
889 890 * the interval at the maximum RTO; if the
890 891 * TCP_KEEPCNT is shortly followed by a
891 892 * TCP_KEEPINTVL (as we expect), the abort
892 893 * threshold will be recalculated correctly --
893 894 * and if a TCP_KEEPINTVL is not forthcoming,
894 895 * keep-alive will at least operate reasonably
895 896 * given the underconfigured state.
896 897 */
897 898 uint32_t interval;
898 899
899 900 interval = tcp->tcp_ka_abort_thres / *i1;
900 901
901 902 if (interval < tcp->tcp_rto_min)
902 903 interval = tcp->tcp_rto_min;
903 904
904 905 if (interval > tcp->tcp_rto_max)
905 906 interval = tcp->tcp_rto_max;
906 907
907 908 tcp->tcp_ka_rinterval = interval;
908 909 } else {
909 910 if ((*i1 * tcp->tcp_ka_rinterval) <
910 911 tcps->tcps_keepalive_abort_interval_low ||
911 912 (*i1 * tcp->tcp_ka_rinterval) >
912 913 tcps->tcps_keepalive_abort_interval_high)
913 914 return (EINVAL);
914 915 tcp->tcp_ka_abort_thres =
915 916 (*i1 * tcp->tcp_ka_rinterval);
916 917 }
917 918 tcp->tcp_ka_cnt = *i1;
918 919 break;
919 920 case TCP_KEEPINTVL:
920 921 /*
921 922 * TCP_KEEPINTVL is specified in seconds, but
922 923 * tcp_ka_rinterval is in milliseconds.
923 924 */
924 925
925 926 if (checkonly)
926 927 break;
927 928
928 929 if ((*i1 * 1000) < tcp->tcp_rto_min ||
929 930 (*i1 * 1000) > tcp->tcp_rto_max)
930 931 return (EINVAL);
931 932
932 933 if (tcp->tcp_ka_cnt == 0) {
933 934 tcp->tcp_ka_cnt =
934 935 tcp->tcp_ka_abort_thres / (*i1 * 1000);
935 936 } else {
936 937 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
937 938 tcps->tcps_keepalive_abort_interval_low ||
938 939 (*i1 * tcp->tcp_ka_cnt * 1000) >
939 940 tcps->tcps_keepalive_abort_interval_high)
940 941 return (EINVAL);
941 942 tcp->tcp_ka_abort_thres =
942 943 (*i1 * tcp->tcp_ka_cnt * 1000);
943 944 }
944 945 tcp->tcp_ka_rinterval = *i1 * 1000;
945 946 break;
946 947 case TCP_KEEPALIVE_ABORT_THRESHOLD:
947 948 if (!checkonly) {
948 949 if (*i1 <
949 950 tcps->tcps_keepalive_abort_interval_low ||
950 951 *i1 >
951 952 tcps->tcps_keepalive_abort_interval_high) {
952 953 *outlenp = 0;
953 954 return (EINVAL);
954 955 }
955 956 tcp->tcp_ka_abort_thres = *i1;
956 957 tcp->tcp_ka_cnt = 0;
957 958 tcp->tcp_ka_rinterval = 0;
958 959 }
959 960 break;
960 961 case TCP_CORK:
961 962 if (!checkonly) {
962 963 /*
963 964 * if tcp->tcp_cork was set and is now
964 965 * being unset, we have to make sure that
965 966 * the remaining data gets sent out. Also
966 967 * unset tcp->tcp_cork so that tcp_wput_data()
|
↓ open down ↓ |
932 lines elided |
↑ open up ↑ |
967 968 * can send data even if it is less than mss
968 969 */
969 970 if (tcp->tcp_cork && onoff == 0 &&
970 971 tcp->tcp_unsent > 0) {
971 972 tcp->tcp_cork = B_FALSE;
972 973 tcp_wput_data(tcp, NULL, B_FALSE);
973 974 }
974 975 tcp->tcp_cork = onoff;
975 976 }
976 977 break;
977 - case TCP_RTO_INITIAL: {
978 - clock_t rto;
979 -
978 + case TCP_RTO_INITIAL:
980 979 if (checkonly || val == 0)
981 980 break;
982 981
983 982 /*
984 983 * Sanity checks
985 984 *
986 985 * The initial RTO should be bounded by the minimum
987 986 * and maximum RTO. And it should also be smaller
988 987 * than the connect attempt abort timeout. Otherwise,
989 988 * the connection won't be aborted in a period
990 989 * reasonably close to that timeout.
991 990 */
992 991 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
993 992 val > tcp->tcp_second_ctimer_threshold ||
994 993 val < tcps->tcps_rexmit_interval_initial_low ||
995 994 val > tcps->tcps_rexmit_interval_initial_high) {
996 995 *outlenp = 0;
997 996 return (EINVAL);
998 997 }
|
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
999 998 tcp->tcp_rto_initial = val;
1000 999
1001 1000 /*
1002 1001 * If TCP has not sent anything, need to re-calculate
1003 1002 * tcp_rto. Otherwise, this option change does not
1004 1003 * really affect anything.
1005 1004 */
1006 1005 if (tcp->tcp_state >= TCPS_SYN_SENT)
1007 1006 break;
1008 1007
1009 - tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
1010 - tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
1011 - rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1012 - tcps->tcps_rexmit_interval_extra +
1013 - (tcp->tcp_rtt_sa >> 5) +
1014 - tcps->tcps_conn_grace_period;
1015 - TCP_SET_RTO(tcp, rto);
1008 + tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
1009 + tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
1010 + tcp->tcp_rto = tcp_calculate_rto(tcp, tcps);
1016 1011 break;
1017 - }
1018 1012 case TCP_RTO_MIN:
1019 1013 if (checkonly || val == 0)
1020 1014 break;
1021 1015
1022 1016 if (val < tcps->tcps_rexmit_interval_min_low ||
1023 1017 val > tcps->tcps_rexmit_interval_min_high ||
1024 1018 val > tcp->tcp_rto_max) {
1025 1019 *outlenp = 0;
1026 1020 return (EINVAL);
1027 1021 }
1028 1022 tcp->tcp_rto_min = val;
1029 1023 if (tcp->tcp_rto < val)
1030 1024 tcp->tcp_rto = val;
1031 1025 break;
1032 1026 case TCP_RTO_MAX:
1033 1027 if (checkonly || val == 0)
1034 1028 break;
1035 1029
1036 1030 /*
1037 1031 * Sanity checks
1038 1032 *
1039 1033 * The maximum RTO should not be larger than the
1040 1034 * connection abort timeout. Otherwise, the
1041 1035 * connection won't be aborted in a period reasonably
1042 1036 * close to that timeout.
1043 1037 */
1044 1038 if (val < tcps->tcps_rexmit_interval_max_low ||
1045 1039 val > tcps->tcps_rexmit_interval_max_high ||
1046 1040 val < tcp->tcp_rto_min ||
1047 1041 val > tcp->tcp_second_timer_threshold) {
1048 1042 *outlenp = 0;
1049 1043 return (EINVAL);
1050 1044 }
1051 1045 tcp->tcp_rto_max = val;
1052 1046 if (tcp->tcp_rto > val)
1053 1047 tcp->tcp_rto = val;
1054 1048 break;
1055 1049 case TCP_LINGER2:
1056 1050 if (checkonly || *i1 == 0)
1057 1051 break;
1058 1052
1059 1053 /*
1060 1054 * Note that the option value's unit is seconds. And
1061 1055 * the value should be bigger than the private
1062 1056 * parameter tcp_fin_wait_2_flush_interval's lower
1063 1057 * bound and smaller than the current value of that
1064 1058 * parameter. It should be smaller than the current
1065 1059 * value to avoid an app setting TCP_LINGER2 to a big
1066 1060 * value, causing resources to be held up too long in
1067 1061 * FIN-WAIT-2 state.
1068 1062 */
1069 1063 if (*i1 < 0 ||
1070 1064 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1071 1065 *i1 ||
1072 1066 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1073 1067 *i1) {
1074 1068 *outlenp = 0;
1075 1069 return (EINVAL);
1076 1070 }
1077 1071 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1078 1072 break;
1079 1073 default:
1080 1074 break;
1081 1075 }
1082 1076 break;
1083 1077 case IPPROTO_IP:
1084 1078 switch (name) {
1085 1079 case IP_SEC_OPT:
1086 1080 /*
1087 1081 * We should not allow policy setting after
1088 1082 * we start listening for connections.
1089 1083 */
1090 1084 if (tcp->tcp_state == TCPS_LISTEN) {
1091 1085 return (EINVAL);
1092 1086 }
1093 1087 break;
1094 1088 }
1095 1089 break;
1096 1090 case IPPROTO_IPV6:
1097 1091 /*
1098 1092 * IPPROTO_IPV6 options are only supported for sockets
1099 1093 * that are using IPv6 on the wire.
1100 1094 */
1101 1095 if (connp->conn_ipversion != IPV6_VERSION) {
1102 1096 *outlenp = 0;
1103 1097 return (EINVAL);
1104 1098 }
1105 1099
1106 1100 switch (name) {
1107 1101 case IPV6_RECVPKTINFO:
1108 1102 if (!checkonly) {
1109 1103 /* Force it to be sent up with the next msg */
1110 1104 tcp->tcp_recvifindex = 0;
1111 1105 }
1112 1106 break;
1113 1107 case IPV6_RECVTCLASS:
1114 1108 if (!checkonly) {
1115 1109 /* Force it to be sent up with the next msg */
1116 1110 tcp->tcp_recvtclass = 0xffffffffU;
1117 1111 }
1118 1112 break;
1119 1113 case IPV6_RECVHOPLIMIT:
1120 1114 if (!checkonly) {
1121 1115 /* Force it to be sent up with the next msg */
1122 1116 tcp->tcp_recvhops = 0xffffffffU;
1123 1117 }
1124 1118 break;
1125 1119 case IPV6_PKTINFO:
1126 1120 /* This is an extra check for TCP */
1127 1121 if (inlen == sizeof (struct in6_pktinfo)) {
1128 1122 struct in6_pktinfo *pkti;
1129 1123
1130 1124 pkti = (struct in6_pktinfo *)invalp;
1131 1125 /*
1132 1126 * RFC 3542 states that ipi6_addr must be
1133 1127 * the unspecified address when setting the
1134 1128 * IPV6_PKTINFO sticky socket option on a
1135 1129 * TCP socket.
1136 1130 */
1137 1131 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1138 1132 return (EINVAL);
1139 1133 }
1140 1134 break;
1141 1135 case IPV6_SEC_OPT:
1142 1136 /*
1143 1137 * We should not allow policy setting after
1144 1138 * we start listening for connections.
1145 1139 */
1146 1140 if (tcp->tcp_state == TCPS_LISTEN) {
1147 1141 return (EINVAL);
1148 1142 }
1149 1143 break;
1150 1144 }
1151 1145 break;
1152 1146 }
1153 1147 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1154 1148 checkonly, cr);
1155 1149 if (reterr != 0) {
1156 1150 *outlenp = 0;
1157 1151 return (reterr);
1158 1152 }
1159 1153
1160 1154 /*
1161 1155 * Common case of OK return with outval same as inval
1162 1156 */
1163 1157 if (invalp != outvalp) {
1164 1158 /* don't trust bcopy for identical src/dst */
1165 1159 (void) bcopy(invalp, outvalp, inlen);
1166 1160 }
1167 1161 *outlenp = inlen;
1168 1162
1169 1163 if (coas.coa_changed & COA_HEADER_CHANGED) {
1170 1164 /* If we are connected we rebuild the headers */
1171 1165 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1172 1166 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1173 1167 reterr = tcp_build_hdrs(tcp);
1174 1168 if (reterr != 0)
1175 1169 return (reterr);
1176 1170 }
1177 1171 }
1178 1172 if (coas.coa_changed & COA_ROUTE_CHANGED) {
1179 1173 in6_addr_t nexthop;
1180 1174
1181 1175 /*
1182 1176 * If we are connected we re-cache the information.
1183 1177 * We ignore errors to preserve BSD behavior.
1184 1178 * Note that we don't redo IPsec policy lookup here
1185 1179 * since the final destination (or source) didn't change.
1186 1180 */
1187 1181 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1188 1182 &connp->conn_faddr_v6, &nexthop);
1189 1183
1190 1184 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1191 1185 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1192 1186 (void) ip_attr_connect(connp, connp->conn_ixa,
1193 1187 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1194 1188 &nexthop, connp->conn_fport, NULL, NULL,
1195 1189 IPDF_VERIFY_DST);
1196 1190 }
1197 1191 }
1198 1192 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1199 1193 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1200 1194 }
1201 1195 if (coas.coa_changed & COA_WROFF_CHANGED) {
1202 1196 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1203 1197 tcps->tcps_wroff_xtra;
1204 1198 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1205 1199 connp->conn_wroff);
1206 1200 }
1207 1201 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1208 1202 if (IPCL_IS_NONSTR(connp))
1209 1203 proto_set_rx_oob_opt(connp, onoff);
1210 1204 }
1211 1205 return (0);
1212 1206 }
|
↓ open down ↓ |
185 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX