Print this page
OS-5598 newproc() performs inadequate clean-up after failed lwp_create() [fix debug build]
OS-5613 SO_REUSEPORT needs better state-change coverage
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-5217 setsockopt(TCP_KEEPCNT) can return EINVAL spuriously
Reviewed by: Dave Pacheco <dap@joyent.com>
OS-4699 lxbrand netty complains about SO_LINGER (really IP_TOS)
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24 + * Copyright 2016 Joyent, Inc.
24 25 */
25 26
26 27 #include <sys/types.h>
27 28 #include <sys/stream.h>
28 29 #define _SUN_TPI_VERSION 2
29 30 #include <sys/tihdr.h>
30 31 #include <sys/socket.h>
31 32 #include <sys/xti_xtiopt.h>
32 33 #include <sys/xti_inet.h>
33 34 #include <sys/policy.h>
34 35
35 36 #include <inet/common.h>
36 37 #include <netinet/ip6.h>
37 38 #include <inet/ip.h>
38 39
39 40 #include <netinet/in.h>
40 41 #include <netinet/tcp.h>
41 42 #include <inet/optcom.h>
42 43 #include <inet/proto_set.h>
43 44 #include <inet/tcp_impl.h>
44 45
45 46 static int tcp_opt_default(queue_t *, int, int, uchar_t *);
46 47
47 48 /*
48 49 * Table of all known options handled on a TCP protocol stack.
49 50 *
50 51 * Note: This table contains options processed by both TCP and IP levels
51 52 * and is the superset of options that can be performed on a TCP over IP
52 53 * stack.
53 54 */
54 55 opdes_t tcp_opt_arr[] = {
|
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
55 56
56 57 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
57 58 sizeof (struct linger), 0 },
58 59
59 60 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
60 61 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
61 62 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62 63 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
63 64 },
64 65 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65 -{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 +{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 +{ SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 68 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 69 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
68 70 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 71 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
70 72 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
71 73 sizeof (struct timeval), 0 },
72 74 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
73 75 sizeof (struct timeval), 0 },
74 76 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
75 77 },
76 78 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
77 79 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
78 80 0 },
79 81 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
80 82 0 },
81 83 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
82 84 0 },
83 85 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
84 86 0 },
85 87 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
86 88
87 89 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
88 90
89 91 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
90 92
91 93 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
92 94 },
93 95 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
94 96 536 },
95 97
96 98 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
97 99 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
98 100
99 101 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
100 102 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
101 103
102 104 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
103 105 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
104 106
105 107 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
106 108 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
107 109
108 110 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
109 111 0 },
110 112
111 113 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
112 114 sizeof (int), 0 },
113 115
114 116 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
115 117 },
116 118
117 119 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
118 120 sizeof (int), 0 },
119 121
120 122 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
121 123 sizeof (int), 0 },
122 124
123 125 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
124 126
125 127 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
126 128
127 129 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
128 130
129 131 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
130 132 sizeof (int), 0 },
131 133
132 134 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
133 135
134 136 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
135 137
136 138 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
137 139
138 140 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
139 141
140 142 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
141 143
142 144 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
143 145 (OP_VARLEN|OP_NODEFAULT),
144 146 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
145 147 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
146 148 (OP_VARLEN|OP_NODEFAULT),
147 149 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
148 150
149 151 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
150 152 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
151 153 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
152 154 sizeof (int), -1 /* not initialized */ },
153 155
154 156 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
155 157 sizeof (ipsec_req_t), -1 /* not initialized */ },
156 158
157 159 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
158 160 sizeof (int), 0 /* no ifindex */ },
159 161
160 162 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
161 163 sizeof (int), 0 },
162 164
163 165 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
164 166 sizeof (int), -1 /* not initialized */ },
165 167
166 168 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
167 169 sizeof (int), 0 /* no ifindex */ },
168 170
169 171 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
170 172
171 173 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
172 174 sizeof (in_addr_t), -1 /* not initialized */ },
173 175
174 176 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
175 177 sizeof (int), 0 },
176 178
177 179 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
178 180 (OP_NODEFAULT|OP_VARLEN),
179 181 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
180 182 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
181 183 OP_NODEFAULT,
182 184 sizeof (sin6_t), -1 /* not initialized */ },
183 185 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
184 186 (OP_VARLEN|OP_NODEFAULT), 255*8,
185 187 -1 /* not initialized */ },
186 188 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
187 189 (OP_VARLEN|OP_NODEFAULT), 255*8,
188 190 -1 /* not initialized */ },
189 191 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
190 192 (OP_VARLEN|OP_NODEFAULT), 255*8,
191 193 -1 /* not initialized */ },
192 194 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
193 195 (OP_VARLEN|OP_NODEFAULT), 255*8,
194 196 -1 /* not initialized */ },
195 197 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
196 198 OP_NODEFAULT,
197 199 sizeof (int), -1 /* not initialized */ },
198 200 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
199 201 OP_NODEFAULT,
200 202 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
201 203 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
202 204 sizeof (int), 0 },
203 205 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
204 206 sizeof (int), 0 },
205 207 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
206 208 sizeof (int), 0 },
207 209
208 210 /* Enable receipt of ancillary data */
209 211 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
210 212 sizeof (int), 0 },
211 213 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 214 sizeof (int), 0 },
213 215 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
214 216 sizeof (int), 0 },
215 217 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
216 218 sizeof (int), 0 },
217 219 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 220 sizeof (int), 0 },
219 221 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 222 sizeof (int), 0 },
221 223 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 224 sizeof (int), 0 },
223 225 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 226 sizeof (int), 0 },
225 227
226 228 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
227 229 sizeof (ipsec_req_t), -1 /* not initialized */ },
228 230 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
229 231 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
230 232 };
231 233
232 234 /*
233 235 * Table of all supported levels
234 236 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
235 237 * any supported options so we need this info separately.
236 238 *
237 239 * This is needed only for topmost tpi providers and is used only by
238 240 * XTI interfaces.
239 241 */
240 242 optlevel_t tcp_valid_levels_arr[] = {
241 243 XTI_GENERIC,
242 244 SOL_SOCKET,
243 245 IPPROTO_TCP,
244 246 IPPROTO_IP,
245 247 IPPROTO_IPV6
246 248 };
247 249
248 250
249 251 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
250 252 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
251 253
252 254 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
253 255
254 256 /*
255 257 * Initialize option database object for TCP
256 258 *
257 259 * This object represents database of options to search passed to
258 260 * {sock,tpi}optcom_req() interface routine to take care of option
259 261 * management and associated methods.
260 262 */
261 263
262 264 optdb_obj_t tcp_opt_obj = {
263 265 tcp_opt_default, /* TCP default value function pointer */
264 266 tcp_tpi_opt_get, /* TCP get function pointer */
265 267 tcp_tpi_opt_set, /* TCP set function pointer */
266 268 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
267 269 tcp_opt_arr, /* TCP option database */
268 270 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
269 271 tcp_valid_levels_arr /* TCP valid level array */
270 272 };
271 273
272 274 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
273 275
274 276 /*
275 277 * Some TCP options can be "set" by requesting them in the option
276 278 * buffer. This is needed for XTI feature test though we do not
277 279 * allow it in general. We interpret that this mechanism is more
278 280 * applicable to OSI protocols and need not be allowed in general.
279 281 * This routine filters out options for which it is not allowed (most)
280 282 * and lets through those (few) for which it is. [ The XTI interface
281 283 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
282 284 * ever implemented will have to be allowed here ].
283 285 */
284 286 static boolean_t
285 287 tcp_allow_connopt_set(int level, int name)
286 288 {
287 289
288 290 switch (level) {
289 291 case IPPROTO_TCP:
290 292 switch (name) {
291 293 case TCP_NODELAY:
292 294 return (B_TRUE);
293 295 default:
294 296 return (B_FALSE);
295 297 }
296 298 /*NOTREACHED*/
297 299 default:
298 300 return (B_FALSE);
299 301 }
300 302 /*NOTREACHED*/
301 303 }
302 304
303 305 /*
304 306 * This routine gets default values of certain options whose default
305 307 * values are maintained by protocol specific code
306 308 */
307 309 /* ARGSUSED */
308 310 static int
309 311 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
310 312 {
311 313 int32_t *i1 = (int32_t *)ptr;
312 314 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
313 315
314 316 switch (level) {
315 317 case IPPROTO_TCP:
316 318 switch (name) {
317 319 case TCP_NOTIFY_THRESHOLD:
318 320 *i1 = tcps->tcps_ip_notify_interval;
319 321 break;
320 322 case TCP_ABORT_THRESHOLD:
321 323 *i1 = tcps->tcps_ip_abort_interval;
322 324 break;
323 325 case TCP_CONN_NOTIFY_THRESHOLD:
324 326 *i1 = tcps->tcps_ip_notify_cinterval;
325 327 break;
326 328 case TCP_CONN_ABORT_THRESHOLD:
327 329 *i1 = tcps->tcps_ip_abort_cinterval;
328 330 break;
329 331 default:
330 332 return (-1);
331 333 }
332 334 break;
333 335 case IPPROTO_IP:
334 336 switch (name) {
335 337 case IP_TTL:
336 338 *i1 = tcps->tcps_ipv4_ttl;
337 339 break;
338 340 default:
339 341 return (-1);
340 342 }
341 343 break;
342 344 case IPPROTO_IPV6:
343 345 switch (name) {
344 346 case IPV6_UNICAST_HOPS:
345 347 *i1 = tcps->tcps_ipv6_hoplimit;
346 348 break;
347 349 default:
348 350 return (-1);
349 351 }
350 352 break;
351 353 default:
352 354 return (-1);
353 355 }
354 356 return (sizeof (int));
355 357 }
356 358
357 359 /*
358 360 * TCP routine to get the values of options.
359 361 */
360 362 int
361 363 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
362 364 {
363 365 int *i1 = (int *)ptr;
364 366 tcp_t *tcp = connp->conn_tcp;
365 367 conn_opt_arg_t coas;
366 368 int retval;
367 369
368 370 coas.coa_connp = connp;
369 371 coas.coa_ixa = connp->conn_ixa;
370 372 coas.coa_ipp = &connp->conn_xmit_ipp;
371 373 coas.coa_ancillary = B_FALSE;
372 374 coas.coa_changed = 0;
373 375
374 376 switch (level) {
375 377 case SOL_SOCKET:
376 378 switch (name) {
377 379 case SO_SND_COPYAVOID:
378 380 *i1 = tcp->tcp_snd_zcopy_on ?
379 381 SO_SND_COPYAVOID : 0;
380 382 return (sizeof (int));
381 383 case SO_ACCEPTCONN:
382 384 *i1 = (tcp->tcp_state == TCPS_LISTEN);
383 385 return (sizeof (int));
384 386 }
385 387 break;
386 388 case IPPROTO_TCP:
387 389 switch (name) {
388 390 case TCP_NODELAY:
389 391 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
390 392 return (sizeof (int));
391 393 case TCP_MAXSEG:
392 394 *i1 = tcp->tcp_mss;
393 395 return (sizeof (int));
394 396 case TCP_NOTIFY_THRESHOLD:
395 397 *i1 = (int)tcp->tcp_first_timer_threshold;
396 398 return (sizeof (int));
397 399 case TCP_ABORT_THRESHOLD:
398 400 *i1 = tcp->tcp_second_timer_threshold;
399 401 return (sizeof (int));
400 402 case TCP_CONN_NOTIFY_THRESHOLD:
401 403 *i1 = tcp->tcp_first_ctimer_threshold;
402 404 return (sizeof (int));
403 405 case TCP_CONN_ABORT_THRESHOLD:
404 406 *i1 = tcp->tcp_second_ctimer_threshold;
405 407 return (sizeof (int));
406 408 case TCP_INIT_CWND:
407 409 *i1 = tcp->tcp_init_cwnd;
408 410 return (sizeof (int));
409 411 case TCP_KEEPALIVE_THRESHOLD:
410 412 *i1 = tcp->tcp_ka_interval;
411 413 return (sizeof (int));
412 414
413 415 /*
414 416 * TCP_KEEPIDLE expects value in seconds, but
415 417 * tcp_ka_interval is in milliseconds.
416 418 */
417 419 case TCP_KEEPIDLE:
418 420 *i1 = tcp->tcp_ka_interval / 1000;
419 421 return (sizeof (int));
420 422 case TCP_KEEPCNT:
421 423 *i1 = tcp->tcp_ka_cnt;
422 424 return (sizeof (int));
423 425
424 426 /*
425 427 * TCP_KEEPINTVL expects value in seconds, but
426 428 * tcp_ka_rinterval is in milliseconds.
427 429 */
428 430 case TCP_KEEPINTVL:
429 431 *i1 = tcp->tcp_ka_rinterval / 1000;
430 432 return (sizeof (int));
431 433 case TCP_KEEPALIVE_ABORT_THRESHOLD:
432 434 *i1 = tcp->tcp_ka_abort_thres;
433 435 return (sizeof (int));
434 436 case TCP_CORK:
435 437 *i1 = tcp->tcp_cork;
436 438 return (sizeof (int));
437 439 case TCP_RTO_INITIAL:
438 440 *i1 = tcp->tcp_rto_initial;
439 441 return (sizeof (uint32_t));
440 442 case TCP_RTO_MIN:
441 443 *i1 = tcp->tcp_rto_min;
442 444 return (sizeof (uint32_t));
443 445 case TCP_RTO_MAX:
444 446 *i1 = tcp->tcp_rto_max;
445 447 return (sizeof (uint32_t));
446 448 case TCP_LINGER2:
447 449 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
448 450 return (sizeof (int));
449 451 }
450 452 break;
451 453 case IPPROTO_IP:
452 454 if (connp->conn_family != AF_INET)
453 455 return (-1);
454 456 switch (name) {
455 457 case IP_OPTIONS:
456 458 case T_IP_OPTIONS:
457 459 /* Caller ensures enough space */
458 460 return (ip_opt_get_user(connp, ptr));
459 461 default:
460 462 break;
461 463 }
462 464 break;
463 465
464 466 case IPPROTO_IPV6:
465 467 /*
466 468 * IPPROTO_IPV6 options are only supported for sockets
467 469 * that are using IPv6 on the wire.
468 470 */
469 471 if (connp->conn_ipversion != IPV6_VERSION) {
470 472 return (-1);
471 473 }
472 474 switch (name) {
473 475 case IPV6_PATHMTU:
474 476 if (tcp->tcp_state < TCPS_ESTABLISHED)
475 477 return (-1);
476 478 break;
|
↓ open down ↓ |
401 lines elided |
↑ open up ↑ |
477 479 }
478 480 break;
479 481 }
480 482 mutex_enter(&connp->conn_lock);
481 483 retval = conn_opt_get(&coas, level, name, ptr);
482 484 mutex_exit(&connp->conn_lock);
483 485 return (retval);
484 486 }
485 487
486 488 /*
489 + * Set a TCP connection's participation in SO_REUSEPORT. This operation is
490 + * performed under the protection of the squeue via tcp_setsockopt.
491 + * The manipulation of tcp_rg_bind, as part of this operation, is subject to
492 + * these constraints:
493 + * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport
494 + * under the protection of the squeue.
495 + * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be
496 + * altered until such time as tcp_free() cleans up the connection.
497 + * 3. A connection undergoing bind, which matches to a connection participating
498 + * in port-reuse, will switch its tcp_rg_bind pointer when it joins the
499 + * group of an existing connection in tcp_bindi().
500 + */
501 +static int
502 +tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
503 +{
504 + tcp_t *tcp = connp->conn_tcp;
505 + struct tcp_rg_s *rg;
506 +
507 + if (!IPCL_IS_NONSTR(connp)) {
508 + if (do_enable) {
509 + /*
510 + * SO_REUSEPORT cannot be enabled on sockets which have
511 + * fallen back to the STREAMS API.
512 + */
513 + return (EINVAL);
514 + } else {
515 + /*
516 + * A connection with SO_REUSEPORT enabled should be
517 + * prevented from falling back to STREAMS mode via
518 + * logic in tcp_fallback. It is legal, however, for
519 + * fallen-back connections to affirm the disabled state
520 + * of SO_REUSEPORT.
521 + */
522 + ASSERT(connp->conn_reuseport == 0);
523 + return (0);
524 + }
525 + }
526 + if (tcp->tcp_state <= TCPS_CLOSED) {
527 + return (EINVAL);
528 + }
529 + if (connp->conn_reuseport == 0 && do_enable) {
530 + /* disabled -> enabled */
531 + if (tcp->tcp_rg_bind != NULL) {
532 + tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
533 + } else {
534 + /*
535 + * Connection state is not a concern when initially
536 + * populating tcp_rg_bind. Setting it to non-NULL on a
537 + * bound or listening connection would only mean that
538 + * new reused-port binds become a possibility.
539 + */
540 + if ((rg = tcp_rg_init(tcp)) == NULL) {
541 + return (ENOMEM);
542 + }
543 + tcp->tcp_rg_bind = rg;
544 + }
545 + connp->conn_reuseport = 1;
546 + } else if (connp->conn_reuseport != 0 && !do_enable) {
547 + /* enabled -> disabled */
548 + ASSERT(tcp->tcp_rg_bind != NULL);
549 + if (tcp->tcp_state == TCPS_IDLE) {
550 + /*
551 + * If the connection has not been bound yet, discard
552 + * the reuse group state. Since disabling SO_REUSEPORT
553 + * on a bound socket will _not_ prevent others from
554 + * reusing the port, the presence of tcp_rg_bind is
555 + * used to determine reuse availability, not
556 + * conn_reuseport.
557 + *
558 + * This allows proper behavior for examples such as:
559 + *
560 + * setsockopt(fd1, ... SO_REUSEPORT, &on_val...);
561 + * bind(fd1, &myaddr, ...);
562 + * setsockopt(fd1, ... SO_REUSEPORT, &off_val...);
563 + *
564 + * setsockopt(fd2, ... SO_REUSEPORT, &on_val...);
565 + * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED
566 + *
567 + */
568 + rg = tcp->tcp_rg_bind;
569 + tcp->tcp_rg_bind = NULL;
570 + VERIFY(tcp_rg_remove(rg, tcp));
571 + tcp_rg_destroy(rg);
572 + } else {
573 + /*
574 + * If a connection has been bound, it's no longer safe
575 + * to manipulate tcp_rg_bind until connection clean-up
576 + * during tcp_free. Just mark the member status of the
577 + * connection as inactive.
578 + */
579 + tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
580 + }
581 + connp->conn_reuseport = 0;
582 + }
583 + return (0);
584 +}
585 +
586 +/*
487 587 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
488 588 * Parameters are assumed to be verified by the caller.
489 589 */
490 590 /* ARGSUSED */
491 591 int
492 592 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
493 593 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
494 594 void *thisdg_attrs, cred_t *cr)
495 595 {
496 596 tcp_t *tcp = connp->conn_tcp;
497 597 int *i1 = (int *)invalp;
498 598 boolean_t onoff = (*i1 == 0) ? 0 : 1;
499 599 boolean_t checkonly;
500 600 int reterr;
501 601 tcp_stack_t *tcps = tcp->tcp_tcps;
502 602 conn_opt_arg_t coas;
503 603 uint32_t val = *((uint32_t *)invalp);
504 604
505 605 coas.coa_connp = connp;
506 606 coas.coa_ixa = connp->conn_ixa;
507 607 coas.coa_ipp = &connp->conn_xmit_ipp;
508 608 coas.coa_ancillary = B_FALSE;
509 609 coas.coa_changed = 0;
510 610
511 611 switch (optset_context) {
512 612 case SETFN_OPTCOM_CHECKONLY:
513 613 checkonly = B_TRUE;
514 614 /*
515 615 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
516 616 * inlen != 0 implies value supplied and
517 617 * we have to "pretend" to set it.
518 618 * inlen == 0 implies that there is no
519 619 * value part in T_CHECK request and just validation
520 620 * done elsewhere should be enough, we just return here.
521 621 */
522 622 if (inlen == 0) {
523 623 *outlenp = 0;
524 624 return (0);
525 625 }
526 626 break;
527 627 case SETFN_OPTCOM_NEGOTIATE:
528 628 checkonly = B_FALSE;
529 629 break;
530 630 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
531 631 case SETFN_CONN_NEGOTIATE:
532 632 checkonly = B_FALSE;
533 633 /*
534 634 * Negotiating local and "association-related" options
535 635 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
536 636 * primitives is allowed by XTI, but we choose
537 637 * to not implement this style negotiation for Internet
538 638 * protocols (We interpret it is a must for OSI world but
539 639 * optional for Internet protocols) for all options.
540 640 * [ Will do only for the few options that enable test
541 641 * suites that our XTI implementation of this feature
542 642 * works for transports that do allow it ]
543 643 */
544 644 if (!tcp_allow_connopt_set(level, name)) {
545 645 *outlenp = 0;
546 646 return (EINVAL);
547 647 }
548 648 break;
549 649 default:
550 650 /*
551 651 * We should never get here
552 652 */
553 653 *outlenp = 0;
554 654 return (EINVAL);
555 655 }
556 656
557 657 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
558 658 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
559 659
560 660 /*
561 661 * For TCP, we should have no ancillary data sent down
562 662 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
563 663 * has to be zero.
564 664 */
565 665 ASSERT(thisdg_attrs == NULL);
566 666
567 667 /*
568 668 * For fixed length options, no sanity check
569 669 * of passed in length is done. It is assumed *_optcom_req()
570 670 * routines do the right thing.
571 671 */
572 672 switch (level) {
573 673 case SOL_SOCKET:
574 674 switch (name) {
575 675 case SO_KEEPALIVE:
576 676 if (checkonly) {
577 677 /* check only case */
578 678 break;
579 679 }
580 680
581 681 if (!onoff) {
582 682 if (connp->conn_keepalive) {
583 683 if (tcp->tcp_ka_tid != 0) {
584 684 (void) TCP_TIMER_CANCEL(tcp,
585 685 tcp->tcp_ka_tid);
586 686 tcp->tcp_ka_tid = 0;
587 687 }
588 688 connp->conn_keepalive = 0;
589 689 }
590 690 break;
591 691 }
592 692 if (!connp->conn_keepalive) {
593 693 /* Crank up the keepalive timer */
594 694 tcp->tcp_ka_last_intrvl = 0;
595 695 tcp->tcp_ka_tid = TCP_TIMER(tcp,
596 696 tcp_keepalive_timer, tcp->tcp_ka_interval);
597 697 connp->conn_keepalive = 1;
598 698 }
599 699 break;
600 700 case SO_SNDBUF: {
601 701 if (*i1 > tcps->tcps_max_buf) {
602 702 *outlenp = 0;
603 703 return (ENOBUFS);
604 704 }
605 705 if (checkonly)
606 706 break;
607 707
608 708 connp->conn_sndbuf = *i1;
609 709 if (tcps->tcps_snd_lowat_fraction != 0) {
610 710 connp->conn_sndlowat = connp->conn_sndbuf /
611 711 tcps->tcps_snd_lowat_fraction;
612 712 }
613 713 (void) tcp_maxpsz_set(tcp, B_TRUE);
614 714 /*
615 715 * If we are flow-controlled, recheck the condition.
616 716 * There are apps that increase SO_SNDBUF size when
617 717 * flow-controlled (EWOULDBLOCK), and expect the flow
618 718 * control condition to be lifted right away.
619 719 */
620 720 mutex_enter(&tcp->tcp_non_sq_lock);
621 721 if (tcp->tcp_flow_stopped &&
622 722 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
623 723 tcp_clrqfull(tcp);
624 724 }
625 725 mutex_exit(&tcp->tcp_non_sq_lock);
626 726 *outlenp = inlen;
627 727 return (0);
628 728 }
629 729 case SO_RCVBUF:
630 730 if (*i1 > tcps->tcps_max_buf) {
631 731 *outlenp = 0;
632 732 return (ENOBUFS);
633 733 }
634 734 /* Silently ignore zero */
635 735 if (!checkonly && *i1 != 0) {
636 736 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
637 737 (void) tcp_rwnd_set(tcp, *i1);
638 738 }
639 739 /*
640 740 * XXX should we return the rwnd here
641 741 * and tcp_opt_get ?
642 742 */
643 743 *outlenp = inlen;
644 744 return (0);
645 745 case SO_SND_COPYAVOID:
|
↓ open down ↓ |
149 lines elided |
↑ open up ↑ |
646 746 if (!checkonly) {
647 747 if (tcp->tcp_loopback ||
648 748 (onoff != 1) || !tcp_zcopy_check(tcp)) {
649 749 *outlenp = 0;
650 750 return (EOPNOTSUPP);
651 751 }
652 752 tcp->tcp_snd_zcopy_aware = 1;
653 753 }
654 754 *outlenp = inlen;
655 755 return (0);
756 + case SO_REUSEPORT:
757 + if (!checkonly) {
758 + return (tcp_set_reuseport(connp, *i1 != 0));
759 + }
760 + return (0);
656 761 }
657 762 break;
658 763 case IPPROTO_TCP:
659 764 switch (name) {
660 765 case TCP_NODELAY:
661 766 if (!checkonly)
662 767 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
663 768 break;
664 769 case TCP_NOTIFY_THRESHOLD:
665 770 if (!checkonly)
666 771 tcp->tcp_first_timer_threshold = *i1;
667 772 break;
668 773 case TCP_ABORT_THRESHOLD:
669 774 if (!checkonly)
670 775 tcp->tcp_second_timer_threshold = *i1;
671 776 break;
672 777 case TCP_CONN_NOTIFY_THRESHOLD:
673 778 if (!checkonly)
674 779 tcp->tcp_first_ctimer_threshold = *i1;
675 780 break;
676 781 case TCP_CONN_ABORT_THRESHOLD:
677 782 if (!checkonly)
678 783 tcp->tcp_second_ctimer_threshold = *i1;
679 784 break;
680 785 case TCP_RECVDSTADDR:
681 786 if (tcp->tcp_state > TCPS_LISTEN) {
682 787 *outlenp = 0;
683 788 return (EOPNOTSUPP);
684 789 }
685 790 /* Setting done in conn_opt_set */
686 791 break;
687 792 case TCP_INIT_CWND:
688 793 if (checkonly)
689 794 break;
690 795
691 796 /*
692 797 * Only allow socket with network configuration
693 798 * privilege to set the initial cwnd to be larger
694 799 * than allowed by RFC 3390.
695 800 */
696 801 if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
697 802 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
698 803 != 0) {
699 804 *outlenp = 0;
700 805 return (reterr);
701 806 }
702 807 if (val > tcp_max_init_cwnd) {
703 808 *outlenp = 0;
704 809 return (EINVAL);
705 810 }
706 811 }
707 812
708 813 tcp->tcp_init_cwnd = val;
709 814
710 815 /*
711 816 * If the socket is connected, AND no outbound data
712 817 * has been sent, reset the actual cwnd values.
713 818 */
714 819 if (tcp->tcp_state == TCPS_ESTABLISHED &&
715 820 tcp->tcp_iss == tcp->tcp_snxt - 1) {
716 821 tcp->tcp_cwnd =
717 822 MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
718 823 }
719 824 break;
720 825
721 826 /*
722 827 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
723 828 * is in milliseconds. TCP_KEEPIDLE is introduced for
724 829 * compatibility with other Unix flavors.
725 830 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
726 831 * converting the input to milliseconds.
727 832 */
728 833 case TCP_KEEPIDLE:
729 834 *i1 *= 1000;
730 835 /* FALLTHRU */
731 836
732 837 case TCP_KEEPALIVE_THRESHOLD:
733 838 if (checkonly)
734 839 break;
735 840
736 841 if (*i1 < tcps->tcps_keepalive_interval_low ||
737 842 *i1 > tcps->tcps_keepalive_interval_high) {
738 843 *outlenp = 0;
739 844 return (EINVAL);
740 845 }
741 846 if (*i1 != tcp->tcp_ka_interval) {
742 847 tcp->tcp_ka_interval = *i1;
743 848 /*
744 849 * Check if we need to restart the
745 850 * keepalive timer.
746 851 */
747 852 if (tcp->tcp_ka_tid != 0) {
748 853 ASSERT(connp->conn_keepalive);
749 854 (void) TCP_TIMER_CANCEL(tcp,
750 855 tcp->tcp_ka_tid);
751 856 tcp->tcp_ka_last_intrvl = 0;
752 857 tcp->tcp_ka_tid = TCP_TIMER(tcp,
753 858 tcp_keepalive_timer,
754 859 tcp->tcp_ka_interval);
755 860 }
756 861 }
757 862 break;
758 863
759 864 /*
760 865 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
761 866 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
|
↓ open down ↓ |
96 lines elided |
↑ open up ↑ |
762 867 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
763 868 * tcp_ka_cnt.
764 869 */
765 870 case TCP_KEEPCNT:
766 871 if (checkonly)
767 872 break;
768 873
769 874 if (*i1 == 0) {
770 875 return (EINVAL);
771 876 } else if (tcp->tcp_ka_rinterval == 0) {
772 - if ((tcp->tcp_ka_abort_thres / *i1) <
773 - tcp->tcp_rto_min ||
774 - (tcp->tcp_ka_abort_thres / *i1) >
775 - tcp->tcp_rto_max)
776 - return (EINVAL);
877 + /*
878 + * When TCP_KEEPCNT is specified without first
879 + * specifying a TCP_KEEPINTVL, we infer an
880 + * interval based on a tunable specific to our
881 + * stack: the tcp_keepalive_abort_interval.
882 + * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
883 + * the unlikely event that that has been set.)
884 + * Given the abort interval's default value of
885 + * 480 seconds, low TCP_KEEPCNT values can
886 + * result in intervals that exceed the default
887 + * maximum RTO of 60 seconds. Rather than
888 + * fail in these cases, we (implicitly) clamp
889 + * the interval at the maximum RTO; if the
890 + * TCP_KEEPCNT is shortly followed by a
891 + * TCP_KEEPINTVL (as we expect), the abort
892 + * threshold will be recalculated correctly --
893 + * and if a TCP_KEEPINTVL is not forthcoming,
894 + * keep-alive will at least operate reasonably
895 + * given the underconfigured state.
896 + */
897 + uint32_t interval;
777 898
778 - tcp->tcp_ka_rinterval =
779 - tcp->tcp_ka_abort_thres / *i1;
899 + interval = tcp->tcp_ka_abort_thres / *i1;
900 +
901 + if (interval < tcp->tcp_rto_min)
902 + interval = tcp->tcp_rto_min;
903 +
904 + if (interval > tcp->tcp_rto_max)
905 + interval = tcp->tcp_rto_max;
906 +
907 + tcp->tcp_ka_rinterval = interval;
780 908 } else {
781 909 if ((*i1 * tcp->tcp_ka_rinterval) <
782 910 tcps->tcps_keepalive_abort_interval_low ||
783 911 (*i1 * tcp->tcp_ka_rinterval) >
784 912 tcps->tcps_keepalive_abort_interval_high)
785 913 return (EINVAL);
786 914 tcp->tcp_ka_abort_thres =
787 915 (*i1 * tcp->tcp_ka_rinterval);
788 916 }
789 917 tcp->tcp_ka_cnt = *i1;
790 918 break;
791 919 case TCP_KEEPINTVL:
792 920 /*
793 921 * TCP_KEEPINTVL is specified in seconds, but
794 922 * tcp_ka_rinterval is in milliseconds.
795 923 */
796 924
797 925 if (checkonly)
798 926 break;
799 927
800 928 if ((*i1 * 1000) < tcp->tcp_rto_min ||
801 929 (*i1 * 1000) > tcp->tcp_rto_max)
802 930 return (EINVAL);
803 931
804 932 if (tcp->tcp_ka_cnt == 0) {
805 933 tcp->tcp_ka_cnt =
806 934 tcp->tcp_ka_abort_thres / (*i1 * 1000);
807 935 } else {
808 936 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
809 937 tcps->tcps_keepalive_abort_interval_low ||
810 938 (*i1 * tcp->tcp_ka_cnt * 1000) >
811 939 tcps->tcps_keepalive_abort_interval_high)
812 940 return (EINVAL);
813 941 tcp->tcp_ka_abort_thres =
814 942 (*i1 * tcp->tcp_ka_cnt * 1000);
815 943 }
816 944 tcp->tcp_ka_rinterval = *i1 * 1000;
817 945 break;
818 946 case TCP_KEEPALIVE_ABORT_THRESHOLD:
819 947 if (!checkonly) {
820 948 if (*i1 <
821 949 tcps->tcps_keepalive_abort_interval_low ||
822 950 *i1 >
823 951 tcps->tcps_keepalive_abort_interval_high) {
824 952 *outlenp = 0;
825 953 return (EINVAL);
826 954 }
827 955 tcp->tcp_ka_abort_thres = *i1;
828 956 tcp->tcp_ka_cnt = 0;
829 957 tcp->tcp_ka_rinterval = 0;
830 958 }
831 959 break;
832 960 case TCP_CORK:
833 961 if (!checkonly) {
834 962 /*
835 963 * if tcp->tcp_cork was set and is now
836 964 * being unset, we have to make sure that
837 965 * the remaining data gets sent out. Also
838 966 * unset tcp->tcp_cork so that tcp_wput_data()
839 967 * can send data even if it is less than mss
840 968 */
841 969 if (tcp->tcp_cork && onoff == 0 &&
842 970 tcp->tcp_unsent > 0) {
843 971 tcp->tcp_cork = B_FALSE;
844 972 tcp_wput_data(tcp, NULL, B_FALSE);
845 973 }
846 974 tcp->tcp_cork = onoff;
847 975 }
848 976 break;
849 977 case TCP_RTO_INITIAL: {
850 978 clock_t rto;
851 979
852 980 if (checkonly || val == 0)
853 981 break;
854 982
855 983 /*
856 984 * Sanity checks
857 985 *
858 986 * The initial RTO should be bounded by the minimum
859 987 * and maximum RTO. And it should also be smaller
860 988 * than the connect attempt abort timeout. Otherwise,
861 989 * the connection won't be aborted in a period
862 990 * reasonably close to that timeout.
863 991 */
864 992 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
865 993 val > tcp->tcp_second_ctimer_threshold ||
866 994 val < tcps->tcps_rexmit_interval_initial_low ||
867 995 val > tcps->tcps_rexmit_interval_initial_high) {
868 996 *outlenp = 0;
869 997 return (EINVAL);
870 998 }
871 999 tcp->tcp_rto_initial = val;
872 1000
873 1001 /*
874 1002 * If TCP has not sent anything, need to re-calculate
875 1003 * tcp_rto. Otherwise, this option change does not
876 1004 * really affect anything.
877 1005 */
878 1006 if (tcp->tcp_state >= TCPS_SYN_SENT)
879 1007 break;
880 1008
881 1009 tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
882 1010 tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
883 1011 rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
884 1012 tcps->tcps_rexmit_interval_extra +
885 1013 (tcp->tcp_rtt_sa >> 5) +
886 1014 tcps->tcps_conn_grace_period;
887 1015 TCP_SET_RTO(tcp, rto);
888 1016 break;
889 1017 }
890 1018 case TCP_RTO_MIN:
891 1019 if (checkonly || val == 0)
892 1020 break;
893 1021
894 1022 if (val < tcps->tcps_rexmit_interval_min_low ||
895 1023 val > tcps->tcps_rexmit_interval_min_high ||
896 1024 val > tcp->tcp_rto_max) {
897 1025 *outlenp = 0;
898 1026 return (EINVAL);
899 1027 }
900 1028 tcp->tcp_rto_min = val;
901 1029 if (tcp->tcp_rto < val)
902 1030 tcp->tcp_rto = val;
903 1031 break;
904 1032 case TCP_RTO_MAX:
905 1033 if (checkonly || val == 0)
906 1034 break;
907 1035
908 1036 /*
909 1037 * Sanity checks
910 1038 *
911 1039 * The maximum RTO should not be larger than the
912 1040 * connection abort timeout. Otherwise, the
913 1041 * connection won't be aborted in a period reasonably
914 1042 * close to that timeout.
915 1043 */
916 1044 if (val < tcps->tcps_rexmit_interval_max_low ||
917 1045 val > tcps->tcps_rexmit_interval_max_high ||
918 1046 val < tcp->tcp_rto_min ||
919 1047 val > tcp->tcp_second_timer_threshold) {
920 1048 *outlenp = 0;
921 1049 return (EINVAL);
922 1050 }
923 1051 tcp->tcp_rto_max = val;
924 1052 if (tcp->tcp_rto > val)
925 1053 tcp->tcp_rto = val;
926 1054 break;
927 1055 case TCP_LINGER2:
928 1056 if (checkonly || *i1 == 0)
929 1057 break;
930 1058
931 1059 /*
932 1060 * Note that the option value's unit is seconds. And
933 1061 * the value should be bigger than the private
934 1062 * parameter tcp_fin_wait_2_flush_interval's lower
935 1063 * bound and smaller than the current value of that
936 1064 * parameter. It should be smaller than the current
937 1065 * value to avoid an app setting TCP_LINGER2 to a big
938 1066 * value, causing resources to be held up too long in
939 1067 * FIN-WAIT-2 state.
940 1068 */
941 1069 if (*i1 < 0 ||
942 1070 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
943 1071 *i1 ||
944 1072 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
945 1073 *i1) {
|
↓ open down ↓ |
156 lines elided |
↑ open up ↑ |
946 1074 *outlenp = 0;
947 1075 return (EINVAL);
948 1076 }
949 1077 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
950 1078 break;
951 1079 default:
952 1080 break;
953 1081 }
954 1082 break;
955 1083 case IPPROTO_IP:
956 - if (connp->conn_family != AF_INET) {
957 - *outlenp = 0;
958 - return (EINVAL);
959 - }
960 1084 switch (name) {
961 1085 case IP_SEC_OPT:
962 1086 /*
963 1087 * We should not allow policy setting after
964 1088 * we start listening for connections.
965 1089 */
966 1090 if (tcp->tcp_state == TCPS_LISTEN) {
967 1091 return (EINVAL);
968 1092 }
969 1093 break;
970 1094 }
971 1095 break;
972 1096 case IPPROTO_IPV6:
973 1097 /*
974 1098 * IPPROTO_IPV6 options are only supported for sockets
975 1099 * that are using IPv6 on the wire.
976 1100 */
977 1101 if (connp->conn_ipversion != IPV6_VERSION) {
978 1102 *outlenp = 0;
979 1103 return (EINVAL);
980 1104 }
981 1105
982 1106 switch (name) {
983 1107 case IPV6_RECVPKTINFO:
984 1108 if (!checkonly) {
985 1109 /* Force it to be sent up with the next msg */
986 1110 tcp->tcp_recvifindex = 0;
987 1111 }
988 1112 break;
989 1113 case IPV6_RECVTCLASS:
990 1114 if (!checkonly) {
991 1115 /* Force it to be sent up with the next msg */
992 1116 tcp->tcp_recvtclass = 0xffffffffU;
993 1117 }
994 1118 break;
995 1119 case IPV6_RECVHOPLIMIT:
996 1120 if (!checkonly) {
997 1121 /* Force it to be sent up with the next msg */
998 1122 tcp->tcp_recvhops = 0xffffffffU;
999 1123 }
1000 1124 break;
1001 1125 case IPV6_PKTINFO:
1002 1126 /* This is an extra check for TCP */
1003 1127 if (inlen == sizeof (struct in6_pktinfo)) {
1004 1128 struct in6_pktinfo *pkti;
1005 1129
1006 1130 pkti = (struct in6_pktinfo *)invalp;
1007 1131 /*
1008 1132 * RFC 3542 states that ipi6_addr must be
1009 1133 * the unspecified address when setting the
1010 1134 * IPV6_PKTINFO sticky socket option on a
1011 1135 * TCP socket.
1012 1136 */
1013 1137 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1014 1138 return (EINVAL);
1015 1139 }
1016 1140 break;
1017 1141 case IPV6_SEC_OPT:
1018 1142 /*
1019 1143 * We should not allow policy setting after
1020 1144 * we start listening for connections.
1021 1145 */
1022 1146 if (tcp->tcp_state == TCPS_LISTEN) {
1023 1147 return (EINVAL);
1024 1148 }
1025 1149 break;
1026 1150 }
1027 1151 break;
1028 1152 }
1029 1153 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1030 1154 checkonly, cr);
1031 1155 if (reterr != 0) {
1032 1156 *outlenp = 0;
1033 1157 return (reterr);
1034 1158 }
1035 1159
1036 1160 /*
1037 1161 * Common case of OK return with outval same as inval
1038 1162 */
1039 1163 if (invalp != outvalp) {
1040 1164 /* don't trust bcopy for identical src/dst */
1041 1165 (void) bcopy(invalp, outvalp, inlen);
1042 1166 }
1043 1167 *outlenp = inlen;
1044 1168
1045 1169 if (coas.coa_changed & COA_HEADER_CHANGED) {
1046 1170 /* If we are connected we rebuild the headers */
1047 1171 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1048 1172 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1049 1173 reterr = tcp_build_hdrs(tcp);
1050 1174 if (reterr != 0)
1051 1175 return (reterr);
1052 1176 }
1053 1177 }
1054 1178 if (coas.coa_changed & COA_ROUTE_CHANGED) {
1055 1179 in6_addr_t nexthop;
1056 1180
1057 1181 /*
1058 1182 * If we are connected we re-cache the information.
1059 1183 * We ignore errors to preserve BSD behavior.
1060 1184 * Note that we don't redo IPsec policy lookup here
1061 1185 * since the final destination (or source) didn't change.
1062 1186 */
1063 1187 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1064 1188 &connp->conn_faddr_v6, &nexthop);
1065 1189
1066 1190 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1067 1191 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1068 1192 (void) ip_attr_connect(connp, connp->conn_ixa,
1069 1193 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1070 1194 &nexthop, connp->conn_fport, NULL, NULL,
1071 1195 IPDF_VERIFY_DST);
1072 1196 }
1073 1197 }
1074 1198 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1075 1199 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1076 1200 }
1077 1201 if (coas.coa_changed & COA_WROFF_CHANGED) {
1078 1202 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1079 1203 tcps->tcps_wroff_xtra;
1080 1204 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1081 1205 connp->conn_wroff);
1082 1206 }
1083 1207 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1084 1208 if (IPCL_IS_NONSTR(connp))
1085 1209 proto_set_rx_oob_opt(connp, onoff);
1086 1210 }
1087 1211 return (0);
1088 1212 }
|
↓ open down ↓ |
119 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX