Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright 2016 Joyent, Inc.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/stream.h>
29 29 #define _SUN_TPI_VERSION 2
30 30 #include <sys/tihdr.h>
31 31 #include <sys/socket.h>
32 32 #include <sys/xti_xtiopt.h>
33 33 #include <sys/xti_inet.h>
34 34 #include <sys/policy.h>
35 35
36 36 #include <inet/common.h>
37 37 #include <netinet/ip6.h>
38 38 #include <inet/ip.h>
39 39
40 40 #include <netinet/in.h>
41 41 #include <netinet/tcp.h>
42 42 #include <inet/optcom.h>
43 43 #include <inet/proto_set.h>
44 44 #include <inet/tcp_impl.h>
45 45
46 46 static int tcp_opt_default(queue_t *, int, int, uchar_t *);
47 47
48 48 /*
49 49 * Table of all known options handled on a TCP protocol stack.
50 50 *
51 51 * Note: This table contains options processed by both TCP and IP levels
52 52 * and is the superset of options that can be performed on a TCP over IP
53 53 * stack.
54 54 */
55 55 opdes_t tcp_opt_arr[] = {
56 56
57 57 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
58 58 sizeof (struct linger), 0 },
59 59
60 60 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
61 61 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62 62 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63 63 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
64 64 },
65 65 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 66 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 67 { SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68 68 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 69 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
70 70 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
71 71 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72 72 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
73 73 sizeof (struct timeval), 0 },
74 74 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
75 75 sizeof (struct timeval), 0 },
76 76 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
77 77 },
78 78 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
79 79 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
80 80 0 },
81 81 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
82 82 0 },
83 83 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
84 84 0 },
85 85 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
86 86 0 },
87 87 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
88 88
89 89 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
90 90
91 91 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
92 92
93 93 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
94 94 },
95 95 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
96 96 536 },
97 97
98 98 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
99 99 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
100 100
101 101 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
102 102 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
103 103
104 104 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
105 105 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
106 106
107 107 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
108 108 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
109 109
110 110 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
111 111 0 },
112 112
113 113 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
114 114 sizeof (int), 0 },
115 115
116 116 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
117 117 },
118 118
119 119 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
120 120 sizeof (int), 0 },
121 121
122 122 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
123 123 sizeof (int), 0 },
124 124
125 125 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
126 126
127 127 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
128 128
129 129 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
130 130
131 131 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
132 132 sizeof (int), 0 },
133 133
134 134 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
135 135
136 136 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
137 137
138 138 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
139 139
140 140 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
141 141
142 142 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
143 143
144 144 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
145 145 (OP_VARLEN|OP_NODEFAULT),
146 146 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
147 147 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
148 148 (OP_VARLEN|OP_NODEFAULT),
149 149 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
150 150
151 151 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
152 152 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
153 153 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
154 154 sizeof (int), -1 /* not initialized */ },
155 155
156 156 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
157 157 sizeof (ipsec_req_t), -1 /* not initialized */ },
158 158
159 159 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
160 160 sizeof (int), 0 /* no ifindex */ },
161 161
162 162 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
163 163 sizeof (int), 0 },
164 164
165 165 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
166 166 sizeof (int), -1 /* not initialized */ },
167 167
168 168 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
169 169 sizeof (int), 0 /* no ifindex */ },
170 170
171 171 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
172 172
173 173 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
174 174 sizeof (in_addr_t), -1 /* not initialized */ },
175 175
176 176 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
177 177 sizeof (int), 0 },
178 178
179 179 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
180 180 (OP_NODEFAULT|OP_VARLEN),
181 181 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
182 182 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
183 183 OP_NODEFAULT,
184 184 sizeof (sin6_t), -1 /* not initialized */ },
185 185 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
186 186 (OP_VARLEN|OP_NODEFAULT), 255*8,
187 187 -1 /* not initialized */ },
188 188 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
189 189 (OP_VARLEN|OP_NODEFAULT), 255*8,
190 190 -1 /* not initialized */ },
191 191 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
192 192 (OP_VARLEN|OP_NODEFAULT), 255*8,
193 193 -1 /* not initialized */ },
194 194 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
195 195 (OP_VARLEN|OP_NODEFAULT), 255*8,
196 196 -1 /* not initialized */ },
197 197 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
198 198 OP_NODEFAULT,
199 199 sizeof (int), -1 /* not initialized */ },
200 200 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
201 201 OP_NODEFAULT,
202 202 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
203 203 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
204 204 sizeof (int), 0 },
205 205 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
206 206 sizeof (int), 0 },
207 207 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
208 208 sizeof (int), 0 },
209 209
210 210 /* Enable receipt of ancillary data */
211 211 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 212 sizeof (int), 0 },
213 213 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
214 214 sizeof (int), 0 },
215 215 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
216 216 sizeof (int), 0 },
217 217 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 218 sizeof (int), 0 },
219 219 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 220 sizeof (int), 0 },
221 221 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 222 sizeof (int), 0 },
223 223 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 224 sizeof (int), 0 },
225 225 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
226 226 sizeof (int), 0 },
227 227
228 228 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
229 229 sizeof (ipsec_req_t), -1 /* not initialized */ },
230 230 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
231 231 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
232 232 };
233 233
234 234 /*
235 235 * Table of all supported levels
236 236 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
237 237 * any supported options so we need this info separately.
238 238 *
239 239 * This is needed only for topmost tpi providers and is used only by
240 240 * XTI interfaces.
241 241 */
242 242 optlevel_t tcp_valid_levels_arr[] = {
243 243 XTI_GENERIC,
244 244 SOL_SOCKET,
245 245 IPPROTO_TCP,
246 246 IPPROTO_IP,
247 247 IPPROTO_IPV6
248 248 };
249 249
250 250
251 251 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
252 252 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
253 253
254 254 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
255 255
256 256 /*
257 257 * Initialize option database object for TCP
258 258 *
259 259 * This object represents database of options to search passed to
260 260 * {sock,tpi}optcom_req() interface routine to take care of option
261 261 * management and associated methods.
262 262 */
263 263
264 264 optdb_obj_t tcp_opt_obj = {
265 265 tcp_opt_default, /* TCP default value function pointer */
266 266 tcp_tpi_opt_get, /* TCP get function pointer */
267 267 tcp_tpi_opt_set, /* TCP set function pointer */
268 268 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
269 269 tcp_opt_arr, /* TCP option database */
270 270 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
271 271 tcp_valid_levels_arr /* TCP valid level array */
272 272 };
273 273
274 274 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
275 275
276 276 /*
277 277 * Some TCP options can be "set" by requesting them in the option
278 278 * buffer. This is needed for XTI feature test though we do not
279 279 * allow it in general. We interpret that this mechanism is more
280 280 * applicable to OSI protocols and need not be allowed in general.
281 281 * This routine filters out options for which it is not allowed (most)
282 282 * and lets through those (few) for which it is. [ The XTI interface
283 283 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
284 284 * ever implemented will have to be allowed here ].
285 285 */
286 286 static boolean_t
287 287 tcp_allow_connopt_set(int level, int name)
288 288 {
289 289
290 290 switch (level) {
291 291 case IPPROTO_TCP:
292 292 switch (name) {
293 293 case TCP_NODELAY:
294 294 return (B_TRUE);
295 295 default:
296 296 return (B_FALSE);
297 297 }
298 298 /*NOTREACHED*/
299 299 default:
300 300 return (B_FALSE);
301 301 }
302 302 /*NOTREACHED*/
303 303 }
304 304
305 305 /*
306 306 * This routine gets default values of certain options whose default
307 307 * values are maintained by protocol specific code
308 308 */
309 309 /* ARGSUSED */
310 310 static int
311 311 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
312 312 {
313 313 int32_t *i1 = (int32_t *)ptr;
314 314 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
315 315
316 316 switch (level) {
317 317 case IPPROTO_TCP:
318 318 switch (name) {
319 319 case TCP_NOTIFY_THRESHOLD:
320 320 *i1 = tcps->tcps_ip_notify_interval;
321 321 break;
322 322 case TCP_ABORT_THRESHOLD:
323 323 *i1 = tcps->tcps_ip_abort_interval;
324 324 break;
325 325 case TCP_CONN_NOTIFY_THRESHOLD:
326 326 *i1 = tcps->tcps_ip_notify_cinterval;
327 327 break;
328 328 case TCP_CONN_ABORT_THRESHOLD:
329 329 *i1 = tcps->tcps_ip_abort_cinterval;
330 330 break;
331 331 default:
332 332 return (-1);
333 333 }
334 334 break;
335 335 case IPPROTO_IP:
336 336 switch (name) {
337 337 case IP_TTL:
338 338 *i1 = tcps->tcps_ipv4_ttl;
339 339 break;
340 340 default:
341 341 return (-1);
342 342 }
343 343 break;
344 344 case IPPROTO_IPV6:
345 345 switch (name) {
346 346 case IPV6_UNICAST_HOPS:
347 347 *i1 = tcps->tcps_ipv6_hoplimit;
348 348 break;
349 349 default:
350 350 return (-1);
351 351 }
352 352 break;
353 353 default:
354 354 return (-1);
355 355 }
356 356 return (sizeof (int));
357 357 }
358 358
359 359 /*
360 360 * TCP routine to get the values of options.
361 361 */
362 362 int
363 363 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
364 364 {
365 365 int *i1 = (int *)ptr;
366 366 tcp_t *tcp = connp->conn_tcp;
367 367 conn_opt_arg_t coas;
368 368 int retval;
369 369
370 370 coas.coa_connp = connp;
371 371 coas.coa_ixa = connp->conn_ixa;
372 372 coas.coa_ipp = &connp->conn_xmit_ipp;
373 373 coas.coa_ancillary = B_FALSE;
374 374 coas.coa_changed = 0;
375 375
376 376 switch (level) {
377 377 case SOL_SOCKET:
378 378 switch (name) {
379 379 case SO_SND_COPYAVOID:
380 380 *i1 = tcp->tcp_snd_zcopy_on ?
381 381 SO_SND_COPYAVOID : 0;
382 382 return (sizeof (int));
383 383 case SO_ACCEPTCONN:
384 384 *i1 = (tcp->tcp_state == TCPS_LISTEN);
385 385 return (sizeof (int));
386 386 }
387 387 break;
388 388 case IPPROTO_TCP:
389 389 switch (name) {
390 390 case TCP_NODELAY:
391 391 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
392 392 return (sizeof (int));
393 393 case TCP_MAXSEG:
394 394 *i1 = tcp->tcp_mss;
395 395 return (sizeof (int));
396 396 case TCP_NOTIFY_THRESHOLD:
397 397 *i1 = (int)tcp->tcp_first_timer_threshold;
398 398 return (sizeof (int));
399 399 case TCP_ABORT_THRESHOLD:
400 400 *i1 = tcp->tcp_second_timer_threshold;
401 401 return (sizeof (int));
402 402 case TCP_CONN_NOTIFY_THRESHOLD:
403 403 *i1 = tcp->tcp_first_ctimer_threshold;
404 404 return (sizeof (int));
405 405 case TCP_CONN_ABORT_THRESHOLD:
406 406 *i1 = tcp->tcp_second_ctimer_threshold;
407 407 return (sizeof (int));
408 408 case TCP_INIT_CWND:
409 409 *i1 = tcp->tcp_init_cwnd;
410 410 return (sizeof (int));
411 411 case TCP_KEEPALIVE_THRESHOLD:
412 412 *i1 = tcp->tcp_ka_interval;
413 413 return (sizeof (int));
414 414
415 415 /*
416 416 * TCP_KEEPIDLE expects value in seconds, but
417 417 * tcp_ka_interval is in milliseconds.
418 418 */
419 419 case TCP_KEEPIDLE:
420 420 *i1 = tcp->tcp_ka_interval / 1000;
421 421 return (sizeof (int));
422 422 case TCP_KEEPCNT:
423 423 *i1 = tcp->tcp_ka_cnt;
424 424 return (sizeof (int));
425 425
426 426 /*
427 427 * TCP_KEEPINTVL expects value in seconds, but
428 428 * tcp_ka_rinterval is in milliseconds.
429 429 */
430 430 case TCP_KEEPINTVL:
431 431 *i1 = tcp->tcp_ka_rinterval / 1000;
432 432 return (sizeof (int));
433 433 case TCP_KEEPALIVE_ABORT_THRESHOLD:
434 434 *i1 = tcp->tcp_ka_abort_thres;
435 435 return (sizeof (int));
436 436 case TCP_CORK:
437 437 *i1 = tcp->tcp_cork;
438 438 return (sizeof (int));
439 439 case TCP_RTO_INITIAL:
440 440 *i1 = tcp->tcp_rto_initial;
441 441 return (sizeof (uint32_t));
442 442 case TCP_RTO_MIN:
443 443 *i1 = tcp->tcp_rto_min;
444 444 return (sizeof (uint32_t));
445 445 case TCP_RTO_MAX:
446 446 *i1 = tcp->tcp_rto_max;
447 447 return (sizeof (uint32_t));
448 448 case TCP_LINGER2:
449 449 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
450 450 return (sizeof (int));
451 451 }
452 452 break;
453 453 case IPPROTO_IP:
454 454 if (connp->conn_family != AF_INET)
455 455 return (-1);
456 456 switch (name) {
457 457 case IP_OPTIONS:
458 458 case T_IP_OPTIONS:
459 459 /* Caller ensures enough space */
460 460 return (ip_opt_get_user(connp, ptr));
461 461 default:
462 462 break;
463 463 }
464 464 break;
465 465
466 466 case IPPROTO_IPV6:
467 467 /*
468 468 * IPPROTO_IPV6 options are only supported for sockets
469 469 * that are using IPv6 on the wire.
470 470 */
471 471 if (connp->conn_ipversion != IPV6_VERSION) {
472 472 return (-1);
473 473 }
474 474 switch (name) {
475 475 case IPV6_PATHMTU:
476 476 if (tcp->tcp_state < TCPS_ESTABLISHED)
477 477 return (-1);
478 478 break;
479 479 }
480 480 break;
481 481 }
482 482 mutex_enter(&connp->conn_lock);
483 483 retval = conn_opt_get(&coas, level, name, ptr);
484 484 mutex_exit(&connp->conn_lock);
485 485 return (retval);
486 486 }
487 487
488 488 /*
489 489 * Set a TCP connection's participation in SO_REUSEPORT. This operation is
490 490 * performed under the protection of the squeue via tcp_setsockopt.
491 491 * The manipulation of tcp_rg_bind, as part of this operation, is subject to
492 492 * these constraints:
493 493 * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport
494 494 * under the protection of the squeue.
495 495 * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be
496 496 * altered until such time as tcp_free() cleans up the connection.
497 497 * 3. A connection undergoing bind, which matches to a connection participating
498 498 * in port-reuse, will switch its tcp_rg_bind pointer when it joins the
499 499 * group of an existing connection in tcp_bindi().
500 500 */
501 501 static int
502 502 tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
503 503 {
504 504 tcp_t *tcp = connp->conn_tcp;
505 505 struct tcp_rg_s *rg;
506 506
507 507 if (!IPCL_IS_NONSTR(connp)) {
508 508 if (do_enable) {
509 509 /*
510 510 * SO_REUSEPORT cannot be enabled on sockets which have
511 511 * fallen back to the STREAMS API.
512 512 */
513 513 return (EINVAL);
514 514 } else {
515 515 /*
516 516 * A connection with SO_REUSEPORT enabled should be
517 517 * prevented from falling back to STREAMS mode via
518 518 * logic in tcp_fallback. It is legal, however, for
519 519 * fallen-back connections to affirm the disabled state
520 520 * of SO_REUSEPORT.
521 521 */
522 522 ASSERT(connp->conn_reuseport == 0);
523 523 return (0);
524 524 }
525 525 }
526 526 if (tcp->tcp_state <= TCPS_CLOSED) {
527 527 return (EINVAL);
528 528 }
529 529 if (connp->conn_reuseport == 0 && do_enable) {
530 530 /* disabled -> enabled */
531 531 if (tcp->tcp_rg_bind != NULL) {
532 532 tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
533 533 } else {
534 534 /*
535 535 * Connection state is not a concern when initially
536 536 * populating tcp_rg_bind. Setting it to non-NULL on a
537 537 * bound or listening connection would only mean that
538 538 * new reused-port binds become a possibility.
539 539 */
540 540 if ((rg = tcp_rg_init(tcp)) == NULL) {
541 541 return (ENOMEM);
542 542 }
543 543 tcp->tcp_rg_bind = rg;
544 544 }
545 545 connp->conn_reuseport = 1;
546 546 } else if (connp->conn_reuseport != 0 && !do_enable) {
547 547 /* enabled -> disabled */
548 548 ASSERT(tcp->tcp_rg_bind != NULL);
549 549 if (tcp->tcp_state == TCPS_IDLE) {
550 550 /*
551 551 * If the connection has not been bound yet, discard
552 552 * the reuse group state. Since disabling SO_REUSEPORT
553 553 * on a bound socket will _not_ prevent others from
554 554 * reusing the port, the presence of tcp_rg_bind is
555 555 * used to determine reuse availability, not
556 556 * conn_reuseport.
557 557 *
558 558 * This allows proper behavior for examples such as:
559 559 *
560 560 * setsockopt(fd1, ... SO_REUSEPORT, &on_val...);
561 561 * bind(fd1, &myaddr, ...);
562 562 * setsockopt(fd1, ... SO_REUSEPORT, &off_val...);
563 563 *
564 564 * setsockopt(fd2, ... SO_REUSEPORT, &on_val...);
565 565 * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED
566 566 *
567 567 */
568 568 rg = tcp->tcp_rg_bind;
569 569 tcp->tcp_rg_bind = NULL;
570 570 VERIFY(tcp_rg_remove(rg, tcp));
571 571 tcp_rg_destroy(rg);
572 572 } else {
573 573 /*
574 574 * If a connection has been bound, it's no longer safe
575 575 * to manipulate tcp_rg_bind until connection clean-up
576 576 * during tcp_free. Just mark the member status of the
577 577 * connection as inactive.
578 578 */
579 579 tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
580 580 }
581 581 connp->conn_reuseport = 0;
582 582 }
583 583 return (0);
584 584 }
585 585
586 586 /*
587 587 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
588 588 * Parameters are assumed to be verified by the caller.
589 589 */
590 590 /* ARGSUSED */
591 591 int
592 592 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
593 593 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
594 594 void *thisdg_attrs, cred_t *cr)
595 595 {
596 596 tcp_t *tcp = connp->conn_tcp;
597 597 int *i1 = (int *)invalp;
598 598 boolean_t onoff = (*i1 == 0) ? 0 : 1;
599 599 boolean_t checkonly;
600 600 int reterr;
601 601 tcp_stack_t *tcps = tcp->tcp_tcps;
602 602 conn_opt_arg_t coas;
603 603 uint32_t val = *((uint32_t *)invalp);
604 604
605 605 coas.coa_connp = connp;
606 606 coas.coa_ixa = connp->conn_ixa;
607 607 coas.coa_ipp = &connp->conn_xmit_ipp;
608 608 coas.coa_ancillary = B_FALSE;
609 609 coas.coa_changed = 0;
610 610
611 611 switch (optset_context) {
612 612 case SETFN_OPTCOM_CHECKONLY:
613 613 checkonly = B_TRUE;
614 614 /*
615 615 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
616 616 * inlen != 0 implies value supplied and
617 617 * we have to "pretend" to set it.
618 618 * inlen == 0 implies that there is no
619 619 * value part in T_CHECK request and just validation
620 620 * done elsewhere should be enough, we just return here.
621 621 */
622 622 if (inlen == 0) {
623 623 *outlenp = 0;
624 624 return (0);
625 625 }
626 626 break;
627 627 case SETFN_OPTCOM_NEGOTIATE:
628 628 checkonly = B_FALSE;
629 629 break;
630 630 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
631 631 case SETFN_CONN_NEGOTIATE:
632 632 checkonly = B_FALSE;
633 633 /*
634 634 * Negotiating local and "association-related" options
635 635 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
636 636 * primitives is allowed by XTI, but we choose
637 637 * to not implement this style negotiation for Internet
638 638 * protocols (We interpret it is a must for OSI world but
639 639 * optional for Internet protocols) for all options.
640 640 * [ Will do only for the few options that enable test
641 641 * suites that our XTI implementation of this feature
642 642 * works for transports that do allow it ]
643 643 */
644 644 if (!tcp_allow_connopt_set(level, name)) {
645 645 *outlenp = 0;
646 646 return (EINVAL);
647 647 }
648 648 break;
649 649 default:
650 650 /*
651 651 * We should never get here
652 652 */
653 653 *outlenp = 0;
654 654 return (EINVAL);
655 655 }
656 656
657 657 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
658 658 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
659 659
660 660 /*
661 661 * For TCP, we should have no ancillary data sent down
662 662 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
663 663 * has to be zero.
664 664 */
665 665 ASSERT(thisdg_attrs == NULL);
666 666
667 667 /*
668 668 * For fixed length options, no sanity check
669 669 * of passed in length is done. It is assumed *_optcom_req()
670 670 * routines do the right thing.
671 671 */
672 672 switch (level) {
673 673 case SOL_SOCKET:
674 674 switch (name) {
675 675 case SO_KEEPALIVE:
676 676 if (checkonly) {
677 677 /* check only case */
678 678 break;
679 679 }
680 680
681 681 if (!onoff) {
682 682 if (connp->conn_keepalive) {
683 683 if (tcp->tcp_ka_tid != 0) {
684 684 (void) TCP_TIMER_CANCEL(tcp,
685 685 tcp->tcp_ka_tid);
686 686 tcp->tcp_ka_tid = 0;
687 687 }
688 688 connp->conn_keepalive = 0;
689 689 }
690 690 break;
691 691 }
692 692 if (!connp->conn_keepalive) {
693 693 /* Crank up the keepalive timer */
694 694 tcp->tcp_ka_last_intrvl = 0;
695 695 tcp->tcp_ka_tid = TCP_TIMER(tcp,
696 696 tcp_keepalive_timer, tcp->tcp_ka_interval);
697 697 connp->conn_keepalive = 1;
698 698 }
699 699 break;
700 700 case SO_SNDBUF: {
701 701 if (*i1 > tcps->tcps_max_buf) {
702 702 *outlenp = 0;
703 703 return (ENOBUFS);
704 704 }
705 705 if (checkonly)
706 706 break;
707 707
708 708 connp->conn_sndbuf = *i1;
709 709 if (tcps->tcps_snd_lowat_fraction != 0) {
710 710 connp->conn_sndlowat = connp->conn_sndbuf /
711 711 tcps->tcps_snd_lowat_fraction;
712 712 }
713 713 (void) tcp_maxpsz_set(tcp, B_TRUE);
714 714 /*
715 715 * If we are flow-controlled, recheck the condition.
716 716 * There are apps that increase SO_SNDBUF size when
717 717 * flow-controlled (EWOULDBLOCK), and expect the flow
718 718 * control condition to be lifted right away.
719 719 */
720 720 mutex_enter(&tcp->tcp_non_sq_lock);
721 721 if (tcp->tcp_flow_stopped &&
722 722 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
723 723 tcp_clrqfull(tcp);
724 724 }
725 725 mutex_exit(&tcp->tcp_non_sq_lock);
726 726 *outlenp = inlen;
727 727 return (0);
728 728 }
729 729 case SO_RCVBUF:
730 730 if (*i1 > tcps->tcps_max_buf) {
731 731 *outlenp = 0;
732 732 return (ENOBUFS);
733 733 }
734 734 /* Silently ignore zero */
735 735 if (!checkonly && *i1 != 0) {
736 736 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
737 737 (void) tcp_rwnd_set(tcp, *i1);
738 738 }
739 739 /*
740 740 * XXX should we return the rwnd here
741 741 * and tcp_opt_get ?
742 742 */
743 743 *outlenp = inlen;
744 744 return (0);
745 745 case SO_SND_COPYAVOID:
746 746 if (!checkonly) {
747 747 if (tcp->tcp_loopback ||
748 748 (onoff != 1) || !tcp_zcopy_check(tcp)) {
749 749 *outlenp = 0;
750 750 return (EOPNOTSUPP);
751 751 }
752 752 tcp->tcp_snd_zcopy_aware = 1;
753 753 }
754 754 *outlenp = inlen;
755 755 return (0);
756 756 case SO_REUSEPORT:
757 757 if (!checkonly) {
758 758 return (tcp_set_reuseport(connp, *i1 != 0));
759 759 }
760 760 return (0);
761 761 }
762 762 break;
763 763 case IPPROTO_TCP:
764 764 switch (name) {
765 765 case TCP_NODELAY:
766 766 if (!checkonly)
767 767 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
768 768 break;
769 769 case TCP_NOTIFY_THRESHOLD:
770 770 if (!checkonly)
771 771 tcp->tcp_first_timer_threshold = *i1;
772 772 break;
773 773 case TCP_ABORT_THRESHOLD:
774 774 if (!checkonly)
775 775 tcp->tcp_second_timer_threshold = *i1;
776 776 break;
777 777 case TCP_CONN_NOTIFY_THRESHOLD:
778 778 if (!checkonly)
779 779 tcp->tcp_first_ctimer_threshold = *i1;
780 780 break;
781 781 case TCP_CONN_ABORT_THRESHOLD:
782 782 if (!checkonly)
783 783 tcp->tcp_second_ctimer_threshold = *i1;
784 784 break;
785 785 case TCP_RECVDSTADDR:
786 786 if (tcp->tcp_state > TCPS_LISTEN) {
787 787 *outlenp = 0;
788 788 return (EOPNOTSUPP);
789 789 }
790 790 /* Setting done in conn_opt_set */
791 791 break;
792 792 case TCP_INIT_CWND:
793 793 if (checkonly)
794 794 break;
795 795
796 796 /*
797 797 * Only allow socket with network configuration
798 798 * privilege to set the initial cwnd to be larger
799 799 * than allowed by RFC 3390.
800 800 */
801 801 if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
802 802 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
803 803 != 0) {
804 804 *outlenp = 0;
805 805 return (reterr);
806 806 }
807 807 if (val > tcp_max_init_cwnd) {
808 808 *outlenp = 0;
809 809 return (EINVAL);
810 810 }
811 811 }
812 812
813 813 tcp->tcp_init_cwnd = val;
814 814
815 815 /*
816 816 * If the socket is connected, AND no outbound data
817 817 * has been sent, reset the actual cwnd values.
818 818 */
819 819 if (tcp->tcp_state == TCPS_ESTABLISHED &&
820 820 tcp->tcp_iss == tcp->tcp_snxt - 1) {
821 821 tcp->tcp_cwnd =
822 822 MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
823 823 }
824 824 break;
825 825
826 826 /*
827 827 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
828 828 * is in milliseconds. TCP_KEEPIDLE is introduced for
829 829 * compatibility with other Unix flavors.
830 830 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
831 831 * converting the input to milliseconds.
832 832 */
833 833 case TCP_KEEPIDLE:
834 834 *i1 *= 1000;
835 835 /* FALLTHRU */
836 836
837 837 case TCP_KEEPALIVE_THRESHOLD:
838 838 if (checkonly)
839 839 break;
840 840
841 841 if (*i1 < tcps->tcps_keepalive_interval_low ||
842 842 *i1 > tcps->tcps_keepalive_interval_high) {
843 843 *outlenp = 0;
844 844 return (EINVAL);
845 845 }
846 846 if (*i1 != tcp->tcp_ka_interval) {
847 847 tcp->tcp_ka_interval = *i1;
848 848 /*
849 849 * Check if we need to restart the
850 850 * keepalive timer.
851 851 */
852 852 if (tcp->tcp_ka_tid != 0) {
853 853 ASSERT(connp->conn_keepalive);
854 854 (void) TCP_TIMER_CANCEL(tcp,
855 855 tcp->tcp_ka_tid);
856 856 tcp->tcp_ka_last_intrvl = 0;
857 857 tcp->tcp_ka_tid = TCP_TIMER(tcp,
858 858 tcp_keepalive_timer,
859 859 tcp->tcp_ka_interval);
860 860 }
861 861 }
862 862 break;
863 863
864 864 /*
865 865 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
866 866 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
867 867 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
868 868 * tcp_ka_cnt.
869 869 */
870 870 case TCP_KEEPCNT:
871 871 if (checkonly)
872 872 break;
873 873
874 874 if (*i1 == 0) {
875 875 return (EINVAL);
876 876 } else if (tcp->tcp_ka_rinterval == 0) {
877 877 /*
878 878 * When TCP_KEEPCNT is specified without first
879 879 * specifying a TCP_KEEPINTVL, we infer an
880 880 * interval based on a tunable specific to our
881 881 * stack: the tcp_keepalive_abort_interval.
882 882 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
883 883 * the unlikely event that that has been set.)
884 884 * Given the abort interval's default value of
885 885 * 480 seconds, low TCP_KEEPCNT values can
886 886 * result in intervals that exceed the default
887 887 * maximum RTO of 60 seconds. Rather than
888 888 * fail in these cases, we (implicitly) clamp
889 889 * the interval at the maximum RTO; if the
890 890 * TCP_KEEPCNT is shortly followed by a
891 891 * TCP_KEEPINTVL (as we expect), the abort
892 892 * threshold will be recalculated correctly --
893 893 * and if a TCP_KEEPINTVL is not forthcoming,
894 894 * keep-alive will at least operate reasonably
895 895 * given the underconfigured state.
896 896 */
897 897 uint32_t interval;
898 898
899 899 interval = tcp->tcp_ka_abort_thres / *i1;
900 900
901 901 if (interval < tcp->tcp_rto_min)
902 902 interval = tcp->tcp_rto_min;
903 903
904 904 if (interval > tcp->tcp_rto_max)
905 905 interval = tcp->tcp_rto_max;
906 906
907 907 tcp->tcp_ka_rinterval = interval;
908 908 } else {
909 909 if ((*i1 * tcp->tcp_ka_rinterval) <
910 910 tcps->tcps_keepalive_abort_interval_low ||
911 911 (*i1 * tcp->tcp_ka_rinterval) >
912 912 tcps->tcps_keepalive_abort_interval_high)
913 913 return (EINVAL);
914 914 tcp->tcp_ka_abort_thres =
915 915 (*i1 * tcp->tcp_ka_rinterval);
916 916 }
917 917 tcp->tcp_ka_cnt = *i1;
918 918 break;
919 919 case TCP_KEEPINTVL:
920 920 /*
921 921 * TCP_KEEPINTVL is specified in seconds, but
922 922 * tcp_ka_rinterval is in milliseconds.
923 923 */
924 924
925 925 if (checkonly)
926 926 break;
927 927
928 928 if ((*i1 * 1000) < tcp->tcp_rto_min ||
929 929 (*i1 * 1000) > tcp->tcp_rto_max)
930 930 return (EINVAL);
931 931
932 932 if (tcp->tcp_ka_cnt == 0) {
933 933 tcp->tcp_ka_cnt =
934 934 tcp->tcp_ka_abort_thres / (*i1 * 1000);
935 935 } else {
936 936 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
937 937 tcps->tcps_keepalive_abort_interval_low ||
938 938 (*i1 * tcp->tcp_ka_cnt * 1000) >
939 939 tcps->tcps_keepalive_abort_interval_high)
940 940 return (EINVAL);
941 941 tcp->tcp_ka_abort_thres =
942 942 (*i1 * tcp->tcp_ka_cnt * 1000);
943 943 }
944 944 tcp->tcp_ka_rinterval = *i1 * 1000;
945 945 break;
946 946 case TCP_KEEPALIVE_ABORT_THRESHOLD:
947 947 if (!checkonly) {
948 948 if (*i1 <
949 949 tcps->tcps_keepalive_abort_interval_low ||
950 950 *i1 >
951 951 tcps->tcps_keepalive_abort_interval_high) {
952 952 *outlenp = 0;
953 953 return (EINVAL);
954 954 }
955 955 tcp->tcp_ka_abort_thres = *i1;
956 956 tcp->tcp_ka_cnt = 0;
957 957 tcp->tcp_ka_rinterval = 0;
958 958 }
959 959 break;
960 960 case TCP_CORK:
961 961 if (!checkonly) {
962 962 /*
963 963 * If tcp->tcp_cork was set and is now
964 964 * being unset, we have to make sure that
965 965 * the remaining data gets sent out. Also
966 966 * unset tcp->tcp_cork so that tcp_wput_data()
967 967 * can send data even if it is less than MSS.
968 968 */
969 969 if (tcp->tcp_cork && onoff == 0 &&
970 970 tcp->tcp_unsent > 0) {
971 971 tcp->tcp_cork = B_FALSE;
972 972 tcp_wput_data(tcp, NULL, B_FALSE);
973 973 }
974 974 tcp->tcp_cork = onoff;
975 975 }
976 976 break;
977 977 case TCP_RTO_INITIAL: {
978 978 clock_t rto;
979 979
980 980 if (checkonly || val == 0)
981 981 break;
982 982
983 983 /*
984 984 * Sanity checks
985 985 *
986 986 * The initial RTO should be bounded by the minimum
987 987 * and maximum RTO. And it should also be smaller
988 988 * than the connect attempt abort timeout. Otherwise,
989 989 * the connection won't be aborted in a period
990 990 * reasonably close to that timeout.
991 991 */
992 992 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
993 993 val > tcp->tcp_second_ctimer_threshold ||
994 994 val < tcps->tcps_rexmit_interval_initial_low ||
995 995 val > tcps->tcps_rexmit_interval_initial_high) {
996 996 *outlenp = 0;
997 997 return (EINVAL);
998 998 }
999 999 tcp->tcp_rto_initial = val;
1000 1000
1001 1001 /*
1002 1002 * If TCP has not sent anything, need to re-calculate
1003 1003 * tcp_rto. Otherwise, this option change does not
1004 1004 * really affect anything.
1005 1005 */
1006 1006 if (tcp->tcp_state >= TCPS_SYN_SENT)
1007 1007 break;
1008 1008
1009 1009 tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
1010 1010 tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
1011 1011 rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1012 1012 tcps->tcps_rexmit_interval_extra +
1013 1013 (tcp->tcp_rtt_sa >> 5) +
1014 1014 tcps->tcps_conn_grace_period;
1015 1015 TCP_SET_RTO(tcp, rto);
1016 1016 break;
1017 1017 }
1018 1018 case TCP_RTO_MIN:
1019 1019 if (checkonly || val == 0)
1020 1020 break;
1021 1021
1022 1022 if (val < tcps->tcps_rexmit_interval_min_low ||
1023 1023 val > tcps->tcps_rexmit_interval_min_high ||
1024 1024 val > tcp->tcp_rto_max) {
1025 1025 *outlenp = 0;
1026 1026 return (EINVAL);
1027 1027 }
1028 1028 tcp->tcp_rto_min = val;
1029 1029 if (tcp->tcp_rto < val)
1030 1030 tcp->tcp_rto = val;
1031 1031 break;
1032 1032 case TCP_RTO_MAX:
1033 1033 if (checkonly || val == 0)
1034 1034 break;
1035 1035
1036 1036 /*
1037 1037 * Sanity checks
1038 1038 *
1039 1039 * The maximum RTO should not be larger than the
1040 1040 * connection abort timeout. Otherwise, the
1041 1041 * connection won't be aborted in a period reasonably
1042 1042 * close to that timeout.
1043 1043 */
1044 1044 if (val < tcps->tcps_rexmit_interval_max_low ||
1045 1045 val > tcps->tcps_rexmit_interval_max_high ||
1046 1046 val < tcp->tcp_rto_min ||
1047 1047 val > tcp->tcp_second_timer_threshold) {
1048 1048 *outlenp = 0;
1049 1049 return (EINVAL);
1050 1050 }
1051 1051 tcp->tcp_rto_max = val;
1052 1052 if (tcp->tcp_rto > val)
1053 1053 tcp->tcp_rto = val;
1054 1054 break;
1055 1055 case TCP_LINGER2:
1056 1056 if (checkonly || *i1 == 0)
1057 1057 break;
1058 1058
1059 1059 /*
1060 1060 * Note that the option value's unit is seconds. The
1061 1061 * value must be no smaller than the private
1062 1062 * parameter tcp_fin_wait_2_flush_interval's lower
1063 1063 * bound and no larger than the current value of that
1064 1064 * parameter. It is capped at the current value to
1065 1065 * avoid an app setting TCP_LINGER2 to a big value,
1066 1066 * causing resources to be held up too long in
1067 1067 * FIN-WAIT-2 state.
1068 1068 */
1069 1069 if (*i1 < 0 ||
1070 1070 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1071 1071 *i1 ||
1072 1072 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1073 1073 *i1) {
1074 1074 *outlenp = 0;
1075 1075 return (EINVAL);
1076 1076 }
1077 1077 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1078 1078 break;
1079 1079 default:
1080 1080 break;
1081 1081 }
1082 1082 break;
1083 1083 case IPPROTO_IP:
1084 1084 switch (name) {
1085 1085 case IP_SEC_OPT:
1086 1086 /*
1087 1087 * We should not allow policy setting after
1088 1088 * we start listening for connections.
1089 1089 */
1090 1090 if (tcp->tcp_state == TCPS_LISTEN) {
1091 1091 return (EINVAL);
1092 1092 }
1093 1093 break;
1094 1094 }
1095 1095 break;
1096 1096 case IPPROTO_IPV6:
1097 1097 /*
1098 1098 * IPPROTO_IPV6 options are only supported for sockets
1099 1099 * that are using IPv6 on the wire.
1100 1100 */
1101 1101 if (connp->conn_ipversion != IPV6_VERSION) {
1102 1102 *outlenp = 0;
1103 1103 return (EINVAL);
1104 1104 }
1105 1105
1106 1106 switch (name) {
1107 1107 case IPV6_RECVPKTINFO:
1108 1108 if (!checkonly) {
1109 1109 /* Force it to be sent up with the next msg */
1110 1110 tcp->tcp_recvifindex = 0;
1111 1111 }
1112 1112 break;
1113 1113 case IPV6_RECVTCLASS:
1114 1114 if (!checkonly) {
1115 1115 /* Force it to be sent up with the next msg */
1116 1116 tcp->tcp_recvtclass = 0xffffffffU;
1117 1117 }
1118 1118 break;
1119 1119 case IPV6_RECVHOPLIMIT:
1120 1120 if (!checkonly) {
1121 1121 /* Force it to be sent up with the next msg */
1122 1122 tcp->tcp_recvhops = 0xffffffffU;
1123 1123 }
1124 1124 break;
1125 1125 case IPV6_PKTINFO:
1126 1126 /* This is an extra check for TCP */
1127 1127 if (inlen == sizeof (struct in6_pktinfo)) {
1128 1128 struct in6_pktinfo *pkti;
1129 1129
1130 1130 pkti = (struct in6_pktinfo *)invalp;
1131 1131 /*
1132 1132 * RFC 3542 states that ipi6_addr must be
1133 1133 * the unspecified address when setting the
1134 1134 * IPV6_PKTINFO sticky socket option on a
1135 1135 * TCP socket.
1136 1136 */
1137 1137 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1138 1138 return (EINVAL);
1139 1139 }
1140 1140 break;
1141 1141 case IPV6_SEC_OPT:
1142 1142 /*
1143 1143 * We should not allow policy setting after
1144 1144 * we start listening for connections.
1145 1145 */
1146 1146 if (tcp->tcp_state == TCPS_LISTEN) {
1147 1147 return (EINVAL);
1148 1148 }
1149 1149 break;
1150 1150 }
1151 1151 break;
1152 1152 }
1153 1153 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1154 1154 checkonly, cr);
1155 1155 if (reterr != 0) {
1156 1156 *outlenp = 0;
1157 1157 return (reterr);
1158 1158 }
1159 1159
1160 1160 /*
1161 1161 * Common case of OK return with outval same as inval
1162 1162 */
1163 1163 if (invalp != outvalp) {
1164 1164 /* don't trust bcopy for identical src/dst */
1165 1165 (void) bcopy(invalp, outvalp, inlen);
1166 1166 }
1167 1167 *outlenp = inlen;
1168 1168
1169 1169 if (coas.coa_changed & COA_HEADER_CHANGED) {
1170 1170 /* If we are connected we rebuild the headers */
1171 1171 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1172 1172 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1173 1173 reterr = tcp_build_hdrs(tcp);
1174 1174 if (reterr != 0)
1175 1175 return (reterr);
1176 1176 }
1177 1177 }
1178 1178 if (coas.coa_changed & COA_ROUTE_CHANGED) {
1179 1179 in6_addr_t nexthop;
1180 1180
1181 1181 /*
1182 1182 * If we are connected we re-cache the information.
1183 1183 * We ignore errors to preserve BSD behavior.
1184 1184 * Note that we don't redo IPsec policy lookup here
1185 1185 * since the final destination (or source) didn't change.
1186 1186 */
1187 1187 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1188 1188 &connp->conn_faddr_v6, &nexthop);
1189 1189
1190 1190 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1191 1191 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1192 1192 (void) ip_attr_connect(connp, connp->conn_ixa,
1193 1193 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1194 1194 &nexthop, connp->conn_fport, NULL, NULL,
1195 1195 IPDF_VERIFY_DST);
1196 1196 }
1197 1197 }
1198 1198 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1199 1199 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1200 1200 }
1201 1201 if (coas.coa_changed & COA_WROFF_CHANGED) {
1202 1202 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1203 1203 tcps->tcps_wroff_xtra;
1204 1204 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1205 1205 connp->conn_wroff);
1206 1206 }
1207 1207 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1208 1208 if (IPCL_IS_NONSTR(connp))
1209 1209 proto_set_rx_oob_opt(connp, onoff);
1210 1210 }
1211 1211 return (0);
1212 1212 }
|
↓ open down ↓ |
1212 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX