Print this page
OS-4018 lxbrand support TCP SO_REUSEPORT
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Cody Mello <cody.mello@joyent.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_socket.c
+++ new/usr/src/uts/common/inet/tcp/tcp_socket.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright 2015 Joyent, Inc.
24 25 */
25 26
26 27 /* This file contains all TCP kernel socket related functions. */
27 28
28 29 #include <sys/types.h>
29 30 #include <sys/strlog.h>
30 31 #include <sys/policy.h>
31 32 #include <sys/sockio.h>
32 33 #include <sys/strsubr.h>
33 34 #include <sys/strsun.h>
34 35 #include <sys/squeue_impl.h>
35 36 #include <sys/squeue.h>
36 37 #define _SUN_TPI_VERSION 2
37 38 #include <sys/tihdr.h>
38 39 #include <sys/timod.h>
39 40 #include <sys/tpicommon.h>
40 41 #include <sys/socketvar.h>
41 42
42 43 #include <inet/common.h>
43 44 #include <inet/proto_set.h>
44 45 #include <inet/ip.h>
45 46 #include <inet/tcp.h>
46 47 #include <inet/tcp_impl.h>
47 48
48 49 static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
49 50 sock_upcalls_t *, int, cred_t *);
50 51 static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
51 52 sock_upper_handle_t, cred_t *);
52 53 static int tcp_bind(sock_lower_handle_t, struct sockaddr *,
53 54 socklen_t, cred_t *);
54 55 static int tcp_listen(sock_lower_handle_t, int, cred_t *);
55 56 static int tcp_connect(sock_lower_handle_t, const struct sockaddr *,
56 57 socklen_t, sock_connid_t *, cred_t *);
57 58 static int tcp_getpeername(sock_lower_handle_t, struct sockaddr *,
58 59 socklen_t *, cred_t *);
59 60 static int tcp_getsockname(sock_lower_handle_t, struct sockaddr *,
60 61 socklen_t *, cred_t *);
61 62 static int tcp_getsockopt(sock_lower_handle_t, int, int, void *,
62 63 socklen_t *, cred_t *);
63 64 static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
64 65 socklen_t, cred_t *);
65 66 static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
66 67 cred_t *);
67 68 static int tcp_shutdown(sock_lower_handle_t, int, cred_t *);
68 69 static void tcp_clr_flowctrl(sock_lower_handle_t);
69 70 static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
70 71 cred_t *);
71 72 static int tcp_close(sock_lower_handle_t, int, cred_t *);
72 73
73 74 sock_downcalls_t sock_tcp_downcalls = {
74 75 tcp_activate,
75 76 tcp_accept,
76 77 tcp_bind,
77 78 tcp_listen,
78 79 tcp_connect,
79 80 tcp_getpeername,
80 81 tcp_getsockname,
81 82 tcp_getsockopt,
82 83 tcp_setsockopt,
83 84 tcp_sendmsg,
84 85 NULL,
85 86 NULL,
86 87 NULL,
87 88 tcp_shutdown,
88 89 tcp_clr_flowctrl,
89 90 tcp_ioctl,
90 91 tcp_close,
91 92 };
92 93
93 94 /* ARGSUSED */
94 95 static void
95 96 tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
96 97 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
97 98 {
98 99 conn_t *connp = (conn_t *)proto_handle;
99 100 struct sock_proto_props sopp;
100 101 extern struct module_info tcp_rinfo;
101 102
102 103 ASSERT(connp->conn_upper_handle == NULL);
103 104
104 105 /* All Solaris components should pass a cred for this operation. */
105 106 ASSERT(cr != NULL);
106 107
107 108 sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
108 109 SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER |
109 110 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ;
110 111
111 112 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
112 113 sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
113 114 sopp.sopp_maxpsz = INFPSZ;
114 115 sopp.sopp_maxblk = INFPSZ;
115 116 sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL;
116 117 sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3;
117 118 sopp.sopp_maxaddrlen = sizeof (sin6_t);
118 119 sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
119 120 tcp_rinfo.mi_minpsz;
120 121
121 122 connp->conn_upcalls = sock_upcalls;
122 123 connp->conn_upper_handle = sock_handle;
123 124
124 125 ASSERT(connp->conn_rcvbuf != 0 &&
125 126 connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
126 127 (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
127 128 }
128 129
129 130 /*ARGSUSED*/
130 131 static int
131 132 tcp_accept(sock_lower_handle_t lproto_handle,
132 133 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
133 134 cred_t *cr)
134 135 {
135 136 conn_t *lconnp, *econnp;
136 137 tcp_t *listener, *eager;
137 138
138 139 /*
139 140 * KSSL can move a socket from one listener to another, in which
140 141 * case `lproto_handle' points to the new listener. To ensure that
141 142 * the original listener is used the information is obtained from
142 143 * the eager.
143 144 */
144 145 econnp = (conn_t *)eproto_handle;
145 146 eager = econnp->conn_tcp;
146 147 ASSERT(IPCL_IS_NONSTR(econnp));
147 148 ASSERT(eager->tcp_listener != NULL);
148 149 listener = eager->tcp_listener;
149 150 lconnp = (conn_t *)listener->tcp_connp;
150 151 ASSERT(listener->tcp_state == TCPS_LISTEN);
151 152 ASSERT(lconnp->conn_upper_handle != NULL);
152 153
153 154 /*
154 155 * It is possible for the accept thread to race with the thread that
155 156 * made the su_newconn upcall in tcp_newconn_notify. Both
156 157 * tcp_newconn_notify and tcp_accept require that conn_upper_handle
157 158 * and conn_upcalls be set before returning, so they both write to
158 159 * them. However, we're guaranteed that the value written is the same
159 160 * for both threads.
160 161 */
161 162 ASSERT(econnp->conn_upper_handle == NULL ||
162 163 econnp->conn_upper_handle == sock_handle);
163 164 ASSERT(econnp->conn_upcalls == NULL ||
164 165 econnp->conn_upcalls == lconnp->conn_upcalls);
165 166 econnp->conn_upper_handle = sock_handle;
166 167 econnp->conn_upcalls = lconnp->conn_upcalls;
167 168
168 169 ASSERT(econnp->conn_netstack ==
169 170 listener->tcp_connp->conn_netstack);
170 171 ASSERT(eager->tcp_tcps == listener->tcp_tcps);
171 172
172 173 /*
173 174 * We should have a minimum of 2 references on the conn at this
174 175 * point. One for TCP and one for the newconn notification
175 176 * (which is now taken over by IP). In the normal case we would
176 177 * also have another reference (making a total of 3) for the conn
177 178 * being in the classifier hash list. However the eager could have
178 179 * received an RST subsequently and tcp_closei_local could have
179 180 * removed the eager from the classifier hash list, hence we can't
180 181 * assert that reference.
181 182 */
182 183 ASSERT(econnp->conn_ref >= 2);
183 184
184 185 mutex_enter(&listener->tcp_eager_lock);
185 186 /*
186 187 * Non-STREAMS listeners never defer the notification of new
187 188 * connections.
188 189 */
189 190 ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0);
190 191 tcp_eager_unlink(eager);
191 192 mutex_exit(&listener->tcp_eager_lock);
192 193 CONN_DEC_REF(listener->tcp_connp);
193 194
194 195 return ((eager->tcp_state < TCPS_ESTABLISHED) ? ECONNABORTED : 0);
195 196 }
196 197
197 198 static int
198 199 tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
199 200 socklen_t len, cred_t *cr)
200 201 {
201 202 int error;
202 203 conn_t *connp = (conn_t *)proto_handle;
203 204
204 205 /* All Solaris components should pass a cred for this operation. */
205 206 ASSERT(cr != NULL);
206 207 ASSERT(connp->conn_upper_handle != NULL);
207 208
208 209 error = squeue_synch_enter(connp, NULL);
209 210 if (error != 0) {
210 211 /* failed to enter */
211 212 return (ENOSR);
212 213 }
213 214
214 215 /* binding to a NULL address really means unbind */
215 216 if (sa == NULL) {
216 217 if (connp->conn_tcp->tcp_state < TCPS_LISTEN)
217 218 error = tcp_do_unbind(connp);
218 219 else
219 220 error = EINVAL;
220 221 } else {
221 222 error = tcp_do_bind(connp, sa, len, cr, B_TRUE);
222 223 }
223 224
224 225 squeue_synch_exit(connp);
225 226
226 227 if (error < 0) {
227 228 if (error == -TOUTSTATE)
228 229 error = EINVAL;
229 230 else
230 231 error = proto_tlitosyserr(-error);
231 232 }
232 233
233 234 return (error);
234 235 }
235 236
236 237 /* ARGSUSED */
237 238 static int
238 239 tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
239 240 {
240 241 conn_t *connp = (conn_t *)proto_handle;
241 242 tcp_t *tcp = connp->conn_tcp;
242 243 int error;
243 244
244 245 ASSERT(connp->conn_upper_handle != NULL);
245 246
246 247 /* All Solaris components should pass a cred for this operation. */
247 248 ASSERT(cr != NULL);
248 249
249 250 error = squeue_synch_enter(connp, NULL);
250 251 if (error != 0) {
251 252 /* failed to enter */
252 253 return (ENOBUFS);
253 254 }
254 255
255 256 error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE);
256 257 if (error == 0) {
257 258 /*
258 259 * sockfs needs to know what's the maximum number of socket
259 260 * that can be queued on the listener.
260 261 */
261 262 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
262 263 SOCK_OPCTL_ENAB_ACCEPT,
263 264 (uintptr_t)(tcp->tcp_conn_req_max +
264 265 tcp->tcp_tcps->tcps_conn_req_max_q0));
265 266 } else if (error < 0) {
266 267 if (error == -TOUTSTATE)
267 268 error = EINVAL;
268 269 else
269 270 error = proto_tlitosyserr(-error);
270 271 }
271 272 squeue_synch_exit(connp);
272 273 return (error);
273 274 }
274 275
275 276 static int
276 277 tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
277 278 socklen_t len, sock_connid_t *id, cred_t *cr)
278 279 {
279 280 conn_t *connp = (conn_t *)proto_handle;
280 281 int error;
281 282
282 283 ASSERT(connp->conn_upper_handle != NULL);
283 284
284 285 /* All Solaris components should pass a cred for this operation. */
285 286 ASSERT(cr != NULL);
286 287
287 288 error = proto_verify_ip_addr(connp->conn_family, sa, len);
288 289 if (error != 0) {
289 290 return (error);
290 291 }
291 292
292 293 error = squeue_synch_enter(connp, NULL);
293 294 if (error != 0) {
294 295 /* failed to enter */
295 296 return (ENOSR);
296 297 }
297 298
298 299 /*
299 300 * TCP supports quick connect, so no need to do an implicit bind
300 301 */
301 302 error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid);
302 303 if (error == 0) {
303 304 *id = connp->conn_tcp->tcp_connid;
304 305 } else if (error < 0) {
305 306 if (error == -TOUTSTATE) {
306 307 switch (connp->conn_tcp->tcp_state) {
307 308 case TCPS_SYN_SENT:
308 309 error = EALREADY;
309 310 break;
310 311 case TCPS_ESTABLISHED:
311 312 error = EISCONN;
312 313 break;
313 314 case TCPS_LISTEN:
314 315 error = EOPNOTSUPP;
315 316 break;
316 317 default:
317 318 error = EINVAL;
318 319 break;
319 320 }
320 321 } else {
321 322 error = proto_tlitosyserr(-error);
322 323 }
323 324 }
324 325
325 326 if (connp->conn_tcp->tcp_loopback) {
326 327 struct sock_proto_props sopp;
327 328
328 329 sopp.sopp_flags = SOCKOPT_LOOPBACK;
329 330 sopp.sopp_loopback = B_TRUE;
330 331
331 332 (*connp->conn_upcalls->su_set_proto_props)(
332 333 connp->conn_upper_handle, &sopp);
333 334 }
334 335 done:
335 336 squeue_synch_exit(connp);
336 337
337 338 return ((error == 0) ? EINPROGRESS : error);
338 339 }
339 340
340 341 /* ARGSUSED3 */
341 342 static int
342 343 tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
343 344 socklen_t *addrlenp, cred_t *cr)
344 345 {
345 346 conn_t *connp = (conn_t *)proto_handle;
346 347 tcp_t *tcp = connp->conn_tcp;
347 348
348 349 /* All Solaris components should pass a cred for this operation. */
349 350 ASSERT(cr != NULL);
350 351
351 352 ASSERT(tcp != NULL);
352 353 if (tcp->tcp_state < TCPS_SYN_RCVD)
353 354 return (ENOTCONN);
354 355
355 356 return (conn_getpeername(connp, addr, addrlenp));
356 357 }
357 358
358 359 /* ARGSUSED3 */
359 360 static int
360 361 tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
361 362 socklen_t *addrlenp, cred_t *cr)
362 363 {
363 364 conn_t *connp = (conn_t *)proto_handle;
364 365
365 366 /* All Solaris components should pass a cred for this operation. */
366 367 ASSERT(cr != NULL);
367 368
368 369 return (conn_getsockname(connp, addr, addrlenp));
369 370 }
370 371
371 372 /* returns UNIX error, the optlen is a value-result arg */
372 373 static int
373 374 tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
374 375 void *optvalp, socklen_t *optlen, cred_t *cr)
375 376 {
376 377 conn_t *connp = (conn_t *)proto_handle;
377 378 int error;
378 379 t_uscalar_t max_optbuf_len;
379 380 void *optvalp_buf;
380 381 int len;
381 382
382 383 ASSERT(connp->conn_upper_handle != NULL);
383 384
384 385 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
385 386 tcp_opt_obj.odb_opt_des_arr,
386 387 tcp_opt_obj.odb_opt_arr_cnt,
387 388 B_FALSE, B_TRUE, cr);
388 389 if (error != 0) {
389 390 if (error < 0) {
390 391 error = proto_tlitosyserr(-error);
391 392 }
392 393 return (error);
393 394 }
394 395
395 396 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
396 397
397 398 error = squeue_synch_enter(connp, NULL);
398 399 if (error == ENOMEM) {
399 400 kmem_free(optvalp_buf, max_optbuf_len);
400 401 return (ENOMEM);
401 402 }
402 403
403 404 len = tcp_opt_get(connp, level, option_name, optvalp_buf);
404 405 squeue_synch_exit(connp);
405 406
406 407 if (len == -1) {
407 408 kmem_free(optvalp_buf, max_optbuf_len);
408 409 return (EINVAL);
409 410 }
410 411
411 412 /*
412 413 * update optlen and copy option value
413 414 */
414 415 t_uscalar_t size = MIN(len, *optlen);
415 416
416 417 bcopy(optvalp_buf, optvalp, size);
417 418 bcopy(&size, optlen, sizeof (size));
418 419
419 420 kmem_free(optvalp_buf, max_optbuf_len);
420 421 return (0);
421 422 }
422 423
423 424 static int
424 425 tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
425 426 const void *optvalp, socklen_t optlen, cred_t *cr)
426 427 {
427 428 conn_t *connp = (conn_t *)proto_handle;
428 429 int error;
429 430
430 431 ASSERT(connp->conn_upper_handle != NULL);
431 432 /*
432 433 * Entering the squeue synchronously can result in a context switch,
433 434 * which can cause a rather sever performance degradation. So we try to
434 435 * handle whatever options we can without entering the squeue.
435 436 */
436 437 if (level == IPPROTO_TCP) {
437 438 switch (option_name) {
438 439 case TCP_NODELAY:
439 440 if (optlen != sizeof (int32_t))
440 441 return (EINVAL);
441 442 mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
442 443 connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
443 444 connp->conn_tcp->tcp_mss;
444 445 mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
445 446 return (0);
446 447 default:
447 448 break;
448 449 }
449 450 }
450 451
451 452 error = squeue_synch_enter(connp, NULL);
452 453 if (error == ENOMEM) {
453 454 return (ENOMEM);
454 455 }
455 456
456 457 error = proto_opt_check(level, option_name, optlen, NULL,
457 458 tcp_opt_obj.odb_opt_des_arr,
458 459 tcp_opt_obj.odb_opt_arr_cnt,
459 460 B_TRUE, B_FALSE, cr);
460 461
461 462 if (error != 0) {
462 463 if (error < 0) {
463 464 error = proto_tlitosyserr(-error);
464 465 }
465 466 squeue_synch_exit(connp);
466 467 return (error);
467 468 }
468 469
469 470 error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
470 471 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
471 472 NULL, cr);
472 473 squeue_synch_exit(connp);
473 474
474 475 ASSERT(error >= 0);
475 476
476 477 return (error);
477 478 }
478 479
479 480 /* ARGSUSED */
480 481 static int
481 482 tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
482 483 cred_t *cr)
483 484 {
484 485 tcp_t *tcp;
485 486 uint32_t msize;
486 487 conn_t *connp = (conn_t *)proto_handle;
487 488 int32_t tcpstate;
488 489
489 490 /* All Solaris components should pass a cred for this operation. */
490 491 ASSERT(cr != NULL);
491 492
492 493 ASSERT(connp->conn_ref >= 2);
493 494 ASSERT(connp->conn_upper_handle != NULL);
494 495
495 496 if (msg->msg_controllen != 0) {
496 497 freemsg(mp);
497 498 return (EOPNOTSUPP);
498 499 }
499 500
500 501 switch (DB_TYPE(mp)) {
501 502 case M_DATA:
502 503 tcp = connp->conn_tcp;
503 504 ASSERT(tcp != NULL);
504 505
505 506 tcpstate = tcp->tcp_state;
506 507 if (tcpstate < TCPS_ESTABLISHED) {
507 508 freemsg(mp);
508 509 /*
509 510 * We return ENOTCONN if the endpoint is trying to
510 511 * connect or has never been connected, and EPIPE if it
511 512 * has been disconnected. The connection id helps us
512 513 * distinguish between the last two cases.
513 514 */
514 515 return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN :
515 516 ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN));
516 517 } else if (tcpstate > TCPS_CLOSE_WAIT) {
517 518 freemsg(mp);
518 519 return (EPIPE);
519 520 }
520 521
521 522 msize = msgdsize(mp);
522 523
523 524 mutex_enter(&tcp->tcp_non_sq_lock);
524 525 tcp->tcp_squeue_bytes += msize;
525 526 /*
526 527 * Squeue Flow Control
527 528 */
528 529 if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) {
529 530 tcp_setqfull(tcp);
530 531 }
531 532 mutex_exit(&tcp->tcp_non_sq_lock);
532 533
533 534 /*
534 535 * The application may pass in an address in the msghdr, but
535 536 * we ignore the address on connection-oriented sockets.
536 537 * Just like BSD this code does not generate an error for
537 538 * TCP (a CONNREQUIRED socket) when sending to an address
538 539 * passed in with sendto/sendmsg. Instead the data is
539 540 * delivered on the connection as if no address had been
540 541 * supplied.
541 542 */
542 543 CONN_INC_REF(connp);
543 544
544 545 if (msg->msg_flags & MSG_OOB) {
545 546 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent,
546 547 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
547 548 } else {
548 549 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output,
549 550 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
550 551 }
551 552
552 553 return (0);
553 554
554 555 default:
555 556 ASSERT(0);
556 557 }
557 558
558 559 freemsg(mp);
559 560 return (0);
560 561 }
561 562
562 563 /* ARGSUSED */
563 564 static int
564 565 tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
565 566 {
566 567 conn_t *connp = (conn_t *)proto_handle;
567 568 tcp_t *tcp = connp->conn_tcp;
568 569
569 570 ASSERT(connp->conn_upper_handle != NULL);
570 571
571 572 /* All Solaris components should pass a cred for this operation. */
572 573 ASSERT(cr != NULL);
573 574
574 575 /*
575 576 * X/Open requires that we check the connected state.
576 577 */
577 578 if (tcp->tcp_state < TCPS_SYN_SENT)
578 579 return (ENOTCONN);
579 580
580 581 /* shutdown the send side */
581 582 if (how != SHUT_RD) {
582 583 mblk_t *bp;
583 584
584 585 bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
585 586 CONN_INC_REF(connp);
586 587 SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output,
587 588 connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT);
588 589
589 590 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
590 591 SOCK_OPCTL_SHUT_SEND, 0);
591 592 }
592 593
593 594 /* shutdown the recv side */
594 595 if (how != SHUT_WR)
595 596 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
596 597 SOCK_OPCTL_SHUT_RECV, 0);
597 598
598 599 return (0);
599 600 }
600 601
601 602 static void
602 603 tcp_clr_flowctrl(sock_lower_handle_t proto_handle)
603 604 {
604 605 conn_t *connp = (conn_t *)proto_handle;
605 606 tcp_t *tcp = connp->conn_tcp;
606 607 mblk_t *mp;
607 608 int error;
608 609
609 610 ASSERT(connp->conn_upper_handle != NULL);
610 611
611 612 /*
612 613 * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl()
613 614 * is currently running.
614 615 */
615 616 mutex_enter(&tcp->tcp_rsrv_mp_lock);
616 617 if ((mp = tcp->tcp_rsrv_mp) == NULL) {
617 618 mutex_exit(&tcp->tcp_rsrv_mp_lock);
618 619 return;
619 620 }
620 621 tcp->tcp_rsrv_mp = NULL;
621 622 mutex_exit(&tcp->tcp_rsrv_mp_lock);
622 623
623 624 error = squeue_synch_enter(connp, mp);
624 625 ASSERT(error == 0);
625 626
626 627 mutex_enter(&tcp->tcp_rsrv_mp_lock);
627 628 tcp->tcp_rsrv_mp = mp;
628 629 mutex_exit(&tcp->tcp_rsrv_mp_lock);
629 630
630 631 if (tcp->tcp_fused) {
631 632 tcp_fuse_backenable(tcp);
632 633 } else {
633 634 tcp->tcp_rwnd = connp->conn_rcvbuf;
634 635 /*
635 636 * Send back a window update immediately if TCP is above
636 637 * ESTABLISHED state and the increase of the rcv window
637 638 * that the other side knows is at least 1 MSS after flow
638 639 * control is lifted.
639 640 */
640 641 if (tcp->tcp_state >= TCPS_ESTABLISHED &&
641 642 tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
642 643 tcp_xmit_ctl(NULL, tcp,
643 644 (tcp->tcp_swnd == 0) ? tcp->tcp_suna :
644 645 tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
645 646 }
646 647 }
647 648
648 649 squeue_synch_exit(connp);
649 650 }
650 651
651 652 /* ARGSUSED */
652 653 static int
653 654 tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
654 655 int mode, int32_t *rvalp, cred_t *cr)
655 656 {
656 657 conn_t *connp = (conn_t *)proto_handle;
657 658 int error;
658 659
659 660 ASSERT(connp->conn_upper_handle != NULL);
660 661
661 662 /* All Solaris components should pass a cred for this operation. */
662 663 ASSERT(cr != NULL);
663 664
664 665 /*
665 666 * If we don't have a helper stream then create one.
666 667 * ip_create_helper_stream takes care of locking the conn_t,
667 668 * so this check for NULL is just a performance optimization.
668 669 */
669 670 if (connp->conn_helper_info == NULL) {
670 671 tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps;
671 672
672 673 /*
673 674 * Create a helper stream for non-STREAMS socket.
674 675 */
675 676 error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident);
676 677 if (error != 0) {
677 678 ip0dbg(("tcp_ioctl: create of IP helper stream "
678 679 "failed %d\n", error));
679 680 return (error);
680 681 }
681 682 }
682 683
683 684 switch (cmd) {
684 685 case ND_SET:
685 686 case ND_GET:
686 687 case _SIOCSOCKFALLBACK:
687 688 case TCP_IOC_ABORT_CONN:
688 689 case TI_GETPEERNAME:
689 690 case TI_GETMYNAME:
690 691 ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
691 692 cmd));
692 693 error = EINVAL;
693 694 break;
694 695 default:
695 696 /*
696 697 * If the conn is not closing, pass on to IP using
697 698 * helper stream. Bump the ioctlref to prevent tcp_close
698 699 * from closing the rq/wq out from underneath the ioctl
699 700 * if it ends up queued or aborted/interrupted.
700 701 */
701 702 mutex_enter(&connp->conn_lock);
702 703 if (connp->conn_state_flags & (CONN_CLOSING)) {
703 704 mutex_exit(&connp->conn_lock);
704 705 error = EINVAL;
705 706 break;
706 707 }
707 708 CONN_INC_IOCTLREF_LOCKED(connp);
708 709 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
709 710 cmd, arg, mode, cr, rvalp);
710 711 CONN_DEC_IOCTLREF(connp);
711 712 break;
712 713 }
713 714 return (error);
714 715 }
715 716
716 717 /* ARGSUSED */
717 718 static int
718 719 tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
719 720 {
720 721 conn_t *connp = (conn_t *)proto_handle;
721 722
722 723 ASSERT(connp->conn_upper_handle != NULL);
723 724
724 725 /* All Solaris components should pass a cred for this operation. */
725 726 ASSERT(cr != NULL);
726 727
727 728 tcp_close_common(connp, flags);
728 729
729 730 ip_free_helper_stream(connp);
730 731
731 732 /*
732 733 * Drop IP's reference on the conn. This is the last reference
733 734 * on the connp if the state was less than established. If the
734 735 * connection has gone into timewait state, then we will have
735 736 * one ref for the TCP and one more ref (total of two) for the
736 737 * classifier connected hash list (a timewait connections stays
737 738 * in connected hash till closed).
738 739 *
739 740 * We can't assert the references because there might be other
740 741 * transient reference places because of some walkers or queued
741 742 * packets in squeue for the timewait state.
742 743 */
743 744 CONN_DEC_REF(connp);
744 745
745 746 /*
746 747 * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
747 748 * freeing the socket.
748 749 */
749 750 return (EINPROGRESS);
750 751 }
751 752
752 753 /* ARGSUSED */
753 754 sock_lower_handle_t
754 755 tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
755 756 uint_t *smodep, int *errorp, int flags, cred_t *credp)
756 757 {
757 758 conn_t *connp;
758 759 boolean_t isv6 = family == AF_INET6;
759 760
760 761 if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
761 762 (proto != 0 && proto != IPPROTO_TCP)) {
762 763 *errorp = EPROTONOSUPPORT;
763 764 return (NULL);
764 765 }
765 766
766 767 connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
767 768 if (connp == NULL) {
768 769 return (NULL);
769 770 }
770 771
771 772 /*
772 773 * Put the ref for TCP. Ref for IP was already put
773 774 * by ipcl_conn_create. Also make the conn_t globally
774 775 * visible to walkers
775 776 */
776 777 mutex_enter(&connp->conn_lock);
777 778 CONN_INC_REF_LOCKED(connp);
778 779 ASSERT(connp->conn_ref == 2);
779 780 connp->conn_state_flags &= ~CONN_INCIPIENT;
780 781
781 782 connp->conn_flags |= IPCL_NONSTR;
782 783 mutex_exit(&connp->conn_lock);
783 784
784 785 ASSERT(errorp != NULL);
785 786 *errorp = 0;
786 787 *sock_downcalls = &sock_tcp_downcalls;
787 788 *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
788 789 SM_SENDFILESUPP;
789 790
790 791 return ((sock_lower_handle_t)connp);
791 792 }
792 793
793 794 /*
794 795 * tcp_fallback
795 796 *
796 797 * A direct socket is falling back to using STREAMS. The queue
797 798 * that is being passed down was created using tcp_open() with
798 799 * the SO_FALLBACK flag set. As a result, the queue is not
799 800 * associated with a conn, and the q_ptrs instead contain the
800 801 * dev and minor area that should be used.
801 802 *
802 803 * The 'issocket' flag indicates whether the FireEngine
803 804 * optimizations should be used. The common case would be that
804 805 * optimizations are enabled, and they might be subsequently
805 806 * disabled using the _SIOCSOCKFALLBACK ioctl.
806 807 */
807 808
808 809 /*
809 810 * An active connection is falling back to TPI. Gather all the information
810 811 * required by the STREAM head and TPI sonode and send it up.
811 812 */
812 813 static void
813 814 tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q,
814 815 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
815 816 sock_quiesce_arg_t *arg)
816 817 {
817 818 conn_t *connp = tcp->tcp_connp;
818 819 struct stroptions *stropt;
819 820 struct T_capability_ack tca;
820 821 struct sockaddr_in6 laddr, faddr;
821 822 socklen_t laddrlen, faddrlen;
822 823 short opts;
823 824 int error;
824 825 mblk_t *mp, *mpnext;
825 826
826 827 connp->conn_dev = (dev_t)RD(q)->q_ptr;
827 828 connp->conn_minor_arena = WR(q)->q_ptr;
828 829
829 830 RD(q)->q_ptr = WR(q)->q_ptr = connp;
830 831
831 832 connp->conn_rq = RD(q);
832 833 connp->conn_wq = WR(q);
833 834
834 835 WR(q)->q_qinfo = &tcp_sock_winit;
835 836
836 837 if (!issocket)
837 838 tcp_use_pure_tpi(tcp);
838 839
839 840 /*
840 841 * free the helper stream
841 842 */
842 843 ip_free_helper_stream(connp);
843 844
844 845 /*
845 846 * Notify the STREAM head about options
846 847 */
847 848 DB_TYPE(stropt_mp) = M_SETOPTS;
848 849 stropt = (struct stroptions *)stropt_mp->b_rptr;
849 850 stropt_mp->b_wptr += sizeof (struct stroptions);
850 851 stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
851 852
852 853 stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 :
853 854 tcp->tcp_tcps->tcps_wroff_xtra);
854 855 if (tcp->tcp_snd_sack_ok)
855 856 stropt->so_wroff += TCPOPT_MAX_SACK_LEN;
856 857 stropt->so_hiwat = connp->conn_rcvbuf;
857 858 stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
858 859
859 860 putnext(RD(q), stropt_mp);
860 861
861 862 /*
862 863 * Collect the information needed to sync with the sonode
863 864 */
864 865 tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID);
865 866
866 867 laddrlen = faddrlen = sizeof (sin6_t);
867 868 (void) tcp_getsockname((sock_lower_handle_t)connp,
868 869 (struct sockaddr *)&laddr, &laddrlen, CRED());
869 870 error = tcp_getpeername((sock_lower_handle_t)connp,
870 871 (struct sockaddr *)&faddr, &faddrlen, CRED());
871 872 if (error != 0)
872 873 faddrlen = 0;
873 874
874 875 opts = 0;
875 876 if (connp->conn_oobinline)
876 877 opts |= SO_OOBINLINE;
877 878 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
878 879 opts |= SO_DONTROUTE;
879 880
880 881 /*
881 882 * Notify the socket that the protocol is now quiescent,
882 883 * and it's therefore safe move data from the socket
883 884 * to the stream head.
884 885 */
885 886 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
886 887 (struct sockaddr *)&laddr, laddrlen,
887 888 (struct sockaddr *)&faddr, faddrlen, opts);
888 889
889 890 while (mp != NULL) {
890 891 mpnext = mp->b_next;
891 892 tcp->tcp_rcv_list = mp->b_next;
892 893 mp->b_next = NULL;
893 894 putnext(q, mp);
894 895 mp = mpnext;
895 896 }
896 897 ASSERT(tcp->tcp_rcv_last_head == NULL);
897 898 ASSERT(tcp->tcp_rcv_last_tail == NULL);
898 899 ASSERT(tcp->tcp_rcv_cnt == 0);
899 900
900 901 /*
901 902 * All eagers in q0 are marked as being non-STREAM, so they will
902 903 * make su_newconn upcalls when the handshake completes, which
903 904 * will fail (resulting in the conn being closed). So we just blow
904 905 * off everything in q0 instead of waiting for the inevitable.
905 906 */
906 907 if (tcp->tcp_conn_req_cnt_q0 != 0)
907 908 tcp_eager_cleanup(tcp, B_TRUE);
908 909 }
909 910
910 911 /*
911 912 * An eager is falling back to TPI. All we have to do is send
912 913 * up a T_CONN_IND.
913 914 */
914 915 static void
915 916 tcp_fallback_eager(tcp_t *eager, boolean_t issocket,
916 917 so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg)
917 918 {
918 919 conn_t *connp = eager->tcp_connp;
919 920 tcp_t *listener = eager->tcp_listener;
920 921 mblk_t *mp;
921 922
922 923 ASSERT(listener != NULL);
923 924
924 925 /*
925 926 * Notify the socket that the protocol is now quiescent,
926 927 * and it's therefore safe move data from the socket
927 928 * to tcp's rcv queue.
928 929 */
929 930 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, NULL, NULL, 0,
930 931 NULL, 0, 0);
931 932
932 933 if (mp != NULL) {
933 934 ASSERT(eager->tcp_rcv_cnt == 0);
934 935
935 936 eager->tcp_rcv_list = mp;
936 937 eager->tcp_rcv_cnt = msgdsize(mp);
937 938 while (mp->b_next != NULL) {
938 939 mp = mp->b_next;
939 940 eager->tcp_rcv_cnt += msgdsize(mp);
940 941 }
941 942 eager->tcp_rcv_last_head = mp;
942 943 while (mp->b_cont)
943 944 mp = mp->b_cont;
944 945 eager->tcp_rcv_last_tail = mp;
945 946 if (eager->tcp_rcv_cnt > eager->tcp_rwnd)
946 947 eager->tcp_rwnd = 0;
947 948 else
948 949 eager->tcp_rwnd -= eager->tcp_rcv_cnt;
949 950 }
950 951
951 952 if (!issocket)
952 953 eager->tcp_issocket = B_FALSE;
953 954 /*
954 955 * The stream for this eager does not yet exist, so mark it as
955 956 * being detached.
956 957 */
957 958 eager->tcp_detached = B_TRUE;
958 959 eager->tcp_hard_binding = B_TRUE;
959 960 connp->conn_rq = listener->tcp_connp->conn_rq;
960 961 connp->conn_wq = listener->tcp_connp->conn_wq;
961 962
962 963 /* Send up the connection indication */
963 964 mp = eager->tcp_conn.tcp_eager_conn_ind;
964 965 ASSERT(mp != NULL);
965 966 eager->tcp_conn.tcp_eager_conn_ind = NULL;
966 967
967 968 /*
968 969 * TLI/XTI applications will get confused by
969 970 * sending eager as an option since it violates
970 971 * the option semantics. So remove the eager as
971 972 * an option since TLI/XTI apps don't need it anyway.
972 973 */
973 974 if (!issocket) {
974 975 struct T_conn_ind *conn_ind;
975 976
976 977 conn_ind = (struct T_conn_ind *)mp->b_rptr;
977 978 conn_ind->OPT_length = 0;
978 979 conn_ind->OPT_offset = 0;
979 980 }
980 981
981 982 /*
982 983 * Sockfs guarantees that the listener will not be closed
983 984 * during fallback. So we can safely use the listener's queue.
984 985 */
985 986 putnext(listener->tcp_connp->conn_rq, mp);
986 987 }
987 988
988 989
989 990 int
990 991 tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
991 992 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb,
992 993 sock_quiesce_arg_t *arg)
993 994 {
994 995 tcp_t *tcp;
995 996 conn_t *connp = (conn_t *)proto_handle;
996 997 int error;
997 998 mblk_t *stropt_mp;
998 999 mblk_t *ordrel_mp;
999 1000
1000 1001 tcp = connp->conn_tcp;
1001 1002
1002 1003 stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG,
1003 1004 NULL);
1004 1005
1005 1006 /* Pre-allocate the T_ordrel_ind mblk. */
1006 1007 ASSERT(tcp->tcp_ordrel_mp == NULL);
1007 1008 ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI,
1008 1009 STR_NOSIG, NULL);
1009 1010 ordrel_mp->b_datap->db_type = M_PROTO;
1010 1011 ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND;
1011 1012 ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind);
1012 1013
1013 1014 /*
|
↓ open down ↓ |
980 lines elided |
↑ open up ↑ |
1014 1015 * Enter the squeue so that no new packets can come in
1015 1016 */
1016 1017 error = squeue_synch_enter(connp, NULL);
1017 1018 if (error != 0) {
1018 1019 /* failed to enter, free all the pre-allocated messages. */
1019 1020 freeb(stropt_mp);
1020 1021 freeb(ordrel_mp);
1021 1022 return (ENOMEM);
1022 1023 }
1023 1024
1025 + /*
1026 + * Do not allow fallback on connections making use of SO_REUSEPORT.
1027 + */
1028 + if (tcp->tcp_rg_bind != NULL) {
1029 + freeb(stropt_mp);
1030 + freeb(ordrel_mp);
1031 + squeue_synch_exit(connp);
1032 + return (EINVAL);
1033 + }
1034 +
1024 1035 /*
1025 1036 * Both endpoints must be of the same type (either STREAMS or
1026 1037 * non-STREAMS) for fusion to be enabled. So if we are fused,
1027 1038 * we have to unfuse.
1028 1039 */
1029 1040 if (tcp->tcp_fused)
1030 1041 tcp_unfuse(tcp);
1031 1042
1032 1043 if (tcp->tcp_listener != NULL) {
1033 1044 /* The eager will deal with opts when accept() is called */
1034 1045 freeb(stropt_mp);
1035 1046 tcp_fallback_eager(tcp, direct_sockfs, quiesced_cb, arg);
1036 1047 } else {
1037 1048 tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs,
1038 1049 quiesced_cb, arg);
1039 1050 }
1040 1051
1041 1052 /*
1042 1053 * No longer a direct socket
1043 1054 *
1044 1055 * Note that we intentionally leave the upper_handle and upcalls
1045 1056 * intact, since eagers may still be using them.
1046 1057 */
1047 1058 connp->conn_flags &= ~IPCL_NONSTR;
1048 1059 tcp->tcp_ordrel_mp = ordrel_mp;
1049 1060
1050 1061 /*
1051 1062 * There should be at least two refs (IP + TCP)
1052 1063 */
1053 1064 ASSERT(connp->conn_ref >= 2);
1054 1065 squeue_synch_exit(connp);
1055 1066
1056 1067 return (0);
1057 1068 }
1058 1069
1059 1070 /*
1060 1071 * Notifies a non-STREAMS based listener about a new connection. This
1061 1072 * function is executed on the *eager*'s squeue once the 3 way handshake
1062 1073 * has completed. Note that the behavior differs from STREAMS, where the
1063 1074 * T_CONN_IND is sent up by tcp_send_conn_ind() while on the *listener*'s
1064 1075 * squeue.
1065 1076 *
1066 1077 * Returns B_TRUE if the notification succeeded and an upper handle was
1067 1078 * obtained. `tcp' should be closed on failure.
1068 1079 */
1069 1080 boolean_t
1070 1081 tcp_newconn_notify(tcp_t *tcp, ip_recv_attr_t *ira)
1071 1082 {
1072 1083 tcp_t *listener = tcp->tcp_listener;
1073 1084 conn_t *lconnp = listener->tcp_connp;
1074 1085 conn_t *econnp = tcp->tcp_connp;
1075 1086 tcp_t *tail;
1076 1087 ipaddr_t *addr_cache;
1077 1088 sock_upper_handle_t upper;
1078 1089 struct sock_proto_props sopp;
1079 1090
1080 1091 mutex_enter(&listener->tcp_eager_lock);
1081 1092 /*
1082 1093 * Take the eager out, if it is in the list of droppable eagers
1083 1094 * as we are here because the 3W handshake is over.
1084 1095 */
1085 1096 MAKE_UNDROPPABLE(tcp);
1086 1097 /*
1087 1098 * The eager already has an extra ref put in tcp_input_data
1088 1099 * so that it stays till accept comes back even though it
1089 1100 * might get into TCPS_CLOSED as a result of a TH_RST etc.
1090 1101 */
1091 1102 ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
1092 1103 listener->tcp_conn_req_cnt_q0--;
1093 1104 listener->tcp_conn_req_cnt_q++;
1094 1105
1095 1106 /* Move from SYN_RCVD to ESTABLISHED list */
1096 1107 tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = tcp->tcp_eager_prev_q0;
1097 1108 tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp->tcp_eager_next_q0;
1098 1109 tcp->tcp_eager_prev_q0 = NULL;
1099 1110 tcp->tcp_eager_next_q0 = NULL;
1100 1111
1101 1112 /*
1102 1113 * Insert at end of the queue because connections are accepted
1103 1114 * in chronological order. Leaving the older connections at front
1104 1115 * of the queue helps reduce search time.
1105 1116 */
1106 1117 tail = listener->tcp_eager_last_q;
1107 1118 if (tail != NULL)
1108 1119 tail->tcp_eager_next_q = tcp;
1109 1120 else
1110 1121 listener->tcp_eager_next_q = tcp;
1111 1122 listener->tcp_eager_last_q = tcp;
1112 1123 tcp->tcp_eager_next_q = NULL;
1113 1124
1114 1125 /* we have timed out before */
1115 1126 if (tcp->tcp_syn_rcvd_timeout != 0) {
1116 1127 tcp->tcp_syn_rcvd_timeout = 0;
1117 1128 listener->tcp_syn_rcvd_timeout--;
1118 1129 if (listener->tcp_syn_defense &&
1119 1130 listener->tcp_syn_rcvd_timeout <=
1120 1131 (listener->tcp_tcps->tcps_conn_req_max_q0 >> 5) &&
1121 1132 10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() -
1122 1133 listener->tcp_last_rcv_lbolt)) {
1123 1134 /*
1124 1135 * Turn off the defense mode if we
1125 1136 * believe the SYN attack is over.
1126 1137 */
1127 1138 listener->tcp_syn_defense = B_FALSE;
1128 1139 if (listener->tcp_ip_addr_cache) {
1129 1140 kmem_free((void *)listener->tcp_ip_addr_cache,
1130 1141 IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
1131 1142 listener->tcp_ip_addr_cache = NULL;
1132 1143 }
1133 1144 }
1134 1145 }
1135 1146 addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
1136 1147 if (addr_cache != NULL) {
1137 1148 /*
1138 1149 * We have finished a 3-way handshake with this
1139 1150 * remote host. This proves the IP addr is good.
1140 1151 * Cache it!
1141 1152 */
1142 1153 addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] =
1143 1154 tcp->tcp_connp->conn_faddr_v4;
1144 1155 }
1145 1156 mutex_exit(&listener->tcp_eager_lock);
1146 1157
1147 1158 /*
1148 1159 * Notify the ULP about the newconn. It is guaranteed that no
1149 1160 * tcp_accept() call will be made for the eager if the
1150 1161 * notification fails.
1151 1162 */
1152 1163 if ((upper = (*lconnp->conn_upcalls->su_newconn)
1153 1164 (lconnp->conn_upper_handle, (sock_lower_handle_t)econnp,
1154 1165 &sock_tcp_downcalls, ira->ira_cred, ira->ira_cpid,
1155 1166 &econnp->conn_upcalls)) == NULL) {
1156 1167 return (B_FALSE);
1157 1168 }
1158 1169 econnp->conn_upper_handle = upper;
1159 1170
1160 1171 tcp->tcp_detached = B_FALSE;
1161 1172 tcp->tcp_hard_binding = B_FALSE;
1162 1173 tcp->tcp_tconnind_started = B_TRUE;
1163 1174
1164 1175 if (econnp->conn_keepalive) {
1165 1176 tcp->tcp_ka_last_intrvl = 0;
1166 1177 tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
1167 1178 tcp->tcp_ka_interval);
1168 1179 }
1169 1180
1170 1181 /* Update the necessary parameters */
1171 1182 tcp_get_proto_props(tcp, &sopp);
1172 1183
1173 1184 (*econnp->conn_upcalls->su_set_proto_props)
1174 1185 (econnp->conn_upper_handle, &sopp);
1175 1186
1176 1187 return (B_TRUE);
1177 1188 }
|
↓ open down ↓ |
144 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX