4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* This file contains all TCP input processing functions. */
27
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #include <sys/strsubr.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/suntpi.h>
37 #include <sys/xti_inet.h>
38 #include <sys/squeue_impl.h>
39 #include <sys/squeue.h>
40 #include <sys/tsol/tnet.h>
41
42 #include <inet/common.h>
43 #include <inet/ip.h>
2213
2214 /*
2215 * Prime pump for checksum calculation in IP. Include the
2216 * adjustment for a source route if any.
2217 */
2218 data_length = tcp_hdr_len + connp->conn_sum;
2219 data_length = (data_length >> 16) + (data_length & 0xFFFF);
2220 tcpha->tha_sum = htons(data_length);
2221
2222 if (tcp->tcp_ip_forward_progress) {
2223 tcp->tcp_ip_forward_progress = B_FALSE;
2224 connp->conn_ixa->ixa_flags |= IXAF_REACH_CONF;
2225 } else {
2226 connp->conn_ixa->ixa_flags &= ~IXAF_REACH_CONF;
2227 }
2228 return (mp1);
2229 }
2230 }
2231
2232 /*
2233 * Handle M_DATA messages from IP. It is called directly from IP via
2234 * squeue for received IP packets.
2235 *
2236 * The first argument is always the connp/tcp to which the mp belongs.
2237 * There are no exceptions to this rule. The caller has already put
2238 * a reference on this connp/tcp and once tcp_input_data() returns,
2239 * the squeue will do the refrele.
2240 *
2241 * The TH_SYN for the listener directly go to tcp_input_listener via
2242 * squeue. ICMP errors go directly to tcp_icmp_input().
2243 *
2244 * sqp: NULL = recursive, sqp != NULL means called from squeue
2245 */
2246 void
2247 tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2248 {
2249 int32_t bytes_acked;
2250 int32_t gap;
2251 mblk_t *mp1;
2252 uint_t flags;
2254 uchar_t *iphdr;
2255 uchar_t *rptr;
2256 int32_t rgap;
2257 uint32_t seg_ack;
2258 int seg_len;
2259 uint_t ip_hdr_len;
2260 uint32_t seg_seq;
2261 tcpha_t *tcpha;
2262 int urp;
2263 tcp_opt_t tcpopt;
2264 ip_pkt_t ipp;
2265 boolean_t ofo_seg = B_FALSE; /* Out of order segment */
2266 uint32_t cwnd;
2267 uint32_t add;
2268 int npkt;
2269 int mss;
2270 conn_t *connp = (conn_t *)arg;
2271 squeue_t *sqp = (squeue_t *)arg2;
2272 tcp_t *tcp = connp->conn_tcp;
2273 tcp_stack_t *tcps = tcp->tcp_tcps;
2274
2275 /*
2276 * RST from fused tcp loopback peer should trigger an unfuse.
2277 */
2278 if (tcp->tcp_fused) {
2279 TCP_STAT(tcps, tcp_fusion_aborted);
2280 tcp_unfuse(tcp);
2281 }
2282
2283 iphdr = mp->b_rptr;
2284 rptr = mp->b_rptr;
2285 ASSERT(OK_32PTR(rptr));
2286
2287 ip_hdr_len = ira->ira_ip_hdr_length;
2288 if (connp->conn_recv_ancillary.crb_all != 0) {
2289 /*
2290 * Record packet information in the ip_pkt_t
2291 */
2292 ipp.ipp_fields = 0;
2293 if (ira->ira_flags & IRAF_IS_IPV4) {
2379 if (tcp->tcp_detached || !pullupmsg(mp, -1)) {
2380 freemsg(mp);
2381 return;
2382 }
2383 /* Update pointers into message */
2384 iphdr = rptr = mp->b_rptr;
2385 tcpha = (tcpha_t *)&rptr[ip_hdr_len];
2386 if (SEQ_GT(seg_seq, tcp->tcp_rnxt)) {
2387 /*
2388 * Since we can't handle any data with this urgent
2389 * pointer that is out of sequence, we expunge
2390 * the data. This allows us to still register
2391 * the urgent mark and generate the M_PCSIG,
2392 * which we can do.
2393 */
2394 mp->b_wptr = (uchar_t *)tcpha + TCP_HDR_LENGTH(tcpha);
2395 seg_len = 0;
2396 }
2397 }
2398
2399 switch (tcp->tcp_state) {
2400 case TCPS_SYN_SENT:
2401 if (connp->conn_final_sqp == NULL &&
2402 tcp_outbound_squeue_switch && sqp != NULL) {
2403 ASSERT(connp->conn_initial_sqp == connp->conn_sqp);
2404 connp->conn_final_sqp = sqp;
2405 if (connp->conn_final_sqp != connp->conn_sqp) {
2406 DTRACE_PROBE1(conn__final__sqp__switch,
2407 conn_t *, connp);
2408 CONN_INC_REF(connp);
2409 SQUEUE_SWITCH(connp, connp->conn_final_sqp);
2410 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2411 tcp_input_data, connp, ira, ip_squeue_flag,
2412 SQTAG_CONNECT_FINISH);
2413 return;
2414 }
2415 DTRACE_PROBE1(conn__final__sqp__same, conn_t *, connp);
2416 }
2417 if (flags & TH_ACK) {
2418 /*
2590 */
2591 if ((ack_mp = tcp_ack_mp(tcp)) != NULL) {
2592 if (tcp->tcp_ack_tid != 0) {
2593 (void) TCP_TIMER_CANCEL(tcp,
2594 tcp->tcp_ack_tid);
2595 tcp->tcp_ack_tid = 0;
2596 }
2597 tcp_send_data(tcp, ack_mp);
2598 BUMP_LOCAL(tcp->tcp_obsegs);
2599 TCPS_BUMP_MIB(tcps, tcpOutAck);
2600
2601 if (!IPCL_IS_NONSTR(connp)) {
2602 /* Send up T_CONN_CON */
2603 if (ira->ira_cred != NULL) {
2604 mblk_setcred(mp1,
2605 ira->ira_cred,
2606 ira->ira_cpid);
2607 }
2608 putnext(connp->conn_rq, mp1);
2609 } else {
2610 (*connp->conn_upcalls->
2611 su_connected)
2612 (connp->conn_upper_handle,
2613 tcp->tcp_connid,
2614 ira->ira_cred,
2615 ira->ira_cpid);
2616 freemsg(mp1);
2617 }
2618
2619 freemsg(mp);
2620 return;
2621 }
2622 /*
2623 * Forget fusion; we need to handle more
2624 * complex cases below. Send the deferred
2625 * T_CONN_CON message upstream and proceed
2626 * as usual. Mark this tcp as not capable
2627 * of fusion.
2628 */
2629 TCP_STAT(tcps, tcp_fusion_unfusable);
2630 tcp->tcp_unfusable = B_TRUE;
2631 if (!IPCL_IS_NONSTR(connp)) {
2632 if (ira->ira_cred != NULL) {
2633 mblk_setcred(mp1, ira->ira_cred,
2634 ira->ira_cpid);
2635 }
2636 putnext(connp->conn_rq, mp1);
2637 } else {
2638 (*connp->conn_upcalls->su_connected)
2639 (connp->conn_upper_handle,
2640 tcp->tcp_connid, ira->ira_cred,
2641 ira->ira_cpid);
2642 freemsg(mp1);
2643 }
2644 }
2645
2646 /*
2647 * Check to see if there is data to be sent. If
2648 * yes, set the transmit flag. Then check to see
2649 * if received data processing needs to be done.
2650 * If not, go straight to xmit_check. This short
2651 * cut is OK as we don't support T/TCP.
2652 */
2653 if (tcp->tcp_unsent)
2654 flags |= TH_XMIT_NEEDED;
2655
2656 if (seg_len == 0 && !(flags & TH_URG)) {
2657 freemsg(mp);
2658 goto xmit_check;
2992 * for this connection or if this is a new urgent
2993 * byte. Also send a zero-length "unmarked" message
2994 * to inform SIOCATMARK that this is not the mark.
2995 *
2996 * tcp_urp_last_valid is cleared when the T_exdata_ind
2997 * is sent up. This plus the check for old data
2998 * (gap >= 0) handles the wraparound of the sequence
2999 * number space without having to always track the
3000 * correct MAX(tcp_urp_last, tcp_rnxt). (BSD tracks
3001 * this max in its rcv_up variable).
3002 *
3003 * This prevents duplicate SIGURGS due to a "late"
3004 * zero-window probe when the T_EXDATA_IND has already
3005 * been sent up.
3006 */
3007 if ((flags & TH_URG) &&
3008 (!tcp->tcp_urp_last_valid || SEQ_GT(urp + seg_seq,
3009 tcp->tcp_urp_last))) {
3010 if (IPCL_IS_NONSTR(connp)) {
3011 if (!TCP_IS_DETACHED(tcp)) {
3012 (*connp->conn_upcalls->
3013 su_signal_oob)
3014 (connp->conn_upper_handle,
3015 urp);
3016 }
3017 } else {
3018 mp1 = allocb(0, BPRI_MED);
3019 if (mp1 == NULL) {
3020 freemsg(mp);
3021 return;
3022 }
3023 if (!TCP_IS_DETACHED(tcp) &&
3024 !putnextctl1(connp->conn_rq,
3025 M_PCSIG, SIGURG)) {
3026 /* Try again on the rexmit. */
3027 freemsg(mp1);
3028 freemsg(mp);
3029 return;
3030 }
3031 /*
3032 * If the next byte would be the mark
3033 * then mark with MARKNEXT else mark
3271 */
3272 if (flags & TH_URG && urp >= 0) {
3273 if (!tcp->tcp_urp_last_valid ||
3274 SEQ_GT(urp + seg_seq, tcp->tcp_urp_last)) {
3275 /*
3276 * Non-STREAMS sockets handle the urgent data a litte
3277 * differently from STREAMS based sockets. There is no
3278 * need to mark any mblks with the MSG{NOT,}MARKNEXT
3279 * flags to keep SIOCATMARK happy. Instead a
3280 * su_signal_oob upcall is made to update the mark.
3281 * Neither is a T_EXDATA_IND mblk needed to be
3282 * prepended to the urgent data. The urgent data is
3283 * delivered using the su_recv upcall, where we set
3284 * the MSG_OOB flag to indicate that it is urg data.
3285 *
3286 * Neither TH_SEND_URP_MARK nor TH_MARKNEXT_NEEDED
3287 * are used by non-STREAMS sockets.
3288 */
3289 if (IPCL_IS_NONSTR(connp)) {
3290 if (!TCP_IS_DETACHED(tcp)) {
3291 (*connp->conn_upcalls->su_signal_oob)
3292 (connp->conn_upper_handle, urp);
3293 }
3294 } else {
3295 /*
3296 * If we haven't generated the signal yet for
3297 * this urgent pointer value, do it now. Also,
3298 * send up a zero-length M_DATA indicating
3299 * whether or not this is the mark. The latter
3300 * is not needed when a T_EXDATA_IND is sent up.
3301 * However, if there are allocation failures
3302 * this code relies on the sender retransmitting
3303 * and the socket code for determining the mark
3304 * should not block waiting for the peer to
3305 * transmit. Thus, for simplicity we always
3306 * send up the mark indication.
3307 */
3308 mp1 = allocb(0, BPRI_MED);
3309 if (mp1 == NULL) {
3310 freemsg(mp);
3311 return;
3430 * the remainder back in will cause a
3431 * loop. In this case, drop the
3432 * packet and let the sender try
3433 * sending a good packet.
3434 */
3435 if (tmp_rnxt == tcp->tcp_rnxt) {
3436 freemsg(mp);
3437 return;
3438 }
3439 }
3440 tcp_input_data(connp, mp, NULL, ira);
3441 return;
3442 }
3443 /*
3444 * This segment contains only the urgent byte. We
3445 * have to allocate the T_exdata_ind, if we can.
3446 */
3447 if (IPCL_IS_NONSTR(connp)) {
3448 int error;
3449
3450 (*connp->conn_upcalls->su_recv)
3451 (connp->conn_upper_handle, mp, seg_len,
3452 MSG_OOB, &error, NULL);
3453 /*
3454 * We should never be in middle of a
3455 * fallback, the squeue guarantees that.
3456 */
3457 ASSERT(error != EOPNOTSUPP);
3458 mp = NULL;
3459 goto update_ack;
3460 } else if (!tcp->tcp_urp_mp) {
3461 struct T_exdata_ind *tei;
3462 mp1 = allocb(sizeof (struct T_exdata_ind),
3463 BPRI_MED);
3464 if (!mp1) {
3465 /*
3466 * Sigh... It'll be back.
3467 * Generate any MSG*MARK message now.
3468 */
3469 freemsg(mp);
3470 seg_len = 0;
4609 tcp_display(tcp, NULL, DISP_PORT_ONLY));
4610 #endif /* DEBUG */
4611 }
4612
4613 /*
4614 * Check for ancillary data changes compared to last segment.
4615 */
4616 if (connp->conn_recv_ancillary.crb_all != 0) {
4617 mp = tcp_input_add_ancillary(tcp, mp, &ipp, ira);
4618 if (mp == NULL)
4619 return;
4620 }
4621
4622 if (IPCL_IS_NONSTR(connp)) {
4623 /*
4624 * Non-STREAMS socket
4625 */
4626 boolean_t push = flags & (TH_PUSH|TH_FIN);
4627 int error;
4628
4629 if ((*connp->conn_upcalls->su_recv)(
4630 connp->conn_upper_handle,
4631 mp, seg_len, 0, &error, &push) <= 0) {
4632 /*
4633 * We should never be in middle of a
4634 * fallback, the squeue guarantees that.
4635 */
4636 ASSERT(error != EOPNOTSUPP);
4637 if (error == ENOSPC)
4638 tcp->tcp_rwnd -= seg_len;
4639 } else if (push) {
4640 /* PUSH bit set and sockfs is not flow controlled */
4641 flags |= tcp_rwnd_reopen(tcp);
4642 }
4643 } else if (tcp->tcp_listener != NULL || tcp->tcp_hard_binding) {
4644 /*
4645 * Side queue inbound data until the accept happens.
4646 * tcp_accept/tcp_rput drains this when the accept happens.
4647 * M_DATA is queued on b_cont. Otherwise (T_OPTDATA_IND or
4648 * T_EXDATA_IND) it is queued on b_next.
4649 * XXX Make urgent data use this. Requires:
4650 * Removing tcp_listener check for TH_URG
4852 tcp->tcp_localnet ?
4853 tcps->tcps_local_dack_interval :
4854 tcps->tcps_deferred_ack_interval);
4855 }
4856 }
4857 if (flags & TH_ORDREL_NEEDED) {
4858 /*
4859 * Notify upper layer about an orderly release. If this is
4860 * a non-STREAMS socket, then just make an upcall. For STREAMS
4861 * we send up an ordrel_ind, unless this is an eager, in which
4862 * case the ordrel will be sent when tcp_accept_finish runs.
4863 * Note that for non-STREAMS we make an upcall even if it is an
4864 * eager, because we have an upper handle to send it to.
4865 */
4866 ASSERT(IPCL_IS_NONSTR(connp) || tcp->tcp_listener == NULL);
4867 ASSERT(!tcp->tcp_detached);
4868
4869 if (IPCL_IS_NONSTR(connp)) {
4870 ASSERT(tcp->tcp_ordrel_mp == NULL);
4871 tcp->tcp_ordrel_done = B_TRUE;
4872 (*connp->conn_upcalls->su_opctl)
4873 (connp->conn_upper_handle, SOCK_OPCTL_SHUT_RECV, 0);
4874 goto done;
4875 }
4876
4877 if (tcp->tcp_rcv_list != NULL) {
4878 /*
4879 * Push any mblk(s) enqueued from co processing.
4880 */
4881 flags |= tcp_rcv_drain(tcp);
4882 }
4883 ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
4884
4885 mp1 = tcp->tcp_ordrel_mp;
4886 tcp->tcp_ordrel_mp = NULL;
4887 tcp->tcp_ordrel_done = B_TRUE;
4888 putnext(connp->conn_rq, mp1);
4889 }
4890 done:
4891 ASSERT(!(flags & TH_MARKNEXT_NEEDED));
4892 }
4893
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /* This file contains all TCP input processing functions. */
28
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/strsun.h>
32 #include <sys/strsubr.h>
33 #include <sys/stropts.h>
34 #include <sys/strlog.h>
35 #define _SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/suntpi.h>
38 #include <sys/xti_inet.h>
39 #include <sys/squeue_impl.h>
40 #include <sys/squeue.h>
41 #include <sys/tsol/tnet.h>
42
43 #include <inet/common.h>
44 #include <inet/ip.h>
2214
2215 /*
2216 * Prime pump for checksum calculation in IP. Include the
2217 * adjustment for a source route if any.
2218 */
2219 data_length = tcp_hdr_len + connp->conn_sum;
2220 data_length = (data_length >> 16) + (data_length & 0xFFFF);
2221 tcpha->tha_sum = htons(data_length);
2222
2223 if (tcp->tcp_ip_forward_progress) {
2224 tcp->tcp_ip_forward_progress = B_FALSE;
2225 connp->conn_ixa->ixa_flags |= IXAF_REACH_CONF;
2226 } else {
2227 connp->conn_ixa->ixa_flags &= ~IXAF_REACH_CONF;
2228 }
2229 return (mp1);
2230 }
2231 }
2232
2233 /*
2234 * Dummy socket upcalls for if/when the conn_t gets detached from a
2235 * direct-callback sonode via a user-driven close(). Easy to catch with
2236 * DTrace FBT, and should be mostly harmless.
2237 */
2238
2239 /* ARGSUSED */
2240 static sock_upper_handle_t
2241 tcp_dummy_newconn(sock_upper_handle_t x, sock_lower_handle_t y,
2242 sock_downcalls_t *z, cred_t *cr, pid_t pid, sock_upcalls_t **ignored)
2243 {
2244 ASSERT(0); /* Panic in debug, otherwise ignore. */
2245 return (NULL);
2246 }
2247
2248 /* ARGSUSED */
2249 static void
2250 tcp_dummy_connected(sock_upper_handle_t x, sock_connid_t y, cred_t *cr,
2251 pid_t pid)
2252 {
2253 ASSERT(x == NULL);
2254 /* Normally we'd crhold(cr) and attach it to socket state. */
2255 /* LINTED */
2256 }
2257
2258 /* ARGSUSED */
2259 static int
2260 tcp_dummy_disconnected(sock_upper_handle_t x, sock_connid_t y, int blah)
2261 {
2262 ASSERT(0); /* Panic in debug, otherwise ignore. */
2263 return (-1);
2264 }
2265
2266 /* ARGSUSED */
2267 static void
2268 tcp_dummy_opctl(sock_upper_handle_t x, sock_opctl_action_t y, uintptr_t blah)
2269 {
2270 ASSERT(x == NULL);
2271 /* We really want this one to be a harmless NOP for now. */
2272 /* LINTED */
2273 }
2274
2275 /* ARGSUSED */
2276 static ssize_t
2277 tcp_dummy_recv(sock_upper_handle_t x, mblk_t *mp, size_t len, int flags,
2278 int *error, boolean_t *push)
2279 {
2280 ASSERT(x == NULL);
2281
2282 /*
2283 * Consume the message, set ESHUTDOWN, and return an error.
2284 * Nobody's home!
2285 */
2286 freemsg(mp);
2287 *error = ESHUTDOWN;
2288 return (-1);
2289 }
2290
2291 /* ARGSUSED */
2292 static void
2293 tcp_dummy_set_proto_props(sock_upper_handle_t x, struct sock_proto_props *y)
2294 {
2295 ASSERT(0); /* Panic in debug, otherwise ignore. */
2296 }
2297
2298 /* ARGSUSED */
2299 static void
2300 tcp_dummy_txq_full(sock_upper_handle_t x, boolean_t y)
2301 {
2302 ASSERT(0); /* Panic in debug, otherwise ignore. */
2303 }
2304
2305 /* ARGSUSED */
2306 static void
2307 tcp_dummy_signal_oob(sock_upper_handle_t x, ssize_t len)
2308 {
2309 ASSERT(x == NULL);
2310 /* Otherwise, this would signal socket state about OOB data. */
2311 }
2312
2313 /* ARGSUSED */
2314 static void
2315 tcp_dummy_set_error(sock_upper_handle_t x, int err)
2316 {
2317 ASSERT(0); /* Panic in debug, otherwise ignore. */
2318 }
2319
2320 /* ARGSUSED */
2321 static void
2322 tcp_dummy_onearg(sock_upper_handle_t x)
2323 {
2324 ASSERT(0); /* Panic in debug, otherwise ignore. */
2325 }
2326
2327 static sock_upcalls_t tcp_dummy_upcalls = {
2328 tcp_dummy_newconn,
2329 tcp_dummy_connected,
2330 tcp_dummy_disconnected,
2331 tcp_dummy_opctl,
2332 tcp_dummy_recv,
2333 tcp_dummy_set_proto_props,
2334 tcp_dummy_txq_full,
2335 tcp_dummy_signal_oob,
2336 tcp_dummy_onearg,
2337 tcp_dummy_set_error,
2338 tcp_dummy_onearg
2339 };
2340
2341 /*
2342 * Handle M_DATA messages from IP. It is called directly from IP via
2343 * squeue for received IP packets.
2344 *
2345 * The first argument is always the connp/tcp to which the mp belongs.
2346 * There are no exceptions to this rule. The caller has already put
2347 * a reference on this connp/tcp and once tcp_input_data() returns,
2348 * the squeue will do the refrele.
2349 *
2350 * The TH_SYN for the listener directly go to tcp_input_listener via
2351 * squeue. ICMP errors go directly to tcp_icmp_input().
2352 *
2353 * sqp: NULL = recursive, sqp != NULL means called from squeue
2354 */
2355 void
2356 tcp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2357 {
2358 int32_t bytes_acked;
2359 int32_t gap;
2360 mblk_t *mp1;
2361 uint_t flags;
2363 uchar_t *iphdr;
2364 uchar_t *rptr;
2365 int32_t rgap;
2366 uint32_t seg_ack;
2367 int seg_len;
2368 uint_t ip_hdr_len;
2369 uint32_t seg_seq;
2370 tcpha_t *tcpha;
2371 int urp;
2372 tcp_opt_t tcpopt;
2373 ip_pkt_t ipp;
2374 boolean_t ofo_seg = B_FALSE; /* Out of order segment */
2375 uint32_t cwnd;
2376 uint32_t add;
2377 int npkt;
2378 int mss;
2379 conn_t *connp = (conn_t *)arg;
2380 squeue_t *sqp = (squeue_t *)arg2;
2381 tcp_t *tcp = connp->conn_tcp;
2382 tcp_stack_t *tcps = tcp->tcp_tcps;
2383 sock_upcalls_t *sockupcalls;
2384
2385 /*
2386 * RST from fused tcp loopback peer should trigger an unfuse.
2387 */
2388 if (tcp->tcp_fused) {
2389 TCP_STAT(tcps, tcp_fusion_aborted);
2390 tcp_unfuse(tcp);
2391 }
2392
2393 iphdr = mp->b_rptr;
2394 rptr = mp->b_rptr;
2395 ASSERT(OK_32PTR(rptr));
2396
2397 ip_hdr_len = ira->ira_ip_hdr_length;
2398 if (connp->conn_recv_ancillary.crb_all != 0) {
2399 /*
2400 * Record packet information in the ip_pkt_t
2401 */
2402 ipp.ipp_fields = 0;
2403 if (ira->ira_flags & IRAF_IS_IPV4) {
2489 if (tcp->tcp_detached || !pullupmsg(mp, -1)) {
2490 freemsg(mp);
2491 return;
2492 }
2493 /* Update pointers into message */
2494 iphdr = rptr = mp->b_rptr;
2495 tcpha = (tcpha_t *)&rptr[ip_hdr_len];
2496 if (SEQ_GT(seg_seq, tcp->tcp_rnxt)) {
2497 /*
2498 * Since we can't handle any data with this urgent
2499 * pointer that is out of sequence, we expunge
2500 * the data. This allows us to still register
2501 * the urgent mark and generate the M_PCSIG,
2502 * which we can do.
2503 */
2504 mp->b_wptr = (uchar_t *)tcpha + TCP_HDR_LENGTH(tcpha);
2505 seg_len = 0;
2506 }
2507 }
2508
2509 sockupcalls = connp->conn_upcalls;
2510 /* A conn_t may have belonged to a now-closed socket. Be careful. */
2511 if (sockupcalls == NULL)
2512 sockupcalls = &tcp_dummy_upcalls;
2513
2514 switch (tcp->tcp_state) {
2515 case TCPS_SYN_SENT:
2516 if (connp->conn_final_sqp == NULL &&
2517 tcp_outbound_squeue_switch && sqp != NULL) {
2518 ASSERT(connp->conn_initial_sqp == connp->conn_sqp);
2519 connp->conn_final_sqp = sqp;
2520 if (connp->conn_final_sqp != connp->conn_sqp) {
2521 DTRACE_PROBE1(conn__final__sqp__switch,
2522 conn_t *, connp);
2523 CONN_INC_REF(connp);
2524 SQUEUE_SWITCH(connp, connp->conn_final_sqp);
2525 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2526 tcp_input_data, connp, ira, ip_squeue_flag,
2527 SQTAG_CONNECT_FINISH);
2528 return;
2529 }
2530 DTRACE_PROBE1(conn__final__sqp__same, conn_t *, connp);
2531 }
2532 if (flags & TH_ACK) {
2533 /*
2705 */
2706 if ((ack_mp = tcp_ack_mp(tcp)) != NULL) {
2707 if (tcp->tcp_ack_tid != 0) {
2708 (void) TCP_TIMER_CANCEL(tcp,
2709 tcp->tcp_ack_tid);
2710 tcp->tcp_ack_tid = 0;
2711 }
2712 tcp_send_data(tcp, ack_mp);
2713 BUMP_LOCAL(tcp->tcp_obsegs);
2714 TCPS_BUMP_MIB(tcps, tcpOutAck);
2715
2716 if (!IPCL_IS_NONSTR(connp)) {
2717 /* Send up T_CONN_CON */
2718 if (ira->ira_cred != NULL) {
2719 mblk_setcred(mp1,
2720 ira->ira_cred,
2721 ira->ira_cpid);
2722 }
2723 putnext(connp->conn_rq, mp1);
2724 } else {
2725 (*sockupcalls->su_connected)
2726 (connp->conn_upper_handle,
2727 tcp->tcp_connid,
2728 ira->ira_cred,
2729 ira->ira_cpid);
2730 freemsg(mp1);
2731 }
2732
2733 freemsg(mp);
2734 return;
2735 }
2736 /*
2737 * Forget fusion; we need to handle more
2738 * complex cases below. Send the deferred
2739 * T_CONN_CON message upstream and proceed
2740 * as usual. Mark this tcp as not capable
2741 * of fusion.
2742 */
2743 TCP_STAT(tcps, tcp_fusion_unfusable);
2744 tcp->tcp_unfusable = B_TRUE;
2745 if (!IPCL_IS_NONSTR(connp)) {
2746 if (ira->ira_cred != NULL) {
2747 mblk_setcred(mp1, ira->ira_cred,
2748 ira->ira_cpid);
2749 }
2750 putnext(connp->conn_rq, mp1);
2751 } else {
2752 (*sockupcalls->su_connected)
2753 (connp->conn_upper_handle,
2754 tcp->tcp_connid, ira->ira_cred,
2755 ira->ira_cpid);
2756 freemsg(mp1);
2757 }
2758 }
2759
2760 /*
2761 * Check to see if there is data to be sent. If
2762 * yes, set the transmit flag. Then check to see
2763 * if received data processing needs to be done.
2764 * If not, go straight to xmit_check. This short
2765 * cut is OK as we don't support T/TCP.
2766 */
2767 if (tcp->tcp_unsent)
2768 flags |= TH_XMIT_NEEDED;
2769
2770 if (seg_len == 0 && !(flags & TH_URG)) {
2771 freemsg(mp);
2772 goto xmit_check;
3106 * for this connection or if this is a new urgent
3107 * byte. Also send a zero-length "unmarked" message
3108 * to inform SIOCATMARK that this is not the mark.
3109 *
3110 * tcp_urp_last_valid is cleared when the T_exdata_ind
3111 * is sent up. This plus the check for old data
3112 * (gap >= 0) handles the wraparound of the sequence
3113 * number space without having to always track the
3114 * correct MAX(tcp_urp_last, tcp_rnxt). (BSD tracks
3115 * this max in its rcv_up variable).
3116 *
3117 * This prevents duplicate SIGURGS due to a "late"
3118 * zero-window probe when the T_EXDATA_IND has already
3119 * been sent up.
3120 */
3121 if ((flags & TH_URG) &&
3122 (!tcp->tcp_urp_last_valid || SEQ_GT(urp + seg_seq,
3123 tcp->tcp_urp_last))) {
3124 if (IPCL_IS_NONSTR(connp)) {
3125 if (!TCP_IS_DETACHED(tcp)) {
3126 (*sockupcalls->su_signal_oob)
3127 (connp->conn_upper_handle,
3128 urp);
3129 }
3130 } else {
3131 mp1 = allocb(0, BPRI_MED);
3132 if (mp1 == NULL) {
3133 freemsg(mp);
3134 return;
3135 }
3136 if (!TCP_IS_DETACHED(tcp) &&
3137 !putnextctl1(connp->conn_rq,
3138 M_PCSIG, SIGURG)) {
3139 /* Try again on the rexmit. */
3140 freemsg(mp1);
3141 freemsg(mp);
3142 return;
3143 }
3144 /*
3145 * If the next byte would be the mark
3146 * then mark with MARKNEXT else mark
3384 */
3385 if (flags & TH_URG && urp >= 0) {
3386 if (!tcp->tcp_urp_last_valid ||
3387 SEQ_GT(urp + seg_seq, tcp->tcp_urp_last)) {
3388 /*
3389 * Non-STREAMS sockets handle the urgent data a litte
3390 * differently from STREAMS based sockets. There is no
3391 * need to mark any mblks with the MSG{NOT,}MARKNEXT
3392 * flags to keep SIOCATMARK happy. Instead a
3393 * su_signal_oob upcall is made to update the mark.
3394 * Neither is a T_EXDATA_IND mblk needed to be
3395 * prepended to the urgent data. The urgent data is
3396 * delivered using the su_recv upcall, where we set
3397 * the MSG_OOB flag to indicate that it is urg data.
3398 *
3399 * Neither TH_SEND_URP_MARK nor TH_MARKNEXT_NEEDED
3400 * are used by non-STREAMS sockets.
3401 */
3402 if (IPCL_IS_NONSTR(connp)) {
3403 if (!TCP_IS_DETACHED(tcp)) {
3404 (*sockupcalls->su_signal_oob)
3405 (connp->conn_upper_handle, urp);
3406 }
3407 } else {
3408 /*
3409 * If we haven't generated the signal yet for
3410 * this urgent pointer value, do it now. Also,
3411 * send up a zero-length M_DATA indicating
3412 * whether or not this is the mark. The latter
3413 * is not needed when a T_EXDATA_IND is sent up.
3414 * However, if there are allocation failures
3415 * this code relies on the sender retransmitting
3416 * and the socket code for determining the mark
3417 * should not block waiting for the peer to
3418 * transmit. Thus, for simplicity we always
3419 * send up the mark indication.
3420 */
3421 mp1 = allocb(0, BPRI_MED);
3422 if (mp1 == NULL) {
3423 freemsg(mp);
3424 return;
3543 * the remainder back in will cause a
3544 * loop. In this case, drop the
3545 * packet and let the sender try
3546 * sending a good packet.
3547 */
3548 if (tmp_rnxt == tcp->tcp_rnxt) {
3549 freemsg(mp);
3550 return;
3551 }
3552 }
3553 tcp_input_data(connp, mp, NULL, ira);
3554 return;
3555 }
3556 /*
3557 * This segment contains only the urgent byte. We
3558 * have to allocate the T_exdata_ind, if we can.
3559 */
3560 if (IPCL_IS_NONSTR(connp)) {
3561 int error;
3562
3563 (*sockupcalls->su_recv)
3564 (connp->conn_upper_handle, mp, seg_len,
3565 MSG_OOB, &error, NULL);
3566 /*
3567 * We should never be in middle of a
3568 * fallback, the squeue guarantees that.
3569 */
3570 ASSERT(error != EOPNOTSUPP);
3571 mp = NULL;
3572 goto update_ack;
3573 } else if (!tcp->tcp_urp_mp) {
3574 struct T_exdata_ind *tei;
3575 mp1 = allocb(sizeof (struct T_exdata_ind),
3576 BPRI_MED);
3577 if (!mp1) {
3578 /*
3579 * Sigh... It'll be back.
3580 * Generate any MSG*MARK message now.
3581 */
3582 freemsg(mp);
3583 seg_len = 0;
4722 tcp_display(tcp, NULL, DISP_PORT_ONLY));
4723 #endif /* DEBUG */
4724 }
4725
4726 /*
4727 * Check for ancillary data changes compared to last segment.
4728 */
4729 if (connp->conn_recv_ancillary.crb_all != 0) {
4730 mp = tcp_input_add_ancillary(tcp, mp, &ipp, ira);
4731 if (mp == NULL)
4732 return;
4733 }
4734
4735 if (IPCL_IS_NONSTR(connp)) {
4736 /*
4737 * Non-STREAMS socket
4738 */
4739 boolean_t push = flags & (TH_PUSH|TH_FIN);
4740 int error;
4741
4742 if ((*sockupcalls->su_recv)(connp->conn_upper_handle,
4743 mp, seg_len, 0, &error, &push) <= 0) {
4744 /*
4745 * We should never be in middle of a
4746 * fallback, the squeue guarantees that.
4747 */
4748 ASSERT(error != EOPNOTSUPP);
4749 if (error == ENOSPC)
4750 tcp->tcp_rwnd -= seg_len;
4751 } else if (push) {
4752 /* PUSH bit set and sockfs is not flow controlled */
4753 flags |= tcp_rwnd_reopen(tcp);
4754 }
4755 } else if (tcp->tcp_listener != NULL || tcp->tcp_hard_binding) {
4756 /*
4757 * Side queue inbound data until the accept happens.
4758 * tcp_accept/tcp_rput drains this when the accept happens.
4759 * M_DATA is queued on b_cont. Otherwise (T_OPTDATA_IND or
4760 * T_EXDATA_IND) it is queued on b_next.
4761 * XXX Make urgent data use this. Requires:
4762 * Removing tcp_listener check for TH_URG
4964 tcp->tcp_localnet ?
4965 tcps->tcps_local_dack_interval :
4966 tcps->tcps_deferred_ack_interval);
4967 }
4968 }
4969 if (flags & TH_ORDREL_NEEDED) {
4970 /*
4971 * Notify upper layer about an orderly release. If this is
4972 * a non-STREAMS socket, then just make an upcall. For STREAMS
4973 * we send up an ordrel_ind, unless this is an eager, in which
4974 * case the ordrel will be sent when tcp_accept_finish runs.
4975 * Note that for non-STREAMS we make an upcall even if it is an
4976 * eager, because we have an upper handle to send it to.
4977 */
4978 ASSERT(IPCL_IS_NONSTR(connp) || tcp->tcp_listener == NULL);
4979 ASSERT(!tcp->tcp_detached);
4980
4981 if (IPCL_IS_NONSTR(connp)) {
4982 ASSERT(tcp->tcp_ordrel_mp == NULL);
4983 tcp->tcp_ordrel_done = B_TRUE;
4984 (*sockupcalls->su_opctl)(connp->conn_upper_handle,
4985 SOCK_OPCTL_SHUT_RECV, 0);
4986 goto done;
4987 }
4988
4989 if (tcp->tcp_rcv_list != NULL) {
4990 /*
4991 * Push any mblk(s) enqueued from co processing.
4992 */
4993 flags |= tcp_rcv_drain(tcp);
4994 }
4995 ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
4996
4997 mp1 = tcp->tcp_ordrel_mp;
4998 tcp->tcp_ordrel_mp = NULL;
4999 tcp->tcp_ordrel_done = B_TRUE;
5000 putnext(connp->conn_rq, mp1);
5001 }
5002 done:
5003 ASSERT(!(flags & TH_MARKNEXT_NEEDED));
5004 }
5005
|