Print this page
5295 remove maxburst logic from TCP's send algorithm Reviewed by: Dan McDonald <danmcd@omniti.com>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_timers.c
+++ new/usr/src/uts/common/inet/tcp/tcp_timers.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2011 Joyent, Inc. All rights reserved.
26 + * Copyright (c) 2014 by Delphix. All rights reserved.
26 27 */
27 28
28 29 #include <sys/types.h>
29 30 #include <sys/strlog.h>
30 31 #include <sys/strsun.h>
31 32 #include <sys/squeue_impl.h>
32 33 #include <sys/squeue.h>
33 34 #include <sys/callo.h>
34 35 #include <sys/strsubr.h>
35 36
36 37 #include <inet/common.h>
37 38 #include <inet/ip.h>
38 39 #include <inet/ip_ire.h>
39 40 #include <inet/ip_rts.h>
40 41 #include <inet/tcp.h>
41 42 #include <inet/tcp_impl.h>
42 43
43 44 /*
44 45 * Implementation of TCP Timers.
45 46 * =============================
46 47 *
47 48 * INTERFACE:
48 49 *
49 50 * There are two basic functions dealing with tcp timers:
50 51 *
51 52 * timeout_id_t tcp_timeout(connp, func, time)
52 53 * clock_t tcp_timeout_cancel(connp, timeout_id)
53 54 * TCP_TIMER_RESTART(tcp, intvl)
54 55 *
55 56 * tcp_timeout() starts a timer for the 'tcp' instance arranging to call 'func'
56 57 * after 'time' milliseconds. The function called by timeout() must adhere to
57 58 * the same restrictions as a driver soft interrupt handler - it must not sleep
58 59 * or call other functions that might sleep. The value returned is the opaque
59 60 * non-zero timeout identifier that can be passed to tcp_timeout_cancel() to
60 61 * cancel the request. The call to tcp_timeout() may fail in which case it
61 62 * returns zero. This is different from the timeout(9F) function which never
62 63 * fails.
63 64 *
64 65 * The call-back function 'func' always receives 'connp' as its single
65 66 * argument. It is always executed in the squeue corresponding to the tcp
66 67 * structure. The tcp structure is guaranteed to be present at the time the
67 68 * call-back is called.
68 69 *
69 70 * NOTE: The call-back function 'func' is never called if tcp is in
70 71 * the TCPS_CLOSED state.
71 72 *
72 73 * tcp_timeout_cancel() attempts to cancel a pending tcp_timeout()
73 74 * request. Locks acquired by the call-back routine should not be held across
74 75 * the call to tcp_timeout_cancel() or a deadlock may result.
75 76 *
76 77 * tcp_timeout_cancel() returns -1 if the timeout request is invalid.
77 78 * Otherwise, it returns an integer value greater than or equal to 0.
78 79 *
79 80 * NOTE: both tcp_timeout() and tcp_timeout_cancel() should always be called
80 81 * within squeue context corresponding to the tcp instance. Since the
81 82 * call-back is also called via the same squeue, there are no race
82 83 * conditions described in untimeout(9F) manual page since all calls are
83 84 * strictly serialized.
84 85 *
85 86 * TCP_TIMER_RESTART() is a macro that attempts to cancel a pending timeout
86 87 * stored in tcp_timer_tid and starts a new one using
87 88 * MSEC_TO_TICK(intvl). It always uses tcp_timer() function as a call-back
88 89 * and stores the return value of tcp_timeout() in the tcp->tcp_timer_tid
89 90 * field.
90 91 *
91 92 * IMPLEMENTATION:
92 93 *
93 94 * TCP timers are implemented using a three-stage process. The call to
94 95 * tcp_timeout() uses timeout(9F) function to call tcp_timer_callback() function
95 96 * when the timer expires. The tcp_timer_callback() arranges the call of the
96 97 * tcp_timer_handler() function via squeue corresponding to the tcp
97 98 * instance. The tcp_timer_handler() calls actual requested timeout call-back
98 99 * and passes tcp instance as an argument to it. Information is passed between
99 100 * stages using the tcp_timer_t structure which contains the connp pointer, the
100 101 * tcp call-back to call and the timeout id returned by the timeout(9F).
101 102 *
102 103 * The tcp_timer_t structure is not used directly, it is embedded in an mblk_t -
103 104 * like structure that is used to enter an squeue. The mp->b_rptr of this pseudo
104 105 * mblk points to the beginning of tcp_timer_t structure. The tcp_timeout()
105 106 * returns the pointer to this mblk.
106 107 *
107 108 * The pseudo mblk is allocated from a special tcp_timer_cache kmem cache. It
108 109 * looks like a normal mblk without actual dblk attached to it.
109 110 *
110 111 * To optimize performance each tcp instance holds a small cache of timer
111 112 * mblocks. In the current implementation it caches up to two timer mblocks per
112 113 * tcp instance. The cache is preserved over tcp frees and is only freed when
113 114 * the whole tcp structure is destroyed by its kmem destructor. Since all tcp
114 115 * timer processing happens on a corresponding squeue, the cache manipulation
115 116 * does not require any locks. Experiments show that the majority of timer mblock
116 117 * allocations are satisfied from the tcp cache and do not involve kmem calls.
117 118 *
118 119 * The tcp_timeout() places a refhold on the connp instance which guarantees
119 120 * that it will be present at the time the call-back function fires. The
120 121 * tcp_timer_handler() drops the reference after calling the call-back, so the
121 122 * call-back function does not need to manipulate the references explicitly.
122 123 */
123 124
124 125 kmem_cache_t *tcp_timercache;
125 126
126 127 static void tcp_ip_notify(tcp_t *);
127 128 static void tcp_timer_callback(void *);
128 129 static void tcp_timer_free(tcp_t *, mblk_t *);
129 130 static void tcp_timer_handler(void *, mblk_t *, void *, ip_recv_attr_t *);
130 131
131 132 /*
132 133 * tim is in millisec.
133 134 */
134 135 timeout_id_t
135 136 tcp_timeout(conn_t *connp, void (*f)(void *), hrtime_t tim)
136 137 {
137 138 mblk_t *mp;
138 139 tcp_timer_t *tcpt;
139 140 tcp_t *tcp = connp->conn_tcp;
140 141
141 142 ASSERT(connp->conn_sqp != NULL);
142 143
143 144 TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_calls);
144 145
145 146 if (tcp->tcp_timercache == NULL) {
146 147 mp = tcp_timermp_alloc(KM_NOSLEEP | KM_PANIC);
147 148 } else {
148 149 TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_cached_alloc);
149 150 mp = tcp->tcp_timercache;
150 151 tcp->tcp_timercache = mp->b_next;
151 152 mp->b_next = NULL;
152 153 ASSERT(mp->b_wptr == NULL);
153 154 }
154 155
155 156 CONN_INC_REF(connp);
156 157 tcpt = (tcp_timer_t *)mp->b_rptr;
157 158 tcpt->connp = connp;
158 159 tcpt->tcpt_proc = f;
159 160 /*
160 161 * TCP timers are normal timeouts. Plus, they do not require more than
161 162 * a 10 millisecond resolution. By choosing a coarser resolution and by
162 163 * rounding up the expiration to the next resolution boundary, we can
163 164 * batch timers in the callout subsystem to make TCP timers more
164 165 * efficient. The roundup also protects short timers from expiring too
165 166 * early before they have a chance to be cancelled.
166 167 */
167 168 tcpt->tcpt_tid = timeout_generic(CALLOUT_NORMAL, tcp_timer_callback, mp,
168 169 tim * MICROSEC, CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
169 170 VERIFY(!(tcpt->tcpt_tid & CALLOUT_ID_FREE));
170 171
171 172 return ((timeout_id_t)mp);
172 173 }
173 174
174 175 static void
175 176 tcp_timer_callback(void *arg)
176 177 {
177 178 mblk_t *mp = (mblk_t *)arg;
178 179 tcp_timer_t *tcpt;
179 180 conn_t *connp;
180 181
181 182 tcpt = (tcp_timer_t *)mp->b_rptr;
182 183 connp = tcpt->connp;
183 184 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timer_handler, connp,
184 185 NULL, SQ_FILL, SQTAG_TCP_TIMER);
185 186 }
186 187
187 188 /* ARGSUSED */
188 189 static void
189 190 tcp_timer_handler(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
190 191 {
191 192 tcp_timer_t *tcpt;
192 193 conn_t *connp = (conn_t *)arg;
193 194 tcp_t *tcp = connp->conn_tcp;
194 195
195 196 tcpt = (tcp_timer_t *)mp->b_rptr;
196 197 ASSERT(connp == tcpt->connp);
197 198 ASSERT((squeue_t *)arg2 == connp->conn_sqp);
198 199
199 200 if (tcpt->tcpt_tid & CALLOUT_ID_FREE) {
200 201 /*
201 202 * This timeout was cancelled after it was enqueued to the
202 203 * squeue; free the timer and return.
203 204 */
204 205 tcp_timer_free(connp->conn_tcp, mp);
205 206 return;
206 207 }
207 208
208 209 /*
209 210 * If the TCP has reached the closed state, don't proceed any
210 211 * further. This TCP logically does not exist on the system.
211 212 * tcpt_proc could for example access queues, that have already
212 213 * been qprocoff'ed off.
213 214 */
214 215 if (tcp->tcp_state != TCPS_CLOSED) {
215 216 (*tcpt->tcpt_proc)(connp);
216 217 } else {
217 218 tcp->tcp_timer_tid = 0;
218 219 }
219 220
220 221 tcp_timer_free(connp->conn_tcp, mp);
221 222 }
222 223
223 224 /*
224 225 * There is a potential race with untimeout and the handler firing at the same
225 226 * time. The mblock may be freed by the handler while we are trying to use
226 227 * it. But since both should execute on the same squeue, this race should not
227 228 * occur.
228 229 */
229 230 clock_t
230 231 tcp_timeout_cancel(conn_t *connp, timeout_id_t id)
231 232 {
232 233 mblk_t *mp = (mblk_t *)id;
233 234 tcp_timer_t *tcpt;
234 235 clock_t delta;
235 236
236 237 TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_cancel_reqs);
237 238
238 239 if (mp == NULL)
239 240 return (-1);
240 241
241 242 tcpt = (tcp_timer_t *)mp->b_rptr;
242 243 ASSERT(tcpt->connp == connp);
243 244
244 245 delta = untimeout_default(tcpt->tcpt_tid, 0);
245 246
246 247 if (delta >= 0) {
247 248 TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_canceled);
248 249 tcp_timer_free(connp->conn_tcp, mp);
249 250 CONN_DEC_REF(connp);
250 251 } else {
251 252 /*
252 253 * If we were unable to untimeout successfully, it has already
253 254 * been enqueued on the squeue; mark the ID with the free
254 255 * bit. This bit can never be set in a valid identifier, and
255 256 * we'll use it to prevent the timeout from being executed.
256 257 * And note that we're within the squeue perimeter here, so
257 258 * we don't need to worry about racing with timer handling
258 259 * (which also executes within the perimeter).
259 260 */
260 261 tcpt->tcpt_tid |= CALLOUT_ID_FREE;
261 262 delta = 0;
262 263 }
263 264
264 265 return (TICK_TO_MSEC(delta));
265 266 }
266 267
267 268 /*
268 269 * Allocate space for the timer event. The allocation looks like mblk, but it is
269 270 * not a proper mblk. To avoid confusion we set b_wptr to NULL.
270 271 *
271 272 * Dealing with failures: If we can't allocate from the timer cache we try
272 273 * allocating from dblock caches using allocb_tryhard(). In this case b_wptr
273 274 * points to b_rptr.
274 275 * If we can't allocate anything using allocb_tryhard(), we perform a last
275 276 * attempt and use kmem_alloc_tryhard(). In this case we set b_wptr to -1 and
276 277 * save the actual allocation size in b_datap.
277 278 */
278 279 mblk_t *
279 280 tcp_timermp_alloc(int kmflags)
280 281 {
281 282 mblk_t *mp = (mblk_t *)kmem_cache_alloc(tcp_timercache,
282 283 kmflags & ~KM_PANIC);
283 284
284 285 if (mp != NULL) {
285 286 mp->b_next = mp->b_prev = NULL;
286 287 mp->b_rptr = (uchar_t *)(&mp[1]);
287 288 mp->b_wptr = NULL;
288 289 mp->b_datap = NULL;
289 290 mp->b_queue = NULL;
290 291 mp->b_cont = NULL;
291 292 } else if (kmflags & KM_PANIC) {
292 293 /*
293 294 * Failed to allocate memory for the timer. Try allocating from
294 295 * dblock caches.
295 296 */
296 297 /* ipclassifier calls this from a constructor - hence no tcps */
297 298 TCP_G_STAT(tcp_timermp_allocfail);
298 299 mp = allocb_tryhard(sizeof (tcp_timer_t));
299 300 if (mp == NULL) {
300 301 size_t size = 0;
301 302 /*
302 303 * Memory is really low. Try tryhard allocation.
303 304 *
304 305 * ipclassifier calls this from a constructor -
305 306 * hence no tcps
306 307 */
307 308 TCP_G_STAT(tcp_timermp_allocdblfail);
308 309 mp = kmem_alloc_tryhard(sizeof (mblk_t) +
309 310 sizeof (tcp_timer_t), &size, kmflags);
310 311 mp->b_rptr = (uchar_t *)(&mp[1]);
311 312 mp->b_next = mp->b_prev = NULL;
312 313 mp->b_wptr = (uchar_t *)-1;
313 314 mp->b_datap = (dblk_t *)size;
314 315 mp->b_queue = NULL;
315 316 mp->b_cont = NULL;
316 317 }
317 318 ASSERT(mp->b_wptr != NULL);
318 319 }
319 320 /* ipclassifier calls this from a constructor - hence no tcps */
320 321 TCP_G_DBGSTAT(tcp_timermp_alloced);
321 322
322 323 return (mp);
323 324 }
324 325
325 326 /*
326 327 * Free per-tcp timer cache.
327 328 * It can only contain entries from tcp_timercache.
328 329 */
329 330 void
330 331 tcp_timermp_free(tcp_t *tcp)
331 332 {
332 333 mblk_t *mp;
333 334
334 335 while ((mp = tcp->tcp_timercache) != NULL) {
335 336 ASSERT(mp->b_wptr == NULL);
336 337 tcp->tcp_timercache = tcp->tcp_timercache->b_next;
337 338 kmem_cache_free(tcp_timercache, mp);
338 339 }
339 340 }
340 341
341 342 /*
342 343 * Free timer event. Put it on the per-tcp timer cache if there are not too many
343 344 * events there already (currently at most two events are cached).
344 345 * If the event is not allocated from the timer cache, free it right away.
345 346 */
346 347 static void
347 348 tcp_timer_free(tcp_t *tcp, mblk_t *mp)
348 349 {
349 350 mblk_t *mp1 = tcp->tcp_timercache;
350 351
351 352 if (mp->b_wptr != NULL) {
352 353 /*
353 354 * This allocation is not from a timer cache, free it right
354 355 * away.
355 356 */
356 357 if (mp->b_wptr != (uchar_t *)-1)
357 358 freeb(mp);
358 359 else
359 360 kmem_free(mp, (size_t)mp->b_datap);
360 361 } else if (mp1 == NULL || mp1->b_next == NULL) {
361 362 /* Cache this timer block for future allocations */
362 363 mp->b_rptr = (uchar_t *)(&mp[1]);
363 364 mp->b_next = mp1;
364 365 tcp->tcp_timercache = mp;
365 366 } else {
366 367 kmem_cache_free(tcp_timercache, mp);
367 368 TCP_DBGSTAT(tcp->tcp_tcps, tcp_timermp_freed);
368 369 }
369 370 }
370 371
371 372 /*
372 373 * Stop all TCP timers.
373 374 */
374 375 void
375 376 tcp_timers_stop(tcp_t *tcp)
376 377 {
377 378 if (tcp->tcp_timer_tid != 0) {
378 379 (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_timer_tid);
379 380 tcp->tcp_timer_tid = 0;
380 381 }
381 382 if (tcp->tcp_ka_tid != 0) {
382 383 (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ka_tid);
383 384 tcp->tcp_ka_tid = 0;
384 385 }
385 386 if (tcp->tcp_ack_tid != 0) {
386 387 (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ack_tid);
387 388 tcp->tcp_ack_tid = 0;
388 389 }
389 390 if (tcp->tcp_push_tid != 0) {
390 391 (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
391 392 tcp->tcp_push_tid = 0;
392 393 }
393 394 if (tcp->tcp_reass_tid != 0) {
394 395 (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_reass_tid);
395 396 tcp->tcp_reass_tid = 0;
396 397 }
397 398 }
398 399
399 400 /*
400 401 * Timer callback routine for keepalive probe. We do a fake resend of the
401 402 * last ACKed byte. Then set a timer using RTO. When the timer expires,
402 403 * check to see if we have heard anything from the other end for the last
403 404 * RTO period. If we have, set the timer to expire for another
404 405 * tcp_keepalive_intrvl and check again. If we have not, set a timer using
405 406 * RTO << 1 and check again when it expires. Keep exponentially increasing
406 407 * the timeout if we have not heard from the other side. If for more than
407 408 * (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything,
408 409 * kill the connection unless the keepalive abort threshold is 0. In
409 410 * that case, we will probe "forever."
410 411 * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow
411 412 * the exponential backoff, but send probes tcp_ka_cnt times in regular
412 413 * intervals of tcp_ka_rinterval milliseconds until we hear back from peer.
413 414 * Kill the connection if we don't hear back from peer after tcp_ka_cnt
414 415 * probes are sent.
415 416 */
416 417 void
417 418 tcp_keepalive_timer(void *arg)
418 419 {
419 420 mblk_t *mp;
420 421 conn_t *connp = (conn_t *)arg;
421 422 tcp_t *tcp = connp->conn_tcp;
422 423 int32_t firetime;
423 424 int32_t idletime;
424 425 int32_t ka_intrvl;
425 426 tcp_stack_t *tcps = tcp->tcp_tcps;
426 427
427 428 tcp->tcp_ka_tid = 0;
428 429
429 430 if (tcp->tcp_fused)
430 431 return;
431 432
432 433 TCPS_BUMP_MIB(tcps, tcpTimKeepalive);
433 434 ka_intrvl = tcp->tcp_ka_interval;
434 435
435 436 /*
436 437 * Keepalive probe should only be sent if the application has not
437 438 * done a close on the connection.
438 439 */
439 440 if (tcp->tcp_state > TCPS_CLOSE_WAIT) {
440 441 return;
441 442 }
442 443 /* Timer fired too early, restart it. */
443 444 if (tcp->tcp_state < TCPS_ESTABLISHED) {
444 445 tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
445 446 ka_intrvl);
446 447 return;
447 448 }
448 449
449 450 idletime = TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time);
450 451 /*
451 452 * If we have not heard from the other side for a long
452 453 * time, kill the connection unless the keepalive abort
453 454 * threshold is 0. In that case, we will probe "forever."
454 455 */
455 456 if (tcp->tcp_ka_abort_thres != 0 &&
456 457 idletime > (ka_intrvl + tcp->tcp_ka_abort_thres)) {
457 458 TCPS_BUMP_MIB(tcps, tcpTimKeepaliveDrop);
458 459 (void) tcp_clean_death(tcp, tcp->tcp_client_errno ?
459 460 tcp->tcp_client_errno : ETIMEDOUT);
460 461 return;
461 462 }
462 463
463 464 if (tcp->tcp_snxt == tcp->tcp_suna &&
464 465 idletime >= ka_intrvl) {
465 466 /* Fake resend of last ACKed byte. */
466 467 mblk_t *mp1 = allocb(1, BPRI_LO);
467 468
468 469 if (mp1 != NULL) {
469 470 *mp1->b_wptr++ = '\0';
470 471 mp = tcp_xmit_mp(tcp, mp1, 1, NULL, NULL,
471 472 tcp->tcp_suna - 1, B_FALSE, NULL, B_TRUE);
472 473 freeb(mp1);
473 474 /*
474 475 * If allocation failed, fall through to restart the
475 476 * timer.
476 477 */
477 478 if (mp != NULL) {
478 479 tcp_send_data(tcp, mp);
479 480 TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe);
480 481 if (tcp->tcp_ka_rinterval) {
481 482 firetime = tcp->tcp_ka_rinterval;
482 483 } else if (tcp->tcp_ka_last_intrvl != 0) {
483 484 int max;
484 485 /*
485 486 * We should probe again at least
486 487 * in ka_intrvl, but not more than
487 488 * tcp_rto_max.
488 489 */
489 490 max = tcp->tcp_rto_max;
490 491 firetime = MIN(ka_intrvl - 1,
491 492 tcp->tcp_ka_last_intrvl << 1);
492 493 if (firetime > max)
493 494 firetime = max;
494 495 } else {
495 496 firetime = tcp->tcp_rto;
496 497 }
497 498 tcp->tcp_ka_tid = TCP_TIMER(tcp,
498 499 tcp_keepalive_timer, firetime);
499 500 tcp->tcp_ka_last_intrvl = firetime;
500 501 return;
501 502 }
502 503 }
503 504 } else {
504 505 tcp->tcp_ka_last_intrvl = 0;
505 506 }
506 507
507 508 /* firetime can be negative if (mp1 == NULL || mp == NULL) */
508 509 if ((firetime = ka_intrvl - idletime) < 0) {
509 510 firetime = ka_intrvl;
510 511 }
511 512 tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, firetime);
512 513 }
513 514
514 515 void
515 516 tcp_reass_timer(void *arg)
516 517 {
517 518 conn_t *connp = (conn_t *)arg;
518 519 tcp_t *tcp = connp->conn_tcp;
519 520
520 521 tcp->tcp_reass_tid = 0;
521 522 if (tcp->tcp_reass_head == NULL)
522 523 return;
523 524 ASSERT(tcp->tcp_reass_tail != NULL);
524 525 if (tcp->tcp_snd_sack_ok && tcp->tcp_num_sack_blk > 0) {
525 526 tcp_sack_remove(tcp->tcp_sack_list,
526 527 TCP_REASS_END(tcp->tcp_reass_tail), &tcp->tcp_num_sack_blk);
527 528 }
528 529 tcp_close_mpp(&tcp->tcp_reass_head);
529 530 tcp->tcp_reass_tail = NULL;
530 531 TCP_STAT(tcp->tcp_tcps, tcp_reass_timeout);
531 532 }
532 533
533 534 /* This function handles the push timeout. */
534 535 void
535 536 tcp_push_timer(void *arg)
536 537 {
537 538 conn_t *connp = (conn_t *)arg;
538 539 tcp_t *tcp = connp->conn_tcp;
539 540
540 541 TCP_DBGSTAT(tcp->tcp_tcps, tcp_push_timer_cnt);
541 542
542 543 ASSERT(tcp->tcp_listener == NULL);
543 544
544 545 ASSERT(!IPCL_IS_NONSTR(connp));
545 546
546 547 tcp->tcp_push_tid = 0;
547 548
548 549 if (tcp->tcp_rcv_list != NULL &&
549 550 tcp_rcv_drain(tcp) == TH_ACK_NEEDED)
550 551 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
551 552 }
552 553
553 554 /*
554 555 * This function handles delayed ACK timeout.
555 556 */
556 557 void
557 558 tcp_ack_timer(void *arg)
558 559 {
559 560 conn_t *connp = (conn_t *)arg;
560 561 tcp_t *tcp = connp->conn_tcp;
561 562 mblk_t *mp;
562 563 tcp_stack_t *tcps = tcp->tcp_tcps;
563 564
564 565 TCP_DBGSTAT(tcps, tcp_ack_timer_cnt);
565 566
566 567 tcp->tcp_ack_tid = 0;
567 568
568 569 if (tcp->tcp_fused)
569 570 return;
570 571
571 572 /*
572 573 * Do not send ACK if there is no outstanding unack'ed data.
573 574 */
574 575 if (tcp->tcp_rnxt == tcp->tcp_rack) {
575 576 return;
576 577 }
577 578
578 579 if ((tcp->tcp_rnxt - tcp->tcp_rack) > tcp->tcp_mss) {
579 580 /*
580 581 * Make sure we don't allow deferred ACKs to result in
581 582 * timer-based ACKing. If we have held off an ACK
582 583 * when there was more than an mss here, and the timer
583 584 * goes off, we have to worry about the possibility
584 585 * that the sender isn't doing slow-start, or is out
585 586 * of step with us for some other reason. We fall
586 587 * permanently back in the direction of
587 588 * ACK-every-other-packet as suggested in RFC 1122.
588 589 */
589 590 if (tcp->tcp_rack_abs_max > 2)
590 591 tcp->tcp_rack_abs_max--;
591 592 tcp->tcp_rack_cur_max = 2;
592 593 }
593 594 mp = tcp_ack_mp(tcp);
594 595
595 596 if (mp != NULL) {
596 597 BUMP_LOCAL(tcp->tcp_obsegs);
597 598 TCPS_BUMP_MIB(tcps, tcpOutAck);
598 599 TCPS_BUMP_MIB(tcps, tcpOutAckDelayed);
599 600 tcp_send_data(tcp, mp);
600 601 }
601 602 }
602 603
603 604 /*
604 605 * Notify IP that we are having trouble with this connection. IP should
605 606 * make note so it can potentially use a different IRE.
606 607 */
607 608 static void
608 609 tcp_ip_notify(tcp_t *tcp)
609 610 {
610 611 conn_t *connp = tcp->tcp_connp;
611 612 ire_t *ire;
612 613
613 614 /*
614 615 * Note: in the case of source routing we want to blow away the
615 616 * route to the first source route hop.
616 617 */
617 618 ire = connp->conn_ixa->ixa_ire;
618 619 if (ire != NULL && !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
619 620 if (ire->ire_ipversion == IPV4_VERSION) {
620 621 /*
621 622 * As per RFC 1122, we send an RTM_LOSING to inform
622 623 * routing protocols.
623 624 */
624 625 ip_rts_change(RTM_LOSING, ire->ire_addr,
625 626 ire->ire_gateway_addr, ire->ire_mask,
626 627 connp->conn_laddr_v4, 0, 0, 0,
627 628 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA),
628 629 ire->ire_ipst);
629 630 }
630 631 (void) ire_no_good(ire);
631 632 }
632 633 }
633 634
634 635 /*
635 636 * tcp_timer is the timer service routine. It handles the retransmission,
636 637 * FIN_WAIT_2 flush, and zero window probe timeout events. It figures out
637 638 * from the state of the tcp instance what kind of action needs to be done
638 639 * at the time it is called.
639 640 */
640 641 void
641 642 tcp_timer(void *arg)
642 643 {
643 644 mblk_t *mp;
644 645 clock_t first_threshold;
645 646 clock_t second_threshold;
646 647 clock_t ms;
647 648 uint32_t mss;
648 649 conn_t *connp = (conn_t *)arg;
649 650 tcp_t *tcp = connp->conn_tcp;
650 651 tcp_stack_t *tcps = tcp->tcp_tcps;
651 652 boolean_t dont_timeout = B_FALSE;
652 653
653 654 tcp->tcp_timer_tid = 0;
654 655
655 656 if (tcp->tcp_fused)
656 657 return;
657 658
658 659 first_threshold = tcp->tcp_first_timer_threshold;
659 660 second_threshold = tcp->tcp_second_timer_threshold;
660 661 switch (tcp->tcp_state) {
661 662 case TCPS_IDLE:
662 663 case TCPS_BOUND:
663 664 case TCPS_LISTEN:
664 665 return;
665 666 case TCPS_SYN_RCVD: {
666 667 tcp_t *listener = tcp->tcp_listener;
667 668
668 669 if (tcp->tcp_syn_rcvd_timeout == 0 && (listener != NULL)) {
669 670 /* it's our first timeout */
670 671 tcp->tcp_syn_rcvd_timeout = 1;
671 672 mutex_enter(&listener->tcp_eager_lock);
672 673 listener->tcp_syn_rcvd_timeout++;
673 674 if (!tcp->tcp_dontdrop && !tcp->tcp_closemp_used) {
674 675 /*
675 676 * Make this eager available for drop if we
676 677 * need to drop one to accomodate a new
677 678 * incoming SYN request.
678 679 */
679 680 MAKE_DROPPABLE(listener, tcp);
680 681 }
681 682 if (!listener->tcp_syn_defense &&
682 683 (listener->tcp_syn_rcvd_timeout >
683 684 (tcps->tcps_conn_req_max_q0 >> 2)) &&
684 685 (tcps->tcps_conn_req_max_q0 > 200)) {
685 686 /* We may be under attack. Put on a defense. */
686 687 listener->tcp_syn_defense = B_TRUE;
687 688 cmn_err(CE_WARN, "High TCP connect timeout "
688 689 "rate! System (port %d) may be under a "
689 690 "SYN flood attack!",
690 691 ntohs(listener->tcp_connp->conn_lport));
691 692
692 693 listener->tcp_ip_addr_cache = kmem_zalloc(
693 694 IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t),
694 695 KM_NOSLEEP);
695 696 }
696 697 mutex_exit(&listener->tcp_eager_lock);
697 698 } else if (listener != NULL) {
698 699 mutex_enter(&listener->tcp_eager_lock);
699 700 tcp->tcp_syn_rcvd_timeout++;
700 701 if (tcp->tcp_syn_rcvd_timeout > 1 &&
701 702 !tcp->tcp_closemp_used) {
702 703 /*
703 704 * This is our second timeout. Put the tcp in
704 705 * the list of droppable eagers to allow it to
705 706 * be dropped, if needed. We don't check
706 707 * whether tcp_dontdrop is set or not to
707 708 * protect ourselves from a SYN attack where a
708 709 * remote host can spoof itself as one of the
709 710 * good IP sources and continue to hold
710 711 * resources too long.
711 712 */
712 713 MAKE_DROPPABLE(listener, tcp);
713 714 }
714 715 mutex_exit(&listener->tcp_eager_lock);
715 716 }
716 717 }
717 718 /* FALLTHRU */
718 719 case TCPS_SYN_SENT:
719 720 first_threshold = tcp->tcp_first_ctimer_threshold;
720 721 second_threshold = tcp->tcp_second_ctimer_threshold;
721 722
722 723 /*
723 724 * If an app has set the second_threshold to 0, it means that
724 725 * we need to retransmit forever, unless this is a passive
725 726 * open. We need to set second_threshold back to a normal
726 727 * value such that later comparison with it still makes
727 728 * sense. But we set dont_timeout to B_TRUE so that we will
728 729 * never time out.
729 730 */
730 731 if (second_threshold == 0) {
731 732 second_threshold = tcps->tcps_ip_abort_linterval;
732 733 if (tcp->tcp_active_open)
733 734 dont_timeout = B_TRUE;
734 735 }
735 736 break;
736 737 case TCPS_ESTABLISHED:
737 738 case TCPS_CLOSE_WAIT:
738 739 /*
739 740 * If the end point has not been closed, TCP can retransmit
740 741 * forever. But if the end point is closed, the normal
741 742 * timeout applies.
742 743 */
743 744 if (second_threshold == 0) {
744 745 second_threshold = tcps->tcps_ip_abort_linterval;
745 746 dont_timeout = B_TRUE;
746 747 }
747 748 /* FALLTHRU */
748 749 case TCPS_FIN_WAIT_1:
749 750 case TCPS_CLOSING:
750 751 case TCPS_LAST_ACK:
751 752 /* If we have data to rexmit */
752 753 if (tcp->tcp_suna != tcp->tcp_snxt) {
753 754 clock_t time_to_wait;
754 755
755 756 TCPS_BUMP_MIB(tcps, tcpTimRetrans);
756 757 if (!tcp->tcp_xmit_head)
757 758 break;
758 759 time_to_wait = ddi_get_lbolt() -
759 760 (clock_t)tcp->tcp_xmit_head->b_prev;
760 761 time_to_wait = tcp->tcp_rto -
761 762 TICK_TO_MSEC(time_to_wait);
762 763 /*
763 764 * If the timer fired more than 1 clock tick too early,
764 765 * restart the timer.
765 766 */
766 767 if (time_to_wait > msec_per_tick) {
767 768 TCP_STAT(tcps, tcp_timer_fire_early);
768 769 TCP_TIMER_RESTART(tcp, time_to_wait);
769 770 return;
770 771 }
771 772 /*
772 773 * When we probe zero windows, we force the swnd open.
773 774 * If our peer acks with a closed window swnd will be
774 775 * set to zero by tcp_rput(). As long as we are
775 776 * receiving acks tcp_rput will
776 777 * reset 'tcp_ms_we_have_waited' so as not to trip the
777 778 * first and second interval actions. NOTE: the timer
778 779 * interval is allowed to continue its exponential
779 780 * backoff.
780 781 */
781 782 if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
782 783 if (connp->conn_debug) {
783 784 (void) strlog(TCP_MOD_ID, 0, 1,
784 785 SL_TRACE, "tcp_timer: zero win");
785 786 }
786 787 } else {
787 788 /*
788 789 * After retransmission, we need to do
789 790 * slow start. Set the ssthresh to one
790 791 * half of current effective window and
791 792 * cwnd to one MSS. Also reset
792 793 * tcp_cwnd_cnt.
793 794 *
794 795 * Note that if tcp_ssthresh is reduced because
795 796 * of ECN, do not reduce it again unless it is
796 797 * already one window of data away (tcp_cwr
797 798 * should then be cleared) or this is a
798 799 * timeout for a retransmitted segment.
799 800 */
800 801 uint32_t npkt;
801 802
802 803 if (!tcp->tcp_cwr || tcp->tcp_rexmit) {
803 804 npkt = ((tcp->tcp_timer_backoff ?
804 805 tcp->tcp_cwnd_ssthresh :
805 806 tcp->tcp_snxt -
806 807 tcp->tcp_suna) >> 1) / tcp->tcp_mss;
807 808 tcp->tcp_cwnd_ssthresh = MAX(npkt, 2) *
808 809 tcp->tcp_mss;
809 810 }
810 811 tcp->tcp_cwnd = tcp->tcp_mss;
811 812 tcp->tcp_cwnd_cnt = 0;
812 813 if (tcp->tcp_ecn_ok) {
813 814 tcp->tcp_cwr = B_TRUE;
814 815 tcp->tcp_cwr_snd_max = tcp->tcp_snxt;
815 816 tcp->tcp_ecn_cwr_sent = B_FALSE;
816 817 }
817 818 }
818 819 break;
819 820 }
820 821 /*
821 822 * We have something to send yet we cannot send. The
822 823 * reason can be:
823 824 *
824 825 * 1. Zero send window: we need to do zero window probe.
825 826 * 2. Zero cwnd: because of ECN, we need to "clock out"
826 827 * segments.
827 828 * 3. SWS avoidance: receiver may have shrunk window,
828 829 * reset our knowledge.
829 830 *
830 831 * Note that condition 2 can happen with either 1 or
831 832 * 3. But 1 and 3 are exclusive.
832 833 */
833 834 if (tcp->tcp_unsent != 0) {
834 835 /*
835 836 * Should not hold the zero-copy messages for too long.
836 837 */
837 838 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
838 839 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
839 840 tcp->tcp_xmit_head, B_TRUE);
840 841
841 842 if (tcp->tcp_cwnd == 0) {
842 843 /*
843 844 * Set tcp_cwnd to 1 MSS so that a
844 845 * new segment can be sent out. We
845 846 * are "clocking out" new data when
846 847 * the network is really congested.
847 848 */
848 849 ASSERT(tcp->tcp_ecn_ok);
849 850 tcp->tcp_cwnd = tcp->tcp_mss;
850 851 }
851 852 if (tcp->tcp_swnd == 0) {
852 853 /* Extend window for zero window probe */
853 854 tcp->tcp_swnd++;
854 855 tcp->tcp_zero_win_probe = B_TRUE;
855 856 TCPS_BUMP_MIB(tcps, tcpOutWinProbe);
856 857 } else {
857 858 /*
858 859 * Handle timeout from sender SWS avoidance.
859 860 * Reset our knowledge of the max send window
860 861 * since the receiver might have reduced its
861 862 * receive buffer. Avoid setting tcp_max_swnd
862 863 * to one since that will essentially disable
863 864 * the SWS checks.
864 865 *
865 866 * Note that since we don't have a SWS
866 867 * state variable, if the timeout is set
867 868 * for ECN but not for SWS, this
868 869 * code will also be executed. This is
869 870 * fine as tcp_max_swnd is updated
870 871 * constantly and it will not affect
871 872 * anything.
872 873 */
873 874 tcp->tcp_max_swnd = MAX(tcp->tcp_swnd, 2);
874 875 }
875 876 tcp_wput_data(tcp, NULL, B_FALSE);
876 877 return;
877 878 }
878 879 /* Is there a FIN that needs to be retransmitted? */
879 880 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
880 881 !tcp->tcp_fin_acked)
881 882 break;
882 883 /* Nothing to do, return without restarting timer. */
883 884 TCP_STAT(tcps, tcp_timer_fire_miss);
884 885 return;
885 886 case TCPS_FIN_WAIT_2:
886 887 /*
887 888 * User closed the TCP endpoint and peer ACK'ed our FIN.
888 889 * We waited some time for the peer's FIN, but it hasn't
889 890 * arrived. We flush the connection now to avoid the
890 891 * case where the peer has rebooted.
891 892 */
892 893 if (TCP_IS_DETACHED(tcp)) {
893 894 (void) tcp_clean_death(tcp, 0);
894 895 } else {
895 896 TCP_TIMER_RESTART(tcp,
896 897 tcp->tcp_fin_wait_2_flush_interval);
897 898 }
898 899 return;
899 900 case TCPS_TIME_WAIT:
900 901 (void) tcp_clean_death(tcp, 0);
901 902 return;
902 903 default:
903 904 if (connp->conn_debug) {
904 905 (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
905 906 "tcp_timer: strange state (%d) %s",
906 907 tcp->tcp_state, tcp_display(tcp, NULL,
907 908 DISP_PORT_ONLY));
908 909 }
909 910 return;
910 911 }
911 912
912 913 /*
913 914 * If the system is under memory pressure or the max number of
914 915 * connections have been established for the listener, be more
915 916 * aggressive in aborting connections.
916 917 */
917 918 if (tcps->tcps_reclaim || (tcp->tcp_listen_cnt != NULL &&
918 919 tcp->tcp_listen_cnt->tlc_cnt > tcp->tcp_listen_cnt->tlc_max)) {
919 920 second_threshold = tcp_early_abort * SECONDS;
920 921
921 922 /* We will ignore the never timeout promise in this case... */
922 923 dont_timeout = B_FALSE;
923 924 }
924 925
925 926 ASSERT(second_threshold != 0);
926 927
927 928 if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) {
928 929 /*
929 930 * Should not hold the zero-copy messages for too long.
930 931 */
931 932 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
932 933 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
933 934 tcp->tcp_xmit_head, B_TRUE);
934 935
935 936 if (dont_timeout) {
936 937 /*
937 938 * Reset tcp_ms_we_have_waited to avoid overflow since
938 939 * we are going to retransmit forever.
939 940 */
940 941 tcp->tcp_ms_we_have_waited = second_threshold;
941 942 goto timer_rexmit;
942 943 }
943 944
944 945 /*
945 946 * For zero window probe, we need to send indefinitely,
946 947 * unless we have not heard from the other side for some
947 948 * time...
948 949 */
949 950 if ((tcp->tcp_zero_win_probe == 0) ||
950 951 (TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time) >
951 952 second_threshold)) {
952 953 TCPS_BUMP_MIB(tcps, tcpTimRetransDrop);
953 954 /*
954 955 * If TCP is in SYN_RCVD state, send back a
955 956 * RST|ACK as BSD does. Note that tcp_zero_win_probe
956 957 * should be zero in TCPS_SYN_RCVD state.
957 958 */
958 959 if (tcp->tcp_state == TCPS_SYN_RCVD) {
959 960 tcp_xmit_ctl("tcp_timer: RST sent on timeout "
960 961 "in SYN_RCVD",
961 962 tcp, tcp->tcp_snxt,
962 963 tcp->tcp_rnxt, TH_RST | TH_ACK);
963 964 }
964 965 (void) tcp_clean_death(tcp,
965 966 tcp->tcp_client_errno ?
966 967 tcp->tcp_client_errno : ETIMEDOUT);
967 968 return;
968 969 } else {
969 970 /*
970 971 * If the system is under memory pressure, we also
971 972 * abort connection in zero window probing.
972 973 */
973 974 if (tcps->tcps_reclaim) {
974 975 (void) tcp_clean_death(tcp,
975 976 tcp->tcp_client_errno ?
976 977 tcp->tcp_client_errno : ETIMEDOUT);
977 978 TCP_STAT(tcps, tcp_zwin_mem_drop);
978 979 return;
979 980 }
980 981 /*
981 982 * Set tcp_ms_we_have_waited to second_threshold
982 983 * so that in next timeout, we will do the above
983 984 * check (ddi_get_lbolt() - tcp_last_recv_time).
984 985 * This is also to avoid overflow.
985 986 *
986 987 * We don't need to decrement tcp_timer_backoff
987 988 * to avoid overflow because it will be decremented
988 989 * later if new timeout value is greater than
989 990 * tcp_rto_max. In the case when tcp_rto_max is
990 991 * greater than second_threshold, it means that we
991 992 * will wait longer than second_threshold to send
992 993 * the next
993 994 * window probe.
994 995 */
995 996 tcp->tcp_ms_we_have_waited = second_threshold;
996 997 }
997 998 } else if (ms > first_threshold) {
998 999 /*
999 1000 * Should not hold the zero-copy messages for too long.
1000 1001 */
1001 1002 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
1002 1003 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
1003 1004 tcp->tcp_xmit_head, B_TRUE);
1004 1005
1005 1006 /*
1006 1007 * We have been retransmitting for too long... The RTT
1007 1008 * we calculated is probably incorrect. Reinitialize it.
1008 1009 * Need to compensate for 0 tcp_rtt_sa. Reset
1009 1010 * tcp_rtt_update so that we won't accidentally cache a
1010 1011 * bad value. But only do this if this is not a zero
1011 1012 * window probe.
1012 1013 */
1013 1014 if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
1014 1015 tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
1015 1016 (tcp->tcp_rtt_sa >> 5);
1016 1017 tcp->tcp_rtt_sa = 0;
1017 1018 tcp_ip_notify(tcp);
1018 1019 tcp->tcp_rtt_update = 0;
1019 1020 }
1020 1021 }
1021 1022
1022 1023 timer_rexmit:
1023 1024 tcp->tcp_timer_backoff++;
1024 1025 if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1025 1026 tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) <
1026 1027 tcp->tcp_rto_min) {
1027 1028 /*
1028 1029 * This means the original RTO is tcp_rexmit_interval_min.
1029 1030 * So we will use tcp_rexmit_interval_min as the RTO value
1030 1031 * and do the backoff.
1031 1032 */
1032 1033 ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
1033 1034 } else {
1034 1035 ms <<= tcp->tcp_timer_backoff;
1035 1036 }
1036 1037 if (ms > tcp->tcp_rto_max) {
1037 1038 ms = tcp->tcp_rto_max;
1038 1039 /*
1039 1040 * ms is at max, decrement tcp_timer_backoff to avoid
1040 1041 * overflow.
1041 1042 */
1042 1043 tcp->tcp_timer_backoff--;
1043 1044 }
1044 1045 tcp->tcp_ms_we_have_waited += ms;
1045 1046 if (tcp->tcp_zero_win_probe == 0) {
1046 1047 tcp->tcp_rto = ms;
1047 1048 }
1048 1049 TCP_TIMER_RESTART(tcp, ms);
1049 1050 /*
1050 1051 * This is after a timeout and tcp_rto is backed off. Set
1051 1052 * tcp_set_timer to 1 so that next time RTO is updated, we will
1052 1053 * restart the timer with a correct value.
1053 1054 */
1054 1055 tcp->tcp_set_timer = 1;
1055 1056 mss = tcp->tcp_snxt - tcp->tcp_suna;
1056 1057 if (mss > tcp->tcp_mss)
1057 1058 mss = tcp->tcp_mss;
1058 1059 if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
|
↓ open down ↓ |
1023 lines elided |
↑ open up ↑ |
1059 1060 mss = tcp->tcp_swnd;
1060 1061
1061 1062 if ((mp = tcp->tcp_xmit_head) != NULL)
1062 1063 mp->b_prev = (mblk_t *)ddi_get_lbolt();
1063 1064 mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1064 1065 B_TRUE);
1065 1066
1066 1067 /*
1067 1068 * When slow start after retransmission begins, start with
1068 1069 * this seq no. tcp_rexmit_max marks the end of special slow
1069 - * start phase. tcp_snd_burst controls how many segments
1070 - * can be sent because of an ack.
1070 + * start phase.
1071 1071 */
1072 1072 tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1073 - tcp->tcp_snd_burst = TCP_CWND_SS;
1074 1073 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1075 1074 (tcp->tcp_unsent == 0)) {
1076 1075 tcp->tcp_rexmit_max = tcp->tcp_fss;
1077 1076 } else {
1078 1077 tcp->tcp_rexmit_max = tcp->tcp_snxt;
1079 1078 }
1080 1079 tcp->tcp_rexmit = B_TRUE;
1081 1080 tcp->tcp_dupack_cnt = 0;
1082 1081
1083 1082 /*
1084 1083 * Remove all rexmit SACK blk to start from fresh.
1085 1084 */
1086 1085 if (tcp->tcp_snd_sack_ok)
1087 1086 TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
1088 1087 if (mp == NULL) {
1089 1088 return;
1090 1089 }
1091 1090
1092 1091 tcp->tcp_csuna = tcp->tcp_snxt;
1093 1092 TCPS_BUMP_MIB(tcps, tcpRetransSegs);
1094 1093 TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);
1095 1094 tcp_send_data(tcp, mp);
1096 1095
1097 1096 }
1098 1097
1099 1098 /*
1100 1099 * Handle lingering timeouts. This function is called when the SO_LINGER timeout
1101 1100 * expires. "arg" is the conn_t of the affected connection, passed as void *.
1102 1101 */
1103 1102 void
1104 1103 tcp_close_linger_timeout(void *arg)
1105 1104 {
1106 1105 conn_t *connp = (conn_t *)arg; /* opaque callback argument is the conn_t */
1107 1106 tcp_t *tcp = connp->conn_tcp; /* TCP state attached to that conn_t */
1108 1107
1109 1108 tcp->tcp_client_errno = ETIMEDOUT; /* record the timeout before stopping the linger */
1110 1109 tcp_stop_lingering(tcp); /* presumably wakes the lingering closer -- confirm in tcp_stop_lingering() */
1111 1110 }
|
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX