1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * This file contains functions related to TCP time wait processing.  Also
  28  * refer to the time wait handling comments in tcp_impl.h.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/strsun.h>
  33 #include <sys/squeue_impl.h>
  34 #include <sys/squeue.h>
  35 #include <sys/callo.h>
  36 
  37 #include <inet/common.h>
  38 #include <inet/ip.h>
  39 #include <inet/tcp.h>
  40 #include <inet/tcp_impl.h>
  41 #include <inet/tcp_cluster.h>
  42 
  43 static void     tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
  44 
  45 /*
  46  * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
  47  * Running it every 5 seconds seems to give the best results.
  48  */
  49 #define TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
  50 
/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * It returns B_FALSE if it can't remove the connection from the list
 * as the connection has already been removed from the list due to an
 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
 *
 * If tcp_time_wait is NULL, the per-squeue TIME_WAIT state is looked up
 * from the connection's squeue and tcp_time_wait_lock is acquired (and
 * dropped again) here; otherwise the caller must already hold that lock.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
{
	boolean_t	locked = B_FALSE;

	if (tcp_time_wait == NULL) {
		/* Look up this squeue's private TCP state and lock it. */
		tcp_time_wait = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
	}

	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);

	/*
	 * Unlink tcp from the doubly linked TIME_WAIT list, handling the
	 * head, tail and interior cases separately so that the list head
	 * and tail pointers stay consistent.
	 */
	if (tcp == tcp_time_wait->tcp_time_wait_head) {
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
		if (tcp_time_wait->tcp_time_wait_head != NULL) {
			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
			    NULL;
		} else {
			/* List is now empty; the tail must be cleared too. */
			tcp_time_wait->tcp_time_wait_tail = NULL;
		}
	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
	} else {
		/* Interior node: both neighbors must currently point at us. */
		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	/*
	 * Clearing tcp_time_wait_expire is what marks this tcp_t as "not
	 * on the TIME_WAIT list" for future calls (see the check above).
	 */
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;

	if (locked)
		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
	return (B_TRUE);
}
 112 
/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.  The tcp_t must already be detached and
 * in TCPS_TIME_WAIT state.  New entries are appended at the tail, so
 * the list stays sorted by expiry time (oldest at the head).
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	/* Stop any running per-connection timers before parking the tcp_t. */
	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice.  Hence it cannot be 0.  Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death().  So check if
		 * a timer is needed.
		 */
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
			/*
			 * Fire one interval (+1s) from now, by which time
			 * this entry is guaranteed to have expired.
			 */
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		/*
		 * The list is not empty, so a timer must be running.  If not,
		 * tcp_time_wait_collector() must be running on this
		 * tcp_time_wait list at the same time.
		 */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
		    tcp_time_wait->tcp_time_wait_running);
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		/* Link the new entry after the current tail. */
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;

	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
 191 
 192 /*
 193  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 194  * tcp_t.  Used in tcp_time_wait_collector().
 195  */
 196 /* ARGSUSED */
 197 static void
 198 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
 199 {
 200         conn_t  *connp = (conn_t *)arg;
 201         tcp_t   *tcp = connp->conn_tcp;
 202 
 203         ASSERT(tcp != NULL);
 204         if (tcp->tcp_state == TCPS_CLOSED) {
 205                 return;
 206         }
 207 
 208         ASSERT((connp->conn_family == AF_INET &&
 209             connp->conn_ipversion == IPV4_VERSION) ||
 210             (connp->conn_family == AF_INET6 &&
 211             (connp->conn_ipversion == IPV4_VERSION ||
 212             connp->conn_ipversion == IPV6_VERSION)));
 213         ASSERT(!tcp->tcp_listener);
 214 
 215         ASSERT(TCP_IS_DETACHED(tcp));
 216 
 217         /*
 218          * Because they have no upstream client to rebind or tcp_close()
 219          * them later, we axe the connection here and now.
 220          */
 221         tcp_close_detached(tcp);
 222 }
 223 
/*
 * Blows away all tcps whose TIME_WAIT has expired.  List traversal is done
 * forwards from the head; since the list is kept sorted by expiry time
 * (entries are appended at the tail), traversal can stop at the first
 * unexpired entry.  This walks all stack instances since tcp_time_wait
 * remains global across all stacks.
 *
 * Runs as a callout on the squeue passed in arg; reschedules itself at
 * the end if the list is still non-empty.
 */
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	/* The timer that fired us is no longer pending. */
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	/*
	 * Drain the free list of cached tcp_t's, but only once the head has
	 * been marked tcp_in_free_list (set at the end of a previous pass),
	 * so entries survive at least one collector interval for reuse.
	 */
	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, its better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list. So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				/* Fast path: reclaim directly. */
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				/*
				 * Slow path: extra refs or clustering active;
				 * hand the teardown to the squeue.
				 */
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			/*
			 * Could not get the fanout lock without blocking;
			 * fall back to the squeue-based slow path.
			 */
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		/* Re-acquire before looking at the list head again. */
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	/*
	 * Mark the free list so the next collector pass is allowed to
	 * drain it (see the check at the top of this function).
	 */
	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
 431 
/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 *
 * seg_seq/seg_ack/seg_len describe the incoming segment in mp; tcpha is
 * its TCP header.  The mblk is always consumed (freed) before returning,
 * except when the segment is handed off to a new connection via
 * tcp_reinput().
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t		bytes_acked;
	int32_t		gap;
	int32_t		rgap;
	tcp_opt_t	tcpopt;
	uint_t		flags;
	uint32_t	new_swnd = 0;
	conn_t		*nconnp;
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	/* Window scaling never applies to a segment carrying SYN. */
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
	if (tcp->tcp_snd_ts_ok) {
		/* PAWS: drop segments with stale timestamps, but ACK them. */
		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
			    tcp->tcp_rnxt, TH_ACK);
			goto done;
		}
	}
	/*
	 * gap: how far seg_seq lags behind what we expect next (negative
	 * means old/duplicate data); rgap: how much of the segment fits in
	 * the receive window (negative means data past the window edge).
	 */
	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				/* Old RST carrying no new data: ignore. */
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793. Make sure this TCP
				 * is already on the TIME_WAIT list. If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			/* Entirely-old segment: just ACK to resynchronize. */
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	/*
	 * A SYN beyond rnxt that also overruns the window: treat it as an
	 * attempt to reincarnate the connection (RFC 1122 4.2.2.13).
	 */
	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, pick
		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + ISS_INCR/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be ISS_INCR/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * rgap is the amount of stuff received out of window.  A negative
	 * value is the amount out of window.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}
	/*
	 * Check whether we can update tcp_ts_recent.  This test is
	 * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
	 * Extensions for High Performance: An Update", Internet Draft.
	 */
	if (tcp->tcp_snd_ts_ok &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		/* In-order data arriving after close: count it. */
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
	if (flags & TH_RST) {
		/* An in-window RST terminates the TIME_WAIT state. */
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		/* In-window SYN is an error: reset the sender. */
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	freemsg(mp);
}