1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, Joyent Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  * This file contains functions related to TCP time wait processing.  Also
  29  * refer to the time wait handling comments in tcp_impl.h.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/strsun.h>
  34 #include <sys/squeue_impl.h>
  35 #include <sys/squeue.h>
  36 #include <sys/callo.h>
  37 
  38 #include <inet/common.h>
  39 #include <inet/ip.h>
  40 #include <inet/tcp.h>
  41 #include <inet/tcp_impl.h>
  42 #include <inet/tcp_cluster.h>
  43 
/*
 * Squeue callback, defined later in this file, which
 * tcp_time_wait_collector() queues to close an expired TIME_WAIT tcp_t.
 */
static void     tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);

/*
 * TCP_TIME_WAIT_DELAY is the minimum delay that tcp_time_wait_collector()
 * uses when it re-arms its own timer, so that the collector does not wake
 * up more often than once every 5 seconds.  Running it every 5 seconds
 * seems to give the best results.
 */
#define TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
  51 
  52 /*
  53  * Remove a connection from the list of detached TIME_WAIT connections.
  54  * It returns B_FALSE if it can't remove the connection from the list
  55  * as the connection has already been removed from the list due to an
  56  * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
  57  */
  58 boolean_t
  59 tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
  60 {
  61         boolean_t       locked = B_FALSE;
  62 
  63         if (tcp_time_wait == NULL) {
  64                 tcp_time_wait = *((tcp_squeue_priv_t **)
  65                     squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
  66                 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
  67                 locked = B_TRUE;
  68         } else {
  69                 ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
  70         }
  71 
  72         /* 0 means that the tcp_t has not been added to the time wait list. */
  73         if (tcp->tcp_time_wait_expire == 0) {
  74                 ASSERT(tcp->tcp_time_wait_next == NULL);
  75                 ASSERT(tcp->tcp_time_wait_prev == NULL);
  76                 if (locked)
  77                         mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
  78                 return (B_FALSE);
  79         }
  80         ASSERT(TCP_IS_DETACHED(tcp));
  81         ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
  82 
  83         if (tcp == tcp_time_wait->tcp_time_wait_head) {
  84                 ASSERT(tcp->tcp_time_wait_prev == NULL);
  85                 tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
  86                 if (tcp_time_wait->tcp_time_wait_head != NULL) {
  87                         tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
  88                             NULL;
  89                 } else {
  90                         tcp_time_wait->tcp_time_wait_tail = NULL;
  91                 }
  92         } else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
  93                 ASSERT(tcp->tcp_time_wait_next == NULL);
  94                 tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
  95                 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
  96                 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
  97         } else {
  98                 ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
  99                 ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
 100                 tcp->tcp_time_wait_prev->tcp_time_wait_next =
 101                     tcp->tcp_time_wait_next;
 102                 tcp->tcp_time_wait_next->tcp_time_wait_prev =
 103                     tcp->tcp_time_wait_prev;
 104         }
 105         tcp->tcp_time_wait_next = NULL;
 106         tcp->tcp_time_wait_prev = NULL;
 107         tcp->tcp_time_wait_expire = 0;
 108 
 109         if (locked)
 110                 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
 111         return (B_TRUE);
 112 }
 113 
 114 /*
 115  * Add a connection to the list of detached TIME_WAIT connections
 116  * and set its time to expire.
 117  */
 118 void
 119 tcp_time_wait_append(tcp_t *tcp)
 120 {
 121         tcp_stack_t     *tcps = tcp->tcp_tcps;
 122         squeue_t        *sqp = tcp->tcp_connp->conn_sqp;
 123         tcp_squeue_priv_t *tcp_time_wait =
 124             *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
 125 
 126         tcp_timers_stop(tcp);
 127 
 128         /* Freed above */
 129         ASSERT(tcp->tcp_timer_tid == 0);
 130         ASSERT(tcp->tcp_ack_tid == 0);
 131 
 132         /* must have happened at the time of detaching the tcp */
 133         ASSERT(tcp->tcp_ptpahn == NULL);
 134         ASSERT(tcp->tcp_flow_stopped == 0);
 135         ASSERT(tcp->tcp_time_wait_next == NULL);
 136         ASSERT(tcp->tcp_time_wait_prev == NULL);
 137         ASSERT(tcp->tcp_time_wait_expire == 0);
 138         ASSERT(tcp->tcp_listener == NULL);
 139 
 140         tcp->tcp_time_wait_expire = ddi_get_lbolt64();
 141         /*
 142          * Since tcp_time_wait_expire is lbolt64, it should not wrap around
 143          * in practice.  Hence it cannot be 0.  Note that zero means that the
 144          * tcp_t is not in the TIME_WAIT list.
 145          */
 146         tcp->tcp_time_wait_expire += MSEC_TO_TICK(
 147             tcps->tcps_time_wait_interval);
 148 
 149         ASSERT(TCP_IS_DETACHED(tcp));
 150         ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
 151         ASSERT(tcp->tcp_time_wait_next == NULL);
 152         ASSERT(tcp->tcp_time_wait_prev == NULL);
 153         TCP_DBGSTAT(tcps, tcp_time_wait);
 154 
 155         mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
 156         if (tcp_time_wait->tcp_time_wait_head == NULL) {
 157                 ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
 158                 tcp_time_wait->tcp_time_wait_head = tcp;
 159 
 160                 /*
 161                  * Even if the list was empty before, there may be a timer
 162                  * running since a tcp_t can be removed from the list
 163                  * in other places, such as tcp_clean_death().  So check if
 164                  * a timer is needed.
 165                  */
 166                 if (tcp_time_wait->tcp_time_wait_tid == 0) {
 167                         tcp_time_wait->tcp_time_wait_tid =
 168                             timeout_generic(CALLOUT_NORMAL,
 169                             tcp_time_wait_collector, sqp,
 170                             (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
 171                             MICROSEC, CALLOUT_TCP_RESOLUTION,
 172                             CALLOUT_FLAG_ROUNDUP);
 173                 }
 174         } else {
 175                 /*
 176                  * The list is not empty, so a timer must be running.  If not,
 177                  * tcp_time_wait_collector() must be running on this
 178                  * tcp_time_wait list at the same time.
 179                  */
 180                 ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
 181                     tcp_time_wait->tcp_time_wait_running);
 182                 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
 183                 ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
 184                     TCPS_TIME_WAIT);
 185                 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
 186                 tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
 187 
 188         }
 189         tcp_time_wait->tcp_time_wait_tail = tcp;
 190         mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
 191 }
 192 
 193 /*
 194  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 195  * tcp_t.  Used in tcp_time_wait_collector().
 196  */
 197 /* ARGSUSED */
 198 static void
 199 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
 200 {
 201         conn_t  *connp = (conn_t *)arg;
 202         tcp_t   *tcp = connp->conn_tcp;
 203 
 204         ASSERT(tcp != NULL);
 205         if (tcp->tcp_state == TCPS_CLOSED) {
 206                 return;
 207         }
 208 
 209         ASSERT((connp->conn_family == AF_INET &&
 210             connp->conn_ipversion == IPV4_VERSION) ||
 211             (connp->conn_family == AF_INET6 &&
 212             (connp->conn_ipversion == IPV4_VERSION ||
 213             connp->conn_ipversion == IPV6_VERSION)));
 214         ASSERT(!tcp->tcp_listener);
 215 
 216         ASSERT(TCP_IS_DETACHED(tcp));
 217 
 218         /*
 219          * Because they have no upstream client to rebind or tcp_close()
 220          * them later, we axe the connection here and now.
 221          */
 222         tcp_close_detached(tcp);
 223 }
 224 
 225 /*
 226  * Blows away all tcps whose TIME_WAIT has expired. List traversal
 227  * is done forwards from the head.
 228  * This walks all stack instances since
 229  * tcp_time_wait remains global across all stacks.
 230  */
 231 /* ARGSUSED */
 232 void
 233 tcp_time_wait_collector(void *arg)
 234 {
 235         tcp_t *tcp;
 236         int64_t now;
 237         mblk_t *mp;
 238         conn_t *connp;
 239         kmutex_t *lock;
 240         boolean_t removed;
 241         extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
 242             uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
 243 
 244         squeue_t *sqp = (squeue_t *)arg;
 245         tcp_squeue_priv_t *tcp_time_wait =
 246             *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
 247 
 248         mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
 249         tcp_time_wait->tcp_time_wait_tid = 0;
 250 #ifdef DEBUG
 251         tcp_time_wait->tcp_time_wait_running = B_TRUE;
 252 #endif
 253 
 254         if (tcp_time_wait->tcp_free_list != NULL &&
 255             tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
 256                 TCP_G_STAT(tcp_freelist_cleanup);
 257                 while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
 258                         tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
 259                         tcp->tcp_time_wait_next = NULL;
 260                         tcp_time_wait->tcp_free_list_cnt--;
 261                         ASSERT(tcp->tcp_tcps == NULL);
 262                         CONN_DEC_REF(tcp->tcp_connp);
 263                 }
 264                 ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
 265         }
 266 
 267         /*
 268          * In order to reap time waits reliably, we should use a
 269          * source of time that is not adjustable by the user -- hence
 270          * the call to ddi_get_lbolt64().
 271          */
 272         now = ddi_get_lbolt64();
 273         while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
 274                 /*
 275                  * lbolt64 should not wrap around in practice...  So we can
 276                  * do a direct comparison.
 277                  */
 278                 if (now < tcp->tcp_time_wait_expire)
 279                         break;
 280 
 281                 removed = tcp_time_wait_remove(tcp, tcp_time_wait);
 282                 ASSERT(removed);
 283 
 284                 connp = tcp->tcp_connp;
 285                 ASSERT(connp->conn_fanout != NULL);
 286                 lock = &connp->conn_fanout->connf_lock;
 287                 /*
 288                  * This is essentially a TW reclaim fast path optimization for
 289                  * performance where the timewait collector checks under the
 290                  * fanout lock (so that no one else can get access to the
 291                  * conn_t) that the refcnt is 2 i.e. one for TCP and one for
 292                  * the classifier hash list. If ref count is indeed 2, we can
 293                  * just remove the conn under the fanout lock and avoid
 294                  * cleaning up the conn under the squeue, provided that
 295                  * clustering callbacks are not enabled. If clustering is
 296                  * enabled, we need to make the clustering callback before
 297                  * setting the CONDEMNED flag and after dropping all locks and
 298                  * so we forego this optimization and fall back to the slow
 299                  * path. Also please see the comments in tcp_closei_local
 300                  * regarding the refcnt logic.
 301                  *
 302                  * Since we are holding the tcp_time_wait_lock, its better
 303                  * not to block on the fanout_lock because other connections
 304                  * can't add themselves to time_wait list. So we do a
 305                  * tryenter instead of mutex_enter.
 306                  */
 307                 if (mutex_tryenter(lock)) {
 308                         mutex_enter(&connp->conn_lock);
 309                         if ((connp->conn_ref == 2) &&
 310                             (cl_inet_disconnect == NULL)) {
 311                                 ipcl_hash_remove_locked(connp,
 312                                     connp->conn_fanout);
 313                                 /*
 314                                  * Set the CONDEMNED flag now itself so that
 315                                  * the refcnt cannot increase due to any
 316                                  * walker.
 317                                  */
 318                                 connp->conn_state_flags |= CONN_CONDEMNED;
 319                                 mutex_exit(lock);
 320                                 mutex_exit(&connp->conn_lock);
 321                                 if (tcp_time_wait->tcp_free_list_cnt <
 322                                     tcp_free_list_max_cnt) {
 323                                         /* Add to head of tcp_free_list */
 324                                         mutex_exit(
 325                                             &tcp_time_wait->tcp_time_wait_lock);
 326                                         tcp_cleanup(tcp);
 327                                         ASSERT(connp->conn_latch == NULL);
 328                                         ASSERT(connp->conn_policy == NULL);
 329                                         ASSERT(tcp->tcp_tcps == NULL);
 330                                         ASSERT(connp->conn_netstack == NULL);
 331 
 332                                         mutex_enter(
 333                                             &tcp_time_wait->tcp_time_wait_lock);
 334                                         tcp->tcp_time_wait_next =
 335                                             tcp_time_wait->tcp_free_list;
 336                                         tcp_time_wait->tcp_free_list = tcp;
 337                                         tcp_time_wait->tcp_free_list_cnt++;
 338                                         continue;
 339                                 } else {
 340                                         /* Do not add to tcp_free_list */
 341                                         mutex_exit(
 342                                             &tcp_time_wait->tcp_time_wait_lock);
 343                                         tcp_bind_hash_remove(tcp);
 344                                         ixa_cleanup(tcp->tcp_connp->conn_ixa);
 345                                         tcp_ipsec_cleanup(tcp);
 346                                         CONN_DEC_REF(tcp->tcp_connp);
 347                                 }
 348                         } else {
 349                                 CONN_INC_REF_LOCKED(connp);
 350                                 mutex_exit(lock);
 351                                 mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
 352                                 mutex_exit(&connp->conn_lock);
 353                                 /*
 354                                  * We can reuse the closemp here since conn has
 355                                  * detached (otherwise we wouldn't even be in
 356                                  * time_wait list). tcp_closemp_used can safely
 357                                  * be changed without taking a lock as no other
 358                                  * thread can concurrently access it at this
 359                                  * point in the connection lifecycle.
 360                                  */
 361 
 362                                 if (tcp->tcp_closemp.b_prev == NULL)
 363                                         tcp->tcp_closemp_used = B_TRUE;
 364                                 else
 365                                         cmn_err(CE_PANIC,
 366                                             "tcp_timewait_collector: "
 367                                             "concurrent use of tcp_closemp: "
 368                                             "connp %p tcp %p\n", (void *)connp,
 369                                             (void *)tcp);
 370 
 371                                 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
 372                                 mp = &tcp->tcp_closemp;
 373                                 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
 374                                     tcp_timewait_close, connp, NULL,
 375                                     SQ_FILL, SQTAG_TCP_TIMEWAIT);
 376                         }
 377                 } else {
 378                         mutex_enter(&connp->conn_lock);
 379                         CONN_INC_REF_LOCKED(connp);
 380                         mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
 381                         mutex_exit(&connp->conn_lock);
 382                         /*
 383                          * We can reuse the closemp here since conn has
 384                          * detached (otherwise we wouldn't even be in
 385                          * time_wait list). tcp_closemp_used can safely
 386                          * be changed without taking a lock as no other
 387                          * thread can concurrently access it at this
 388                          * point in the connection lifecycle.
 389                          */
 390 
 391                         if (tcp->tcp_closemp.b_prev == NULL)
 392                                 tcp->tcp_closemp_used = B_TRUE;
 393                         else
 394                                 cmn_err(CE_PANIC, "tcp_timewait_collector: "
 395                                     "concurrent use of tcp_closemp: "
 396                                     "connp %p tcp %p\n", (void *)connp,
 397                                     (void *)tcp);
 398 
 399                         TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
 400                         mp = &tcp->tcp_closemp;
 401                         SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
 402                             tcp_timewait_close, connp, NULL,
 403                             SQ_FILL, SQTAG_TCP_TIMEWAIT);
 404                 }
 405                 mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
 406         }
 407 
 408         if (tcp_time_wait->tcp_free_list != NULL)
 409                 tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
 410 
 411         /*
 412          * If the time wait list is not empty and there is no timer running,
 413          * restart it.
 414          */
 415         if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
 416             tcp_time_wait->tcp_time_wait_tid == 0) {
 417                 hrtime_t firetime;
 418 
 419                 firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
 420                 /* This ensures that we won't wake up too often. */
 421                 firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
 422                 tcp_time_wait->tcp_time_wait_tid =
 423                     timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
 424                     sqp, firetime, CALLOUT_TCP_RESOLUTION,
 425                     CALLOUT_FLAG_ROUNDUP);
 426         }
 427 #ifdef DEBUG
 428         tcp_time_wait->tcp_time_wait_running = B_FALSE;
 429 #endif
 430         mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
 431 }
 432 
 433 /*
 434  * tcp_time_wait_processing() handles processing of incoming packets when
 435  * the tcp_t is in the TIME_WAIT state.
 436  *
 437  * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 438  * detached state) is never put on the time wait list.
 439  */
 440 void
 441 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
 442     uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
 443 {
 444         int32_t         bytes_acked;
 445         int32_t         gap;
 446         int32_t         rgap;
 447         tcp_opt_t       tcpopt;
 448         uint_t          flags;
 449         uint32_t        new_swnd = 0;
 450         conn_t          *nconnp;
 451         conn_t          *connp = tcp->tcp_connp;
 452         tcp_stack_t     *tcps = tcp->tcp_tcps;
 453 
 454         BUMP_LOCAL(tcp->tcp_ibsegs);
 455         DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
 456 
 457         flags = (unsigned int)tcpha->tha_flags & 0xFF;
 458         new_swnd = ntohs(tcpha->tha_win) <<
 459             ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
 460         if (tcp->tcp_snd_ts_ok) {
 461                 if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
 462                         tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
 463                             tcp->tcp_rnxt, TH_ACK);
 464                         goto done;
 465                 }
 466         }
 467         gap = seg_seq - tcp->tcp_rnxt;
 468         rgap = tcp->tcp_rwnd - (gap + seg_len);
 469         if (gap < 0) {
 470                 TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
 471                 TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
 472                     (seg_len > -gap ? -gap : seg_len));
 473                 seg_len += gap;
 474                 if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
 475                         if (flags & TH_RST) {
 476                                 goto done;
 477                         }
 478                         if ((flags & TH_FIN) && seg_len == -1) {
 479                                 /*
 480                                  * When TCP receives a duplicate FIN in
 481                                  * TIME_WAIT state, restart the 2 MSL timer.
 482                                  * See page 73 in RFC 793. Make sure this TCP
 483                                  * is already on the TIME_WAIT list. If not,
 484                                  * just restart the timer.
 485                                  */
 486                                 if (TCP_IS_DETACHED(tcp)) {
 487                                         if (tcp_time_wait_remove(tcp, NULL) ==
 488                                             B_TRUE) {
 489                                                 tcp_time_wait_append(tcp);
 490                                                 TCP_DBGSTAT(tcps,
 491                                                     tcp_rput_time_wait);
 492                                         }
 493                                 } else {
 494                                         ASSERT(tcp != NULL);
 495                                         TCP_TIMER_RESTART(tcp,
 496                                             tcps->tcps_time_wait_interval);
 497                                 }
 498                                 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
 499                                     tcp->tcp_rnxt, TH_ACK);
 500                                 goto done;
 501                         }
 502                         flags |=  TH_ACK_NEEDED;
 503                         seg_len = 0;
 504                         goto process_ack;
 505                 }
 506 
 507                 /* Fix seg_seq, and chew the gap off the front. */
 508                 seg_seq = tcp->tcp_rnxt;
 509         }
 510 
 511         if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
 512                 /*
 513                  * Make sure that when we accept the connection, pick
 514                  * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
 515                  * old connection.
 516                  *
 517                  * The next ISS generated is equal to tcp_iss_incr_extra
 518                  * + tcp_iss_incr/2 + other components depending on the
 519                  * value of tcp_strong_iss.  We pre-calculate the new
 520                  * ISS here and compare with tcp_snxt to determine if
 521                  * we need to make adjustment to tcp_iss_incr_extra.
 522                  *
 523                  * The above calculation is ugly and is a
 524                  * waste of CPU cycles...
 525                  */
 526                 uint32_t new_iss = tcps->tcps_iss_incr_extra;
 527                 int32_t adj;
 528                 ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
 529 
 530                 switch (tcps->tcps_strong_iss) {
 531                 case 2: {
 532                         /* Add time and MD5 components. */
 533                         uint32_t answer[4];
 534                         struct {
 535                                 uint32_t ports;
 536                                 in6_addr_t src;
 537                                 in6_addr_t dst;
 538                         } arg;
 539                         MD5_CTX context;
 540 
 541                         mutex_enter(&tcps->tcps_iss_key_lock);
 542                         context = tcps->tcps_iss_key;
 543                         mutex_exit(&tcps->tcps_iss_key_lock);
 544                         arg.ports = connp->conn_ports;
 545                         /* We use MAPPED addresses in tcp_iss_init */
 546                         arg.src = connp->conn_laddr_v6;
 547                         arg.dst = connp->conn_faddr_v6;
 548                         MD5Update(&context, (uchar_t *)&arg,
 549                             sizeof (arg));
 550                         MD5Final((uchar_t *)answer, &context);
 551                         answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
 552                         new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
 553                         break;
 554                 }
 555                 case 1:
 556                         /* Add time component and min random (i.e. 1). */
 557                         new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
 558                         break;
 559                 default:
 560                         /* Add only time component. */
 561                         new_iss += (uint32_t)gethrestime_sec() *
 562                             tcps->tcps_iss_incr;
 563                         break;
 564                 }
 565                 if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
 566                         /*
 567                          * New ISS not guaranteed to be tcp_iss_incr/2
 568                          * ahead of the current tcp_snxt, so add the
 569                          * difference to tcp_iss_incr_extra.
 570                          */
 571                         tcps->tcps_iss_incr_extra += adj;
 572                 }
 573                 /*
 574                  * If tcp_clean_death() can not perform the task now,
 575                  * drop the SYN packet and let the other side re-xmit.
 576                  * Otherwise pass the SYN packet back in, since the
 577                  * old tcp state has been cleaned up or freed.
 578                  */
 579                 if (tcp_clean_death(tcp, 0) == -1)
 580                         goto done;
 581                 nconnp = ipcl_classify(mp, ira, ipst);
 582                 if (nconnp != NULL) {
 583                         TCP_STAT(tcps, tcp_time_wait_syn_success);
 584                         /* Drops ref on nconnp */
 585                         tcp_reinput(nconnp, mp, ira, ipst);
 586                         return;
 587                 }
 588                 goto done;
 589         }
 590 
 591         /*
 592          * rgap is the amount of stuff received out of window.  A negative
 593          * value is the amount out of window.
 594          */
 595         if (rgap < 0) {
 596                 TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
 597                 TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
 598                 /* Fix seg_len and make sure there is something left. */
 599                 seg_len += rgap;
 600                 if (seg_len <= 0) {
 601                         if (flags & TH_RST) {
 602                                 goto done;
 603                         }
 604                         flags |=  TH_ACK_NEEDED;
 605                         seg_len = 0;
 606                         goto process_ack;
 607                 }
 608         }
 609         /*
 610          * Check whether we can update tcp_ts_recent.  This test is
 611          * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
 612          * Extensions for High Performance: An Update", Internet Draft.
 613          */
 614         if (tcp->tcp_snd_ts_ok &&
 615             TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
 616             SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
 617                 tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
 618                 tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
 619         }
 620 
 621         if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
 622                 /* Always ack out of order packets */
 623                 flags |= TH_ACK_NEEDED;
 624                 seg_len = 0;
 625         } else if (seg_len > 0) {
 626                 TCPS_BUMP_MIB(tcps, tcpInClosed);
 627                 TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
 628                 TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
 629         }
 630         if (flags & TH_RST) {
 631                 (void) tcp_clean_death(tcp, 0);
 632                 goto done;
 633         }
 634         if (flags & TH_SYN) {
 635                 tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
 636                     TH_RST|TH_ACK);
 637                 /*
 638                  * Do not delete the TCP structure if it is in
 639                  * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
 640                  */
 641                 goto done;
 642         }
 643 process_ack:
 644         if (flags & TH_ACK) {
 645                 bytes_acked = (int)(seg_ack - tcp->tcp_suna);
 646                 if (bytes_acked <= 0) {
 647                         if (bytes_acked == 0 && seg_len == 0 &&
 648                             new_swnd == tcp->tcp_swnd)
 649                                 TCPS_BUMP_MIB(tcps, tcpInDupAck);
 650                 } else {
 651                         /* Acks something not sent */
 652                         flags |= TH_ACK_NEEDED;
 653                 }
 654         }
 655         if (flags & TH_ACK_NEEDED) {
 656                 /*
 657                  * Time to send an ack for some reason.
 658                  */
 659                 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
 660                     tcp->tcp_rnxt, TH_ACK);
 661         }
 662 done:
 663         freemsg(mp);
 664 }