Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/
DLPX-45697 Adding Avg. RTT to connstat
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-42721 Create inline function for TCP RTO calculation
DLPX-37540 TCP per-connection kernel statistics DLPX-37544 connstat command to display per-connection TCP statistics

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp.c
          +++ new/usr/src/uts/common/inet/tcp/tcp.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2017 Joyent, Inc.
  25   25   * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  26      - * Copyright (c) 2013,2014 by Delphix. All rights reserved.
       26 + * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
  27   27   * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
  28   28   */
  29   29  /* Copyright (c) 1990 Mentat Inc. */
  30   30  
  31   31  #include <sys/types.h>
  32   32  #include <sys/stream.h>
  33   33  #include <sys/strsun.h>
  34   34  #include <sys/strsubr.h>
  35   35  #include <sys/stropts.h>
  36   36  #include <sys/strlog.h>
↓ open down ↓ 30 lines elided ↑ open up ↑
  67   67  #include <sys/systm.h>
  68   68  #include <netinet/in.h>
  69   69  #include <netinet/tcp.h>
  70   70  #include <netinet/ip6.h>
  71   71  #include <netinet/icmp6.h>
  72   72  #include <net/if.h>
  73   73  #include <net/route.h>
  74   74  #include <inet/ipsec_impl.h>
  75   75  
  76   76  #include <inet/common.h>
       77 +#include <inet/cc.h>
  77   78  #include <inet/ip.h>
  78   79  #include <inet/ip_impl.h>
  79   80  #include <inet/ip6.h>
  80   81  #include <inet/ip_ndp.h>
  81   82  #include <inet/proto_set.h>
  82   83  #include <inet/mib2.h>
  83   84  #include <inet/optcom.h>
  84   85  #include <inet/snmpcom.h>
  85   86  #include <inet/kstatcom.h>
  86   87  #include <inet/tcp.h>
↓ open down ↓ 172 lines elided ↑ open up ↑
 259  260  
 260  261  /* TCP timer control structure: a routine to call and the tcp_t passed to it */
 261  262  typedef struct tcpt_s {
 262  263          pfv_t   tcpt_pfv;       /* The routine we are to call */
 263  264          tcp_t   *tcpt_tcp;      /* The parameter we are to pass in */
 264  265  } tcpt_t;
 265  266  
 266  267  /*
 267  268   * Functions called directly via squeue having a prototype of edesc_t.
 268  269   */
 269      -void            tcp_input_listener(void *arg, mblk_t *mp, void *arg2,
 270      -    ip_recv_attr_t *ira);
 271  270  void            tcp_input_data(void *arg, mblk_t *mp, void *arg2,
 272  271      ip_recv_attr_t *ira);
 273  272  static void     tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2,
 274  273      ip_recv_attr_t *dummy);
 275  274  
 276  275  
 277  276  /* Prototype for TCP functions */
 278  277  static void     tcp_random_init(void);
 279  278  int             tcp_random(void);
 280  279  static int      tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp,
↓ open down ↓ 288 lines elided ↑ open up ↑
 569  568  
 570  569          tcp->tcp_connp = connp;
 571  570  
 572  571          ASSERT(connp->conn_tcp == tcp);
 573  572          ASSERT(connp->conn_flags & IPCL_TCPCONN);
 574  573          connp->conn_state_flags = CONN_INCIPIENT;
 575  574          ASSERT(connp->conn_proto == IPPROTO_TCP);
 576  575          ASSERT(connp->conn_ref == 1);
 577  576  }
 578  577  
      578 +#pragma inline(tcp_calculate_rto)
      579 +
 579  580  /*
      581 + * RTO = average estimates (sa / 8) + 4 * deviation estimates (sd)
      582 + *
      583 + * Add tcp_rexmit_interval_extra in case of extreme environment where the
      584 + * algorithm fails to work.  The default value of tcp_rexmit_interval_extra
      585 + * should be 0.
      586 + *
      587 + * As we use a finer grained clock than BSD and update RTO for every ACK, add
      588 + * in another .25 of RTT to the deviation of RTO to accommodate burstiness of
      589 + * 1/4 of window size.
      590 + */
      591 +clock_t
      592 +tcp_calculate_rto(tcp_t *tcp, tcp_stack_t *tcps)
      593 +{
      594 +        clock_t rto;
      595 +
      596 +        rto = NSEC2MSEC((tcp->tcp_rtt_sa >> 3) + (tcp->tcp_rtt_sa >> 5) +
      597 +            tcp->tcp_rtt_sd) + tcps->tcps_rexmit_interval_extra +
      598 +            tcps->tcps_conn_grace_period;  /* grace period always added */
      599 +
      600 +        if (rto < tcp->tcp_rto_min)  /* clamp to [tcp_rto_min, tcp_rto_max] */
      601 +                rto = tcp->tcp_rto_min;
      602 +        else if (rto > tcp->tcp_rto_max)
      603 +                rto = tcp->tcp_rto_max;
      604 +
      605 +        return (rto);
      606 +}
      607 +
      608 +/*
 580  609   * Adapt to the information, such as rtt and rtt_sd, provided from the
 581  610   * DCE and IRE maintained by IP.
 582  611   *
 583  612   * Checks for multicast and broadcast destination address.
 584  613   * Returns zero if ok; an errno on failure.
 585  614   *
 586  615   * Note that the MSS calculation here is based on the info given in
 587  616   * the DCE and IRE.  We do not do any calculation based on TCP options.  They
 588  617   * will be handled in tcp_input_data() when TCP knows which options to use.
 589  618   *
↓ open down ↓ 43 lines elided ↑ open up ↑
 633  662          if (error != 0)
 634  663                  return (error);
 635  664  
 636  665          error = tcp_build_hdrs(tcp);
 637  666          if (error != 0)
 638  667                  return (error);
 639  668  
 640  669          tcp->tcp_localnet = uinfo.iulp_localnet;
 641  670  
 642  671          if (uinfo.iulp_rtt != 0) {
 643      -                clock_t rto;
 644      -
 645      -                tcp->tcp_rtt_sa = uinfo.iulp_rtt;
 646      -                tcp->tcp_rtt_sd = uinfo.iulp_rtt_sd;
 647      -                rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
 648      -                    tcps->tcps_rexmit_interval_extra +
 649      -                    (tcp->tcp_rtt_sa >> 5);
 650      -
 651      -                TCP_SET_RTO(tcp, rto);
      672 +                tcp->tcp_rtt_sa = MSEC2NSEC(uinfo.iulp_rtt);
      673 +                tcp->tcp_rtt_sd = MSEC2NSEC(uinfo.iulp_rtt_sd);
      674 +                tcp->tcp_rto = tcp_calculate_rto(tcp, tcps);
 652  675          }
 653  676          if (uinfo.iulp_ssthresh != 0)
 654  677                  tcp->tcp_cwnd_ssthresh = uinfo.iulp_ssthresh;
 655  678          else
 656  679                  tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
 657  680          if (uinfo.iulp_spipe > 0) {
 658  681                  connp->conn_sndbuf = MIN(uinfo.iulp_spipe,
 659  682                      tcps->tcps_max_buf);
 660  683                  if (tcps->tcps_snd_lowat_fraction != 0) {
 661  684                          connp->conn_sndlowat = connp->conn_sndbuf /
↓ open down ↓ 569 lines elided ↑ open up ↑
1231 1254  void
1232 1255  tcp_closei_local(tcp_t *tcp)
1233 1256  {
1234 1257          conn_t          *connp = tcp->tcp_connp;
1235 1258          tcp_stack_t     *tcps = tcp->tcp_tcps;
1236 1259          int32_t         oldstate;
1237 1260  
1238 1261          if (!TCP_IS_SOCKET(tcp))
1239 1262                  tcp_acceptor_hash_remove(tcp);
1240 1263  
1241      -        TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs);
1242      -        tcp->tcp_ibsegs = 0;
1243      -        TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs);
1244      -        tcp->tcp_obsegs = 0;
1245      -
1246 1264          /*
1247 1265           * This can be called via tcp_time_wait_processing() if TCP gets a
1248 1266           * SYN with sequence number outside the TIME-WAIT connection's
1249 1267           * window.  So we need to check for TIME-WAIT state here as the
1250 1268           * connection counter is already decremented.  See SET_TIME_WAIT()
1251 1269           * macro
1252 1270           */
1253 1271          if (tcp->tcp_state >= TCPS_ESTABLISHED &&
1254 1272              tcp->tcp_state < TCPS_TIME_WAIT) {
1255 1273                  TCPS_CONN_DEC(tcps);
↓ open down ↓ 158 lines elided ↑ open up ↑
1414 1432          }
1415 1433          ASSERT(tcp->tcp_rthdrlen == 0);
1416 1434  
1417 1435          /*
 1418 1436           * The following really blows away a union.
 1419 1437           * Since it happens to have exactly two members of identical size,
 1420 1438           * the following code is enough.
1421 1439           */
1422 1440          tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);
1423 1441  
     1442 +        /* Allow the CC algorithm to clean up after itself. */
     1443 +        if (tcp->tcp_cc_algo != NULL && tcp->tcp_cc_algo->cb_destroy != NULL)
     1444 +                tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
     1445 +
1424 1446          /*
1425 1447           * Destroy any association with SO_REUSEPORT group.
1426 1448           */
1427 1449          if (tcp->tcp_rg_bind != NULL) {
1428 1450                  /*
1429 1451                   * This is only necessary for connections which enabled
1430 1452                   * SO_REUSEPORT but were never bound.  Such connections should
1431 1453                   * be the one and only member of the tcp_rg_tp to which they
1432 1454                   * have been associated.
1433 1455                   */
↓ open down ↓ 41 lines elided ↑ open up ↑
1475 1497   * putting the conn/tcp back in freelist, we don't pay a penalty for
1476 1498   * allocating memory without checking 'q/q0' and freeing it if we can't
1477 1499   * accept the connection.
1478 1500   *
1479 1501   * Care should be taken to put the conn back in the same squeue's freelist
1480 1502   * from which it was allocated. Best results are obtained if conn is
1481 1503   * allocated from listener's squeue and freed to the same. Time wait
 1482 1504   * collector will free up the freelist if the connection ends up sitting
1483 1505   * there for too long.
1484 1506   */
1485      -void *
     1507 +conn_t *
1486 1508  tcp_get_conn(void *arg, tcp_stack_t *tcps)
1487 1509  {
1488 1510          tcp_t                   *tcp = NULL;
1489 1511          conn_t                  *connp = NULL;
1490 1512          squeue_t                *sqp = (squeue_t *)arg;
1491 1513          tcp_squeue_priv_t       *tcp_time_wait;
1492 1514          netstack_t              *ns;
1493 1515          mblk_t                  *tcp_rsrv_mp = NULL;
1494 1516  
1495 1517          tcp_time_wait =
↓ open down ↓ 18 lines elided ↑ open up ↑
1514 1536                  connp->conn_netstack = ns;
1515 1537                  connp->conn_ixa->ixa_ipst = ns->netstack_ip;
1516 1538                  tcp->tcp_tcps = tcps;
1517 1539                  ipcl_globalhash_insert(connp);
1518 1540  
1519 1541                  connp->conn_ixa->ixa_notify_cookie = tcp;
1520 1542                  ASSERT(connp->conn_ixa->ixa_notify == tcp_notify);
1521 1543                  connp->conn_recv = tcp_input_data;
1522 1544                  ASSERT(connp->conn_recvicmp == tcp_icmp_input);
1523 1545                  ASSERT(connp->conn_verifyicmp == tcp_verifyicmp);
1524      -                return ((void *)connp);
     1546 +                return (connp);
1525 1547          }
1526 1548          mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
1527 1549          /*
1528 1550           * Pre-allocate the tcp_rsrv_mp. This mblk will not be freed until
1529 1551           * this conn_t/tcp_t is freed at ipcl_conn_destroy().
1530 1552           */
1531 1553          tcp_rsrv_mp = allocb(0, BPRI_HI);
1532 1554          if (tcp_rsrv_mp == NULL)
1533 1555                  return (NULL);
1534 1556  
↓ open down ↓ 14 lines elided ↑ open up ↑
1549 1571          connp->conn_verifyicmp = tcp_verifyicmp;
1550 1572  
1551 1573          /*
1552 1574           * Register tcp_notify to listen to capability changes detected by IP.
1553 1575           * This upcall is made in the context of the call to conn_ip_output
1554 1576           * thus it is inside the squeue.
1555 1577           */
1556 1578          connp->conn_ixa->ixa_notify = tcp_notify;
1557 1579          connp->conn_ixa->ixa_notify_cookie = tcp;
1558 1580  
1559      -        return ((void *)connp);
     1581 +        return (connp);
1560 1582  }
1561 1583  
1562 1584  /*
1563 1585   * Handle connect to IPv4 destinations, including connections for AF_INET6
1564 1586   * sockets connecting to IPv4 mapped IPv6 destinations.
1565 1587   * Returns zero if OK, a positive errno, or a negative TLI error.
1566 1588   */
1567 1589  static int
1568 1590  tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport,
1569 1591      uint_t srcid)
↓ open down ↓ 349 lines elided ↑ open up ↑
1919 1941          ASSERT(tcp->tcp_listener == NULL);
1920 1942          ASSERT((connp->conn_family == AF_INET &&
1921 1943              connp->conn_ipversion == IPV4_VERSION) ||
1922 1944              (connp->conn_family == AF_INET6 &&
1923 1945              (connp->conn_ipversion == IPV4_VERSION ||
1924 1946              connp->conn_ipversion == IPV6_VERSION)));
1925 1947  
1926 1948          /* Cancel outstanding timers */
1927 1949          tcp_timers_stop(tcp);
1928 1950  
1929      -        /*
1930      -         * Reset everything in the state vector, after updating global
1931      -         * MIB data from instance counters.
1932      -         */
1933      -        TCPS_UPDATE_MIB(tcps, tcpHCInSegs, tcp->tcp_ibsegs);
1934      -        tcp->tcp_ibsegs = 0;
1935      -        TCPS_UPDATE_MIB(tcps, tcpHCOutSegs, tcp->tcp_obsegs);
1936      -        tcp->tcp_obsegs = 0;
1937      -
1938 1951          tcp_close_mpp(&tcp->tcp_xmit_head);
1939 1952          if (tcp->tcp_snd_zcopy_aware)
1940 1953                  tcp_zcopy_notify(tcp);
1941 1954          tcp->tcp_xmit_last = tcp->tcp_xmit_tail = NULL;
1942 1955          tcp->tcp_unsent = tcp->tcp_xmit_tail_unsent = 0;
1943 1956          mutex_enter(&tcp->tcp_non_sq_lock);
1944 1957          if (tcp->tcp_flow_stopped &&
1945 1958              TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) {
1946 1959                  tcp_clrqfull(tcp);
1947 1960          }
↓ open down ↓ 151 lines elided ↑ open up ↑
2099 2112          ASSERT(tcp->tcp_xmit_last == NULL);
2100 2113          ASSERT(tcp->tcp_unsent == 0);
2101 2114          ASSERT(tcp->tcp_xmit_tail == NULL);
2102 2115          ASSERT(tcp->tcp_xmit_tail_unsent == 0);
2103 2116  
2104 2117          tcp->tcp_snxt = 0;                      /* Displayed in mib */
2105 2118          tcp->tcp_suna = 0;                      /* Displayed in mib */
2106 2119          tcp->tcp_swnd = 0;
2107 2120          DONTCARE(tcp->tcp_cwnd);        /* Init in tcp_process_options */
2108 2121  
2109      -        ASSERT(tcp->tcp_ibsegs == 0);
2110      -        ASSERT(tcp->tcp_obsegs == 0);
2111      -
2112 2122          if (connp->conn_ht_iphc != NULL) {
2113 2123                  kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
2114 2124                  connp->conn_ht_iphc = NULL;
2115 2125                  connp->conn_ht_iphc_allocated = 0;
2116 2126                  connp->conn_ht_iphc_len = 0;
2117 2127                  connp->conn_ht_ulp = NULL;
2118 2128                  connp->conn_ht_ulp_len = 0;
2119 2129                  tcp->tcp_ipha = NULL;
2120 2130                  tcp->tcp_ip6h = NULL;
2121 2131                  tcp->tcp_tcpha = NULL;
↓ open down ↓ 71 lines elided ↑ open up ↑
2193 2203          ASSERT(tcp->tcp_rcv_cnt == 0);
2194 2204  
2195 2205          DONTCARE(tcp->tcp_cwnd_ssthresh); /* Init in tcp_set_destination */
2196 2206          DONTCARE(tcp->tcp_cwnd_max);            /* Init in tcp_init_values */
2197 2207          tcp->tcp_csuna = 0;
2198 2208  
2199 2209          tcp->tcp_rto = 0;                       /* Displayed in MIB */
2200 2210          DONTCARE(tcp->tcp_rtt_sa);              /* Init in tcp_init_values */
2201 2211          DONTCARE(tcp->tcp_rtt_sd);              /* Init in tcp_init_values */
2202 2212          tcp->tcp_rtt_update = 0;
     2213 +        tcp->tcp_rtt_sum = 0;
     2214 +        tcp->tcp_rtt_cnt = 0;
2203 2215  
2204 2216          DONTCARE(tcp->tcp_swl1); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */
2205 2217          DONTCARE(tcp->tcp_swl2); /* Init in case TCPS_LISTEN/TCPS_SYN_SENT */
2206 2218  
2207 2219          tcp->tcp_rack = 0;                      /* Displayed in mib */
2208 2220          tcp->tcp_rack_cnt = 0;
2209 2221          tcp->tcp_rack_cur_max = 0;
2210 2222          tcp->tcp_rack_abs_max = 0;
2211 2223  
2212 2224          tcp->tcp_max_swnd = 0;
↓ open down ↓ 115 lines elided ↑ open up ↑
2328 2340  
2329 2341  #ifdef DEBUG
2330 2342          DONTCARE(tcp->tcmp_stk[0]);
2331 2343  #endif
2332 2344  
2333 2345          PRESERVE(tcp->tcp_connid);
2334 2346  
2335 2347          ASSERT(tcp->tcp_listen_cnt == NULL);
2336 2348          ASSERT(tcp->tcp_reass_tid == 0);
2337 2349  
     2350 +        /* Allow the CC algorithm to clean up after itself. */
     2351 +        if (tcp->tcp_cc_algo->cb_destroy != NULL)
     2352 +                tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
     2353 +        tcp->tcp_cc_algo = NULL;
     2354 +
2338 2355  #undef  DONTCARE
2339 2356  #undef  PRESERVE
2340 2357  }
2341 2358  
2342 2359  /*
2343 2360   * Initialize the various fields in tcp_t.  If parent (the listener) is non
 2344 2361   * NULL, certain values will be inherited from it.
2345 2362   */
2346 2363  void
2347 2364  tcp_init_values(tcp_t *tcp, tcp_t *parent)
2348 2365  {
2349 2366          tcp_stack_t     *tcps = tcp->tcp_tcps;
2350 2367          conn_t          *connp = tcp->tcp_connp;
2351      -        clock_t         rto;
2352 2368  
2353 2369          ASSERT((connp->conn_family == AF_INET &&
2354 2370              connp->conn_ipversion == IPV4_VERSION) ||
2355 2371              (connp->conn_family == AF_INET6 &&
2356 2372              (connp->conn_ipversion == IPV4_VERSION ||
2357 2373              connp->conn_ipversion == IPV6_VERSION)));
2358 2374  
     2375 +        tcp->tcp_ccv.type = IPPROTO_TCP;
     2376 +        tcp->tcp_ccv.ccvc.tcp = tcp;
     2377 +
2359 2378          if (parent == NULL) {
     2379 +                tcp->tcp_cc_algo = tcps->tcps_default_cc_algo;
     2380 +
2360 2381                  tcp->tcp_naglim = tcps->tcps_naglim_def;
2361 2382  
2362 2383                  tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
2363 2384                  tcp->tcp_rto_min = tcps->tcps_rexmit_interval_min;
2364 2385                  tcp->tcp_rto_max = tcps->tcps_rexmit_interval_max;
2365 2386  
2366 2387                  tcp->tcp_first_ctimer_threshold =
2367 2388                      tcps->tcps_ip_notify_cinterval;
2368 2389                  tcp->tcp_second_ctimer_threshold =
2369 2390                      tcps->tcps_ip_abort_cinterval;
↓ open down ↓ 7 lines elided ↑ open up ↑
2377 2398                  tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval;
2378 2399                  tcp->tcp_ka_cnt = 0;
2379 2400                  tcp->tcp_ka_rinterval = 0;
2380 2401  
2381 2402                  /*
2382 2403                   * Default value of tcp_init_cwnd is 0, so no need to set here
2383 2404                   * if parent is NULL.  But we need to inherit it from parent.
2384 2405                   */
2385 2406          } else {
2386 2407                  /* Inherit various TCP parameters from the parent. */
     2408 +                tcp->tcp_cc_algo = parent->tcp_cc_algo;
     2409 +
2387 2410                  tcp->tcp_naglim = parent->tcp_naglim;
2388 2411  
2389 2412                  tcp->tcp_rto_initial = parent->tcp_rto_initial;
2390 2413                  tcp->tcp_rto_min = parent->tcp_rto_min;
2391 2414                  tcp->tcp_rto_max = parent->tcp_rto_max;
2392 2415  
2393 2416                  tcp->tcp_first_ctimer_threshold =
2394 2417                      parent->tcp_first_ctimer_threshold;
2395 2418                  tcp->tcp_second_ctimer_threshold =
2396 2419                      parent->tcp_second_ctimer_threshold;
↓ open down ↓ 6 lines elided ↑ open up ↑
2403 2426                      parent->tcp_fin_wait_2_flush_interval;
2404 2427  
2405 2428                  tcp->tcp_ka_interval = parent->tcp_ka_interval;
2406 2429                  tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres;
2407 2430                  tcp->tcp_ka_cnt = parent->tcp_ka_cnt;
2408 2431                  tcp->tcp_ka_rinterval = parent->tcp_ka_rinterval;
2409 2432  
2410 2433                  tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
2411 2434          }
2412 2435  
     2436 +        if (tcp->tcp_cc_algo->cb_init != NULL)
     2437 +                VERIFY(tcp->tcp_cc_algo->cb_init(&tcp->tcp_ccv) == 0);
     2438 +
2413 2439          /*
2414 2440           * Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO
2415 2441           * will be close to tcp_rexmit_interval_initial.  By doing this, we
2416 2442           * allow the algorithm to adjust slowly to large fluctuations of RTT
2417 2443           * during first few transmissions of a connection as seen in slow
2418 2444           * links.
2419 2445           */
2420      -        tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
2421      -        tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
2422      -        rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
2423      -            tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5) +
2424      -            tcps->tcps_conn_grace_period;
2425      -        TCP_SET_RTO(tcp, rto);
     2446 +        tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
     2447 +        tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
     2448 +        tcp->tcp_rto = tcp_calculate_rto(tcp, tcps);
2426 2449  
2427 2450          tcp->tcp_timer_backoff = 0;
2428 2451          tcp->tcp_ms_we_have_waited = 0;
2429 2452          tcp->tcp_last_recv_time = ddi_get_lbolt();
2430 2453          tcp->tcp_cwnd_max = tcps->tcps_cwnd_max_;
2431 2454          tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
2432 2455  
2433 2456          tcp->tcp_maxpsz_multiplier = tcps->tcps_maxpsz_multiplier;
2434 2457  
2435 2458          /* NOTE:  ISS is now set in tcp_set_destination(). */
↓ open down ↓ 216 lines elided ↑ open up ↑
2652 2675                   * to make TCP operate as if in the global zone.
2653 2676                   */
2654 2677                  if (tcps->tcps_netstack->netstack_stackid !=
2655 2678                      GLOBAL_NETSTACKID)
2656 2679                          zoneid = GLOBAL_ZONEID;
2657 2680                  else
2658 2681                          zoneid = crgetzoneid(credp);
2659 2682          }
2660 2683  
2661 2684          sqp = IP_SQUEUE_GET((uint_t)gethrtime());
2662      -        connp = (conn_t *)tcp_get_conn(sqp, tcps);
     2685 +        connp = tcp_get_conn(sqp, tcps);
2663 2686          /*
2664 2687           * Both tcp_get_conn and netstack_find_by_cred incremented refcnt,
2665 2688           * so we drop it by one.
2666 2689           */
2667 2690          netstack_rele(tcps->tcps_netstack);
2668 2691          if (connp == NULL) {
2669 2692                  *errorp = ENOSR;
2670 2693                  return (NULL);
2671 2694          }
2672 2695          ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
↓ open down ↓ 1167 lines elided ↑ open up ↑
3840 3863              KM_SLEEP);
3841 3864          for (i = 0; i < tcps->tcps_sc_cnt; i++) {
3842 3865                  tcps->tcps_sc[i] = kmem_zalloc(sizeof (tcp_stats_cpu_t),
3843 3866                      KM_SLEEP);
3844 3867          }
3845 3868  
3846 3869          mutex_init(&tcps->tcps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL);
3847 3870          list_create(&tcps->tcps_listener_conf, sizeof (tcp_listener_t),
3848 3871              offsetof(tcp_listener_t, tl_link));
3849 3872  
     3873 +        tcps->tcps_default_cc_algo = cc_load_algo(CC_DEFAULT_ALGO_NAME);
     3874 +        ASSERT3P(tcps->tcps_default_cc_algo, !=, NULL);
     3875 +
3850 3876          return (tcps);
3851 3877  }
3852 3878  
3853 3879  /*
3854 3880   * Called when the IP module is about to be unloaded.
3855 3881   */
3856 3882  void
3857 3883  tcp_ddi_g_destroy(void)
3858 3884  {
3859 3885          tcp_g_kstat_fini(tcp_g_kstat);
↓ open down ↓ 638 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX