Print this page
5295 remove maxburst logic from TCP's send algorithm Reviewed by: Dan McDonald <danmcd@omniti.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_output.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_output.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright (c) 2014 by Delphix. All rights reserved.
  24   25   */
  25   26  
  26   27  /* This file contains all TCP output processing functions. */
  27   28  
  28   29  #include <sys/types.h>
  29   30  #include <sys/stream.h>
  30   31  #include <sys/strsun.h>
  31   32  #include <sys/strsubr.h>
  32   33  #include <sys/stropts.h>
  33   34  #include <sys/strlog.h>
↓ open down ↓ 1720 lines elided ↑ open up ↑
1754 1755           * multithreading scheme to avoid this complexity.
1755 1756           */
1756 1757          ixa->ixa_pktlen = msgdsize(mp);
1757 1758          (void) conn_ip_output(mp, ixa);
1758 1759  }
1759 1760  
1760 1761  /*
1761 1762   * tcp_send() is called by tcp_wput_data() and returns one of the following:
1762 1763   *
1763 1764   * -1 = failed allocation.
1764      - *  0 = success; burst count reached, or usable send window is too small,
1765      - *      and that we'd rather wait until later before sending again.
     1765 + *  0 = We've either successfully sent data, or our usable send window is too
     1766 + *      small and we'd rather wait until later before sending again.
1766 1767   */
1767 1768  static int
1768 1769  tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
1769 1770      const int tcp_hdr_len, const int num_sack_blk, int *usable,
1770 1771      uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time)
1771 1772  {
1772      -        int             num_burst_seg = tcp->tcp_snd_burst;
1773 1773          int             num_lso_seg = 1;
1774 1774          uint_t          lso_usable;
1775 1775          boolean_t       do_lso_send = B_FALSE;
1776 1776          tcp_stack_t     *tcps = tcp->tcp_tcps;
1777 1777          conn_t          *connp = tcp->tcp_connp;
1778 1778          ip_xmit_attr_t  *ixa = connp->conn_ixa;
1779 1779  
1780 1780          /*
1781 1781           * Check LSO possibility. The value of tcp->tcp_lso indicates whether
1782 1782           * the underlying connection is LSO capable. Will check whether having
↓ open down ↓ 5 lines elided ↑ open up ↑
1788 1788  
1789 1789          for (;;) {
1790 1790                  struct datab    *db;
1791 1791                  tcpha_t         *tcpha;
1792 1792                  uint32_t        sum;
1793 1793                  mblk_t          *mp, *mp1;
1794 1794                  uchar_t         *rptr;
1795 1795                  int             len;
1796 1796  
1797 1797                  /*
1798      -                 * Burst count reached, return successfully.
1799      -                 */
1800      -                if (num_burst_seg == 0)
1801      -                        break;
1802      -
1803      -                /*
1804 1798                   * Calculate the maximum payload length we can send at one
1805 1799                   * time.
1806 1800                   */
1807 1801                  if (do_lso_send) {
1808 1802                          /*
1809      -                         * Check whether be able to to do LSO for the current
1810      -                         * available data.
     1803 +                         * Determine whether or not it's possible to do LSO,
     1804 +                         * and if so, how much data we can send.
1811 1805                           */
1812      -                        if (num_burst_seg >= 2 && (*usable - 1) / mss >= 1) {
     1806 +                        if ((*usable - 1) / mss >= 1) {
1813 1807                                  lso_usable = MIN(tcp->tcp_lso_max, *usable);
1814      -                                lso_usable = MIN(lso_usable,
1815      -                                    num_burst_seg * mss);
1816      -
1817 1808                                  num_lso_seg = lso_usable / mss;
1818 1809                                  if (lso_usable % mss) {
1819 1810                                          num_lso_seg++;
1820 1811                                          tcp->tcp_last_sent_len = (ushort_t)
1821 1812                                              (lso_usable % mss);
1822 1813                                  } else {
1823 1814                                          tcp->tcp_last_sent_len = (ushort_t)mss;
1824 1815                                  }
1825 1816                          } else {
1826 1817                                  do_lso_send = B_FALSE;
1827 1818                                  num_lso_seg = 1;
1828 1819                                  lso_usable = mss;
1829 1820                          }
1830 1821                  }
1831 1822  
1832 1823                  ASSERT(num_lso_seg <= IP_MAXPACKET / mss + 1);
1833      -#ifdef DEBUG
1834      -                DTRACE_PROBE2(tcp_send_lso, int, num_lso_seg, boolean_t,
1835      -                    do_lso_send);
1836      -#endif
1837      -                /*
1838      -                 * Adjust num_burst_seg here.
1839      -                 */
1840      -                num_burst_seg -= num_lso_seg;
1841 1824  
1842 1825                  len = mss;
1843 1826                  if (len > *usable) {
1844 1827                          ASSERT(do_lso_send == B_FALSE);
1845 1828  
1846 1829                          len = *usable;
1847 1830                          if (len <= 0) {
1848 1831                                  /* Terminate the loop */
1849 1832                                  break;  /* success; too small */
1850 1833                          }
↓ open down ↓ 1566 lines elided ↑ open up ↑
3417 3400                   */
3418 3401                  if (SEQ_GT(tcp->tcp_sack_snxt, tcp->tcp_rexmit_max)) {
3419 3402                          tcp->tcp_rexmit_max = tcp->tcp_sack_snxt;
3420 3403                  }
3421 3404          }
3422 3405  }
3423 3406  
3424 3407  /*
3425 3408   * tcp_ss_rexmit() is called to do slow start retransmission after a timeout
3426 3409   * or ICMP errors.
3427      - *
3428      - * To limit the number of duplicate segments, we limit the number of segment
3429      - * to be sent in one time to tcp_snd_burst, the burst variable.
3430 3410   */
3431 3411  void
3432 3412  tcp_ss_rexmit(tcp_t *tcp)
3433 3413  {
3434 3414          uint32_t        snxt;
3435 3415          uint32_t        smax;
3436 3416          int32_t         win;
3437 3417          int32_t         mss;
3438 3418          int32_t         off;
3439      -        int32_t         burst = tcp->tcp_snd_burst;
3440 3419          mblk_t          *snxt_mp;
3441 3420          tcp_stack_t     *tcps = tcp->tcp_tcps;
3442 3421  
3443 3422          /*
3444 3423           * Note that tcp_rexmit can be set even though TCP has retransmitted
3445 3424           * all unack'ed segments.
3446 3425           */
3447 3426          if (SEQ_LT(tcp->tcp_rexmit_nxt, tcp->tcp_rexmit_max)) {
3448 3427                  smax = tcp->tcp_rexmit_max;
3449 3428                  snxt = tcp->tcp_rexmit_nxt;
3450 3429                  if (SEQ_LT(snxt, tcp->tcp_suna)) {
3451 3430                          snxt = tcp->tcp_suna;
3452 3431                  }
3453 3432                  win = MIN(tcp->tcp_cwnd, tcp->tcp_swnd);
3454 3433                  win -= snxt - tcp->tcp_suna;
3455 3434                  mss = tcp->tcp_mss;
3456 3435                  snxt_mp = tcp_get_seg_mp(tcp, snxt, &off);
3457 3436  
3458      -                while (SEQ_LT(snxt, smax) && (win > 0) &&
3459      -                    (burst > 0) && (snxt_mp != NULL)) {
     3437 +                while (SEQ_LT(snxt, smax) && (win > 0) && (snxt_mp != NULL)) {
3460 3438                          mblk_t  *xmit_mp;
3461 3439                          mblk_t  *old_snxt_mp = snxt_mp;
3462 3440                          uint32_t cnt = mss;
3463 3441  
3464 3442                          if (win < cnt) {
3465 3443                                  cnt = win;
3466 3444                          }
3467 3445                          if (SEQ_GT(snxt + cnt, smax)) {
3468 3446                                  cnt = smax - snxt;
3469 3447                          }
↓ open down ↓ 8 lines elided ↑ open up ↑
3478 3456                          win -= cnt;
3479 3457                          /*
3480 3458                           * Update the send timestamp to avoid false
3481 3459                           * retransmission.
3482 3460                           */
3483 3461                          old_snxt_mp->b_prev = (mblk_t *)ddi_get_lbolt();
3484 3462                          TCPS_BUMP_MIB(tcps, tcpRetransSegs);
3485 3463                          TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt);
3486 3464  
3487 3465                          tcp->tcp_rexmit_nxt = snxt;
3488      -                        burst--;
3489 3466                  }
3490 3467                  /*
3491 3468                   * If we have transmitted all we have at the time
3492 3469                   * we started the retransmission, we can leave
3493 3470                   * the rest of the job to tcp_wput_data().  But we
3494 3471                   * need to check the send window first.  If the
3495 3472                   * win is not 0, go on with tcp_wput_data().
3496 3473                   */
3497 3474                  if (SEQ_LT(snxt, smax) || win == 0) {
3498 3475                          return;
↓ open down ↓ 20 lines elided ↑ open up ↑
3519 3496                  return;
3520 3497  
3521 3498          if ((tcp->tcp_valid_bits & TCP_FSS_VALID) && (tcp->tcp_unsent == 0))
3522 3499                  tcp->tcp_rexmit_max = tcp->tcp_fss;
3523 3500          else
3524 3501                  tcp->tcp_rexmit_max = tcp->tcp_snxt;
3525 3502  
3526 3503          tcp->tcp_rexmit_nxt = tcp->tcp_suna;
3527 3504          tcp->tcp_rexmit = B_TRUE;
3528 3505          tcp->tcp_dupack_cnt = 0;
3529      -        tcp->tcp_snd_burst = TCP_CWND_SS;
3530 3506          tcp_ss_rexmit(tcp);
3531 3507  }
3532 3508  
3533 3509  /*
3534 3510   * tcp_get_seg_mp() is called to get the pointer to a segment in the
3535 3511   * send queue which starts at the given sequence number. If the given
3536 3512   * sequence number is equal to last valid sequence number (tcp_snxt), the
3537 3513   * returned mblk is the last valid mblk, and off is set to the length of
3538 3514   * that mblk.
3539 3515   *
↓ open down ↓ 199 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX