Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/
DLPX-45697 Adding Avg. RTT to connstat
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-37540 TCP per-connection kernel statistics
DLPX-37544 connstat command to display per-connection TCP statistics

@@ -20,11 +20,11 @@
  */
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2015 Joyent, Inc.
  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
 #ifndef _INET_TCP_H
 #define _INET_TCP_H

@@ -44,10 +44,11 @@
 #include <inet/ip6.h>
 #include <inet/mi.h>
 #include <inet/mib2.h>
 #include <inet/tcp_stack.h>
 #include <inet/tcp_sack.h>
+#include <inet/cc.h>
 
 /* TCP states */
 #define TCPS_CLOSED             -6
 #define TCPS_IDLE               -5      /* idle (opened, but not bound) */
 #define TCPS_BOUND              -4      /* bound, ready to connect or accept */

@@ -151,10 +152,13 @@
         int64_t         tcp_time_wait_expire;
 
         struct conn_s   *tcp_connp;     /* back pointer to conn_t */
         tcp_stack_t     *tcp_tcps;      /* back pointer to tcp_stack_t */
 
+        struct cc_algo  *tcp_cc_algo;   /* congestion control algorithm */
+        struct cc_var   tcp_ccv;        /* congestion control specific vars */
+
         int32_t tcp_state;
         int32_t tcp_rcv_ws;             /* My window scale power */
         int32_t tcp_snd_ws;             /* Sender's window scale power */
         uint32_t tcp_ts_recent;         /* Timestamp of earliest unacked */
                                         /*  data segment */

@@ -176,29 +180,22 @@
         mblk_t  *tcp_xmit_head;         /* Head of xmit/rexmit list */
         mblk_t  *tcp_xmit_last;         /* Last valid data seen by tcp_wput */
         mblk_t  *tcp_xmit_tail;         /* Last data sent */
         uint32_t tcp_unsent;            /* # of bytes in hand that are unsent */
         uint32_t tcp_xmit_tail_unsent;  /* # of unsent bytes in xmit_tail */
-
         uint32_t tcp_suna;              /* Sender unacknowledged */
         uint32_t tcp_rexmit_nxt;        /* Next rexmit seq num */
         uint32_t tcp_rexmit_max;        /* Max retran seq num */
         uint32_t tcp_cwnd;              /* Congestion window */
         int32_t tcp_cwnd_cnt;           /* cwnd cnt in congestion avoidance */
-
-        uint32_t tcp_ibsegs;            /* Inbound segments on this stream */
-        uint32_t tcp_obsegs;            /* Outbound segments on this stream */
-
         uint32_t tcp_naglim;            /* Tunable nagle limit */
         uint32_t        tcp_valid_bits;
 #define TCP_ISS_VALID   0x1     /* Is the tcp_iss seq num active? */
 #define TCP_FSS_VALID   0x2     /* Is the tcp_fss seq num active? */
 #define TCP_URG_VALID   0x4     /* Is the tcp_urg seq num active? */
 #define TCP_OFO_FIN_VALID 0x8   /* Has TCP received an out of order FIN? */
 
-
-
         timeout_id_t    tcp_timer_tid;  /* Control block for timer service */
         uchar_t tcp_timer_backoff;      /* Backoff shift count. */
         int64_t tcp_last_recv_time;     /* Last time we received a segment. */
         uint32_t tcp_init_cwnd;         /* Initial cwnd (start/restart) */
 

@@ -281,13 +278,15 @@
 
         uint32_t tcp_cwnd_ssthresh;     /* Slow start threshold */
         uint32_t tcp_cwnd_max;
         uint32_t tcp_csuna;             /* Clear (no rexmits in window) suna */
 
-        clock_t tcp_rtt_sa;             /* Round trip smoothed average */
-        clock_t tcp_rtt_sd;             /* Round trip smoothed deviation */
-        clock_t tcp_rtt_update;         /* Round trip update(s) */
+        hrtime_t tcp_rtt_sum;           /* Sum of round trip times */
+        uint32_t tcp_rtt_cnt;           /* Round trip count (non-dup ACKs) */
+        hrtime_t tcp_rtt_sa;            /* Round trip smoothed average */
+        hrtime_t tcp_rtt_sd;            /* Round trip smoothed deviation */
+        uint32_t tcp_rtt_update;        /* Round trip update(s) */
         clock_t tcp_ms_we_have_waited;  /* Total retrans time */
 
         uint32_t tcp_swl1;              /* These help us avoid using stale */
         uint32_t tcp_swl2;              /*  packets to update state */
 

@@ -499,10 +498,12 @@
         timeout_id_t            tcp_reass_tid;
 
         /* FIN-WAIT-2 flush timeout */
         uint32_t                tcp_fin_wait_2_flush_interval;
 
+        tcp_conn_stats_t        tcp_cs;
+
 #ifdef DEBUG
         pc_t                    tcmp_stk[15];
 #endif
 } tcp_t;
 

@@ -515,11 +516,11 @@
 
 extern void     tcp_conn_reclaim(void *);
 extern void     tcp_free(tcp_t *tcp);
 extern void     tcp_ddi_g_init(void);
 extern void     tcp_ddi_g_destroy(void);
-extern void     *tcp_get_conn(void *arg, tcp_stack_t *);
+extern conn_t   *tcp_get_conn(void *arg, tcp_stack_t *);
 extern mblk_t   *tcp_snmp_get(queue_t *, mblk_t *, boolean_t);
 extern int      tcp_snmp_set(queue_t *, int, int, uchar_t *, int len);
 
 /* Pad for the tf_t structure to avoid false cache line sharing. */
 #define TF_CACHEL_PAD   64