Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/
DLPX-45697 Adding Avg. RTT to connstat
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-37540 TCP per-connection kernel statistics
DLPX-37544 connstat command to display per-connection TCP statistics

*** 20,30 **** */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Joyent, Inc. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. ! * Copyright (c) 2014 by Delphix. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ #ifndef _INET_TCP_H #define _INET_TCP_H --- 20,30 ---- */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Joyent, Inc. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. ! * Copyright (c) 2014, 2017 by Delphix. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ #ifndef _INET_TCP_H #define _INET_TCP_H
*** 44,53 **** --- 44,54 ---- #include <inet/ip6.h> #include <inet/mi.h> #include <inet/mib2.h> #include <inet/tcp_stack.h> #include <inet/tcp_sack.h> + #include <inet/cc.h> /* TCP states */ #define TCPS_CLOSED -6 #define TCPS_IDLE -5 /* idle (opened, but not bound) */ #define TCPS_BOUND -4 /* bound, ready to connect or accept */
*** 151,160 **** --- 152,164 ---- int64_t tcp_time_wait_expire; struct conn_s *tcp_connp; /* back pointer to conn_t */ tcp_stack_t *tcp_tcps; /* back pointer to tcp_stack_t */ + struct cc_algo *tcp_cc_algo; /* congestion control algorithm */ + struct cc_var tcp_ccv; /* congestion control specific vars */ + int32_t tcp_state; int32_t tcp_rcv_ws; /* My window scale power */ int32_t tcp_snd_ws; /* Sender's window scale power */ uint32_t tcp_ts_recent; /* Timestamp of earliest unacked */ /* data segment */
*** 176,204 **** mblk_t *tcp_xmit_head; /* Head of xmit/rexmit list */ mblk_t *tcp_xmit_last; /* Last valid data seen by tcp_wput */ mblk_t *tcp_xmit_tail; /* Last data sent */ uint32_t tcp_unsent; /* # of bytes in hand that are unsent */ uint32_t tcp_xmit_tail_unsent; /* # of unsent bytes in xmit_tail */ - uint32_t tcp_suna; /* Sender unacknowledged */ uint32_t tcp_rexmit_nxt; /* Next rexmit seq num */ uint32_t tcp_rexmit_max; /* Max retran seq num */ uint32_t tcp_cwnd; /* Congestion window */ int32_t tcp_cwnd_cnt; /* cwnd cnt in congestion avoidance */ - - uint32_t tcp_ibsegs; /* Inbound segments on this stream */ - uint32_t tcp_obsegs; /* Outbound segments on this stream */ - uint32_t tcp_naglim; /* Tunable nagle limit */ uint32_t tcp_valid_bits; #define TCP_ISS_VALID 0x1 /* Is the tcp_iss seq num active? */ #define TCP_FSS_VALID 0x2 /* Is the tcp_fss seq num active? */ #define TCP_URG_VALID 0x4 /* Is the tcp_urg seq num active? */ #define TCP_OFO_FIN_VALID 0x8 /* Has TCP received an out of order FIN? */ - - timeout_id_t tcp_timer_tid; /* Control block for timer service */ uchar_t tcp_timer_backoff; /* Backoff shift count. */ int64_t tcp_last_recv_time; /* Last time we receive a segment. */ uint32_t tcp_init_cwnd; /* Initial cwnd (start/restart) */ --- 180,201 ----
*** 281,293 **** uint32_t tcp_cwnd_ssthresh; /* Congestion window */ uint32_t tcp_cwnd_max; uint32_t tcp_csuna; /* Clear (no rexmits in window) suna */ ! clock_t tcp_rtt_sa; /* Round trip smoothed average */ ! clock_t tcp_rtt_sd; /* Round trip smoothed deviation */ ! clock_t tcp_rtt_update; /* Round trip update(s) */ clock_t tcp_ms_we_have_waited; /* Total retrans time */ uint32_t tcp_swl1; /* These help us avoid using stale */ uint32_t tcp_swl2; /* packets to update state */ --- 278,292 ---- uint32_t tcp_cwnd_ssthresh; /* Congestion window */ uint32_t tcp_cwnd_max; uint32_t tcp_csuna; /* Clear (no rexmits in window) suna */ ! hrtime_t tcp_rtt_sum; /* Round trip sum */ ! uint32_t tcp_rtt_cnt; /* Round trip count (non_dup ACKs) */ ! hrtime_t tcp_rtt_sa; /* Round trip smoothed average */ ! hrtime_t tcp_rtt_sd; /* Round trip smoothed deviation */ ! uint32_t tcp_rtt_update; /* Round trip update(s) */ clock_t tcp_ms_we_have_waited; /* Total retrans time */ uint32_t tcp_swl1; /* These help us avoid using stale */ uint32_t tcp_swl2; /* packets to update state */
*** 499,508 **** --- 498,509 ---- timeout_id_t tcp_reass_tid; /* FIN-WAIT-2 flush timeout */ uint32_t tcp_fin_wait_2_flush_interval; + tcp_conn_stats_t tcp_cs; + #ifdef DEBUG pc_t tcmp_stk[15]; #endif } tcp_t;
*** 515,525 **** extern void tcp_conn_reclaim(void *); extern void tcp_free(tcp_t *tcp); extern void tcp_ddi_g_init(void); extern void tcp_ddi_g_destroy(void); ! extern void *tcp_get_conn(void *arg, tcp_stack_t *); extern mblk_t *tcp_snmp_get(queue_t *, mblk_t *, boolean_t); extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len); /* Pad for the tf_t structure to avoid false cache line sharing. */ #define TF_CACHEL_PAD 64 --- 516,526 ---- extern void tcp_conn_reclaim(void *); extern void tcp_free(tcp_t *tcp); extern void tcp_ddi_g_init(void); extern void tcp_ddi_g_destroy(void); ! extern conn_t *tcp_get_conn(void *arg, tcp_stack_t *); extern mblk_t *tcp_snmp_get(queue_t *, mblk_t *, boolean_t); extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len); /* Pad for the tf_t structure to avoid false cache line sharing. */ #define TF_CACHEL_PAD 64