Print this page
DLPX-25998 TCP congestion control is inadequate
Reviewed at: http://reviews.delphix.com/r/34808/
DLPX-45697 Adding Avg. RTT to connstat
DLPX-43064 include high-resolution round-trip times in connstat (EP-652)
DLPX-37540 TCP per-connection kernel statistics DLPX-37544 connstat command to display per-connection TCP statistics
@@ -20,11 +20,11 @@
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
#ifndef _INET_TCP_H
#define _INET_TCP_H
@@ -44,10 +44,11 @@
#include <inet/ip6.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/tcp_stack.h>
#include <inet/tcp_sack.h>
+#include <inet/cc.h>
/* TCP states */
#define TCPS_CLOSED -6
#define TCPS_IDLE -5 /* idle (opened, but not bound) */
#define TCPS_BOUND -4 /* bound, ready to connect or accept */
@@ -151,10 +152,13 @@
int64_t tcp_time_wait_expire;
struct conn_s *tcp_connp; /* back pointer to conn_t */
tcp_stack_t *tcp_tcps; /* back pointer to tcp_stack_t */
+ struct cc_algo *tcp_cc_algo; /* congestion control algorithm */
+ struct cc_var tcp_ccv; /* congestion control specific vars */
+
int32_t tcp_state;
int32_t tcp_rcv_ws; /* My window scale power */
int32_t tcp_snd_ws; /* Sender's window scale power */
uint32_t tcp_ts_recent; /* Timestamp of earliest unacked */
/* data segment */
@@ -176,29 +180,22 @@
mblk_t *tcp_xmit_head; /* Head of xmit/rexmit list */
mblk_t *tcp_xmit_last; /* Last valid data seen by tcp_wput */
mblk_t *tcp_xmit_tail; /* Last data sent */
uint32_t tcp_unsent; /* # of bytes in hand that are unsent */
uint32_t tcp_xmit_tail_unsent; /* # of unsent bytes in xmit_tail */
-
uint32_t tcp_suna; /* Sender unacknowledged */
uint32_t tcp_rexmit_nxt; /* Next rexmit seq num */
uint32_t tcp_rexmit_max; /* Max retran seq num */
uint32_t tcp_cwnd; /* Congestion window */
int32_t tcp_cwnd_cnt; /* cwnd cnt in congestion avoidance */
-
- uint32_t tcp_ibsegs; /* Inbound segments on this stream */
- uint32_t tcp_obsegs; /* Outbound segments on this stream */
-
uint32_t tcp_naglim; /* Tunable nagle limit */
uint32_t tcp_valid_bits;
#define TCP_ISS_VALID 0x1 /* Is the tcp_iss seq num active? */
#define TCP_FSS_VALID 0x2 /* Is the tcp_fss seq num active? */
#define TCP_URG_VALID 0x4 /* Is the tcp_urg seq num active? */
#define TCP_OFO_FIN_VALID 0x8 /* Has TCP received an out of order FIN? */
-
-
timeout_id_t tcp_timer_tid; /* Control block for timer service */
uchar_t tcp_timer_backoff; /* Backoff shift count. */
int64_t tcp_last_recv_time; /* Last time we received a segment. */
uint32_t tcp_init_cwnd; /* Initial cwnd (start/restart) */
@@ -281,13 +278,15 @@
uint32_t tcp_cwnd_ssthresh; /* Slow-start threshold for cwnd */
uint32_t tcp_cwnd_max;
uint32_t tcp_csuna; /* Clear (no rexmits in window) suna */
- clock_t tcp_rtt_sa; /* Round trip smoothed average */
- clock_t tcp_rtt_sd; /* Round trip smoothed deviation */
- clock_t tcp_rtt_update; /* Round trip update(s) */
+ hrtime_t tcp_rtt_sum; /* Round trip sum */
+ uint32_t tcp_rtt_cnt; /* Round trip count (non_dup ACKs) */
+ hrtime_t tcp_rtt_sa; /* Round trip smoothed average */
+ hrtime_t tcp_rtt_sd; /* Round trip smoothed deviation */
+ uint32_t tcp_rtt_update; /* Round trip update(s) */
clock_t tcp_ms_we_have_waited; /* Total retrans time */
uint32_t tcp_swl1; /* These help us avoid using stale */
uint32_t tcp_swl2; /* packets to update state */
@@ -499,10 +498,12 @@
timeout_id_t tcp_reass_tid;
/* FIN-WAIT-2 flush timeout */
uint32_t tcp_fin_wait_2_flush_interval;
+ tcp_conn_stats_t tcp_cs;
+
#ifdef DEBUG
pc_t tcmp_stk[15];
#endif
} tcp_t;
@@ -515,11 +516,11 @@
extern void tcp_conn_reclaim(void *);
extern void tcp_free(tcp_t *tcp);
extern void tcp_ddi_g_init(void);
extern void tcp_ddi_g_destroy(void);
-extern void *tcp_get_conn(void *arg, tcp_stack_t *);
+extern conn_t *tcp_get_conn(void *arg, tcp_stack_t *);
extern mblk_t *tcp_snmp_get(queue_t *, mblk_t *, boolean_t);
extern int tcp_snmp_set(queue_t *, int, int, uchar_t *, int len);
/* Pad for the tf_t structure to avoid false cache line sharing. */
#define TF_CACHEL_PAD 64