tcp: do not sample rtt for retransmitted segments

Change-Id: I365c31607332a944ef498369881332b515894ed7
Signed-off-by: Florin Coras <fcoras@cisco.com>
This commit is contained in:
Florin Coras
2017-09-24 19:43:08 -04:00
committed by Dave Barach
parent 95524c3a2e
commit f1762d6fa8
3 changed files with 103 additions and 84 deletions

View File

@ -20,7 +20,7 @@
#define TCP_DEBUG (1)
#define TCP_DEBUG_SM (0)
#define TCP_DEBUG_CC (1)
#define TCP_DEBUG_CC (0)
#define TCP_DEBUG_CC_STAT (1)
#define foreach_tcp_dbg_evt \
@ -411,21 +411,6 @@ typedef enum _tcp_dbg_evt
ed->data[4] = _tc->snd_wnd; \
}
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
.format_args = "i4i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 5); \
ed->data[0] = _tc->rcv_nxt - _tc->irs; \
ed->data[1] = _tc->rcv_wnd; \
ed->data[2] = _tc->snd_nxt - _tc->iss; \
ed->data[3] = tcp_available_snd_wnd(_tc); \
ed->data[4] = _tc->snd_wnd; \
}
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@ -441,21 +426,6 @@ typedef enum _tcp_dbg_evt
ed->data[4] = tcp_flight_size(_tc); \
}
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
.format_args = "i4i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 5); \
ed->data[0] = _tc->snd_una - _tc->iss; \
ed->data[1] = _tc->cwnd; \
ed->data[2] = _tc->snd_wnd; \
ed->data[3] = tcp_flight_size(_tc); \
ed->data[4] = _tc->rcv_wnd; \
}
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@ -601,9 +571,7 @@ if (_av > 0) \
}
#else
#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
@ -649,6 +617,30 @@ if (_av > 0) \
*/
#if TCP_DEBUG_CC
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
.format_args = "t4i4i4i4", \
.n_enum_strings = 6, \
.enum_strings = { \
"fast-rxt", \
"rxt-timeout", \
"first-rxt", \
"recovered", \
"congestion", \
"undo", \
}, \
}; \
DECLARE_ETD(_tc, _e, 4); \
ed->data[0] = _sub_evt; \
ed->data[1] = tcp_available_snd_space (_tc); \
ed->data[2] = _tc->snd_congestion - _tc->iss; \
ed->data[3] = _tc->snd_rxt_bytes; \
}
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@ -663,26 +655,34 @@ if (_av > 0) \
ed->data[3] = _tc->snd_rxt_bytes; \
}
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
.format_args = "t4i4i4i4", \
.n_enum_strings = 5, \
.enum_strings = { \
"fast-rxt", \
"rxt-timeout", \
"first-rxt", \
"recovered", \
"congestion", \
}, \
.format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
.format_args = "i4i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 4); \
ed->data[0] = _sub_evt; \
ed->data[1] = tcp_available_snd_space (_tc); \
ed->data[2] = _tc->snd_congestion - _tc->iss; \
ed->data[3] = _tc->snd_rxt_bytes; \
DECLARE_ETD(_tc, _e, 5); \
ed->data[0] = _tc->rcv_nxt - _tc->irs; \
ed->data[1] = _tc->rcv_wnd; \
ed->data[2] = _tc->snd_nxt - _tc->iss; \
ed->data[3] = tcp_available_snd_wnd(_tc); \
ed->data[4] = _tc->snd_wnd; \
}
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
.format_args = "i4i4i4i4i4", \
}; \
DECLARE_ETD(_tc, _e, 5); \
ed->data[0] = _tc->snd_una - _tc->iss; \
ed->data[1] = _tc->cwnd; \
ed->data[2] = _tc->snd_wnd; \
ed->data[3] = tcp_flight_size(_tc); \
ed->data[4] = _tc->rcv_wnd; \
}
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \
@ -696,6 +696,13 @@ if (_av > 0) \
ed->data[0] = _tc->snd_una - _tc->iss; \
ed->data[1] = _tc->snd_una_max - _tc->iss; \
}
#else
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
#endif
/*
* Congestion control stats
@ -744,13 +751,6 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
#else
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
/*
* fd.io coding-style-patch-verification: ON

View File

@ -248,8 +248,8 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
* then the TSval from the segment is copied to TS.Recent;
* otherwise, the TSval is ignored.
*/
if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
&& seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end))
if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
&& seq_leq (tc->rcv_las, seq_end))
{
ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
tc->tsval_recent = tc->rcv_opts.tsval;
@ -418,51 +418,53 @@ tcp_update_rto (tcp_connection_t * tc)
tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
}
/** Update RTT estimate and RTO timer
/**
* Update RTT estimate and RTO timer
*
* Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
* timing. Middle boxes are known to fiddle with TCP options so we
* should give higher priority to ACK timing.
*
* This should be called only if previously sent bytes have been acked.
*
* return 1 if valid rtt 0 otherwise
*/
static int
tcp_update_rtt (tcp_connection_t * tc, u32 ack)
{
u32 mrtt = 0;
u8 rtx_acked;
/* Determine if only rtx bytes are acked. */
rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked;
/* Karn's rule, part 1. Don't use retransmitted segments to estimate
* RTT because they're ambiguous. */
if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq) && !rtx_acked)
if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes)
goto done;
if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
{
mrtt = tcp_time_now () - tc->rtt_ts;
}
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
* snd_una, i.e., the left side of the send window:
* seq_lt (tc->snd_una, ack). */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr
&& tc->bytes_acked)
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
}
/* Ignore dubious measurements */
if (mrtt == 0 || mrtt > TCP_RTT_MAX)
goto done;
tcp_estimate_rtt (tc, mrtt);
done:
/* Allow measuring of a new RTT */
tc->rtt_ts = 0;
/* If ACK moves left side of the wnd make sure boff is 0, even if mrtt is
* not valid */
if (tc->bytes_acked)
tc->rto_boff = 0;
/* Ignore dubious measurements */
if (mrtt == 0 || mrtt > TCP_RTT_MAX)
return 0;
tcp_estimate_rtt (tc, mrtt);
/* If we got here something must've been ACKed so make sure boff is 0,
* even if mrrt is not valid since we update the rto lower */
tc->rto_boff = 0;
tcp_update_rto (tc);
return 0;
@ -932,10 +934,11 @@ static void
tcp_cc_recovery_exit (tcp_connection_t * tc)
{
/* Deflate rto */
tcp_update_rto (tc);
tc->rto_boff = 0;
tcp_update_rto (tc);
tc->snd_rxt_ts = 0;
tcp_recovery_off (tc);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
void
@ -946,6 +949,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
tc->rcv_dupacks = 0;
tcp_fastrecovery_off (tc);
tcp_fastrecovery_1_smss_off (tc);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
static void
@ -958,13 +962,14 @@ tcp_cc_congestion_undo (tcp_connection_t * tc)
if (tcp_in_recovery (tc))
tcp_cc_recovery_exit (tc);
ASSERT (tc->rto_boff == 0);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
/* TODO extend for fastrecovery */
}
static u8
tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
{
return (tcp_in_recovery (tc)
return (tcp_in_recovery (tc) && tc->rto_boff == 1
&& tc->snd_rxt_ts
&& tcp_opts_tstamp (&tc->rcv_opts)
&& timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
@ -988,7 +993,6 @@ tcp_cc_recover (tcp_connection_t * tc)
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
return 0;
}
@ -2115,7 +2119,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
new_tc0->flags |= TCP_CONN_SNDACK;
/* Update rtt with the syn-ack sample */
new_tc0->bytes_acked = 1;
tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
}
@ -2352,7 +2355,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* Update rtt and rto */
tc0->bytes_acked = 1;
tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number);
/* Switch state to ESTABLISHED */

View File

@ -918,7 +918,24 @@ tcp_send_reset (tcp_connection_t * tc)
opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
ASSERT (opts_write_len == tc->snd_opts_len);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
if (tc->c_is_ip4)
{
ip4_header_t *ih4;
ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
&tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
}
else
{
int bogus = ~0;
ip6_header_t *ih6;
ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
&tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
ASSERT (!bogus);
}
tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
void
@ -1324,7 +1341,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc)
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
tc->rtt_ts = 0;
tcp_recovery_on (tc);
}