tcp: do not sample rtt for retransmitted segments
Change-Id: I365c31607332a944ef498369881332b515894ed7 Signed-off-by: Florin Coras <fcoras@cisco.com>
This commit is contained in:
Florin Coras
committed by
Dave Barach
parent
95524c3a2e
commit
f1762d6fa8
@ -20,7 +20,7 @@
|
||||
|
||||
#define TCP_DEBUG (1)
|
||||
#define TCP_DEBUG_SM (0)
|
||||
#define TCP_DEBUG_CC (1)
|
||||
#define TCP_DEBUG_CC (0)
|
||||
#define TCP_DEBUG_CC_STAT (1)
|
||||
|
||||
#define foreach_tcp_dbg_evt \
|
||||
@ -411,21 +411,6 @@ typedef enum _tcp_dbg_evt
|
||||
ed->data[4] = _tc->snd_wnd; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
{ \
|
||||
.format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
|
||||
.format_args = "i4i4i4i4i4", \
|
||||
}; \
|
||||
DECLARE_ETD(_tc, _e, 5); \
|
||||
ed->data[0] = _tc->rcv_nxt - _tc->irs; \
|
||||
ed->data[1] = _tc->rcv_wnd; \
|
||||
ed->data[2] = _tc->snd_nxt - _tc->iss; \
|
||||
ed->data[3] = tcp_available_snd_wnd(_tc); \
|
||||
ed->data[4] = _tc->snd_wnd; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
@ -441,21 +426,6 @@ typedef enum _tcp_dbg_evt
|
||||
ed->data[4] = tcp_flight_size(_tc); \
|
||||
}
|
||||
|
||||
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
{ \
|
||||
.format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
|
||||
.format_args = "i4i4i4i4i4", \
|
||||
}; \
|
||||
DECLARE_ETD(_tc, _e, 5); \
|
||||
ed->data[0] = _tc->snd_una - _tc->iss; \
|
||||
ed->data[1] = _tc->cwnd; \
|
||||
ed->data[2] = _tc->snd_wnd; \
|
||||
ed->data[3] = tcp_flight_size(_tc); \
|
||||
ed->data[4] = _tc->rcv_wnd; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
@ -601,9 +571,7 @@ if (_av > 0) \
|
||||
}
|
||||
#else
|
||||
#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
|
||||
#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
|
||||
@ -649,6 +617,30 @@ if (_av > 0) \
|
||||
*/
|
||||
|
||||
#if TCP_DEBUG_CC
|
||||
|
||||
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
{ \
|
||||
.format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
|
||||
.format_args = "t4i4i4i4", \
|
||||
.n_enum_strings = 6, \
|
||||
.enum_strings = { \
|
||||
"fast-rxt", \
|
||||
"rxt-timeout", \
|
||||
"first-rxt", \
|
||||
"recovered", \
|
||||
"congestion", \
|
||||
"undo", \
|
||||
}, \
|
||||
}; \
|
||||
DECLARE_ETD(_tc, _e, 4); \
|
||||
ed->data[0] = _sub_evt; \
|
||||
ed->data[1] = tcp_available_snd_space (_tc); \
|
||||
ed->data[2] = _tc->snd_congestion - _tc->iss; \
|
||||
ed->data[3] = _tc->snd_rxt_bytes; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
@ -663,26 +655,34 @@ if (_av > 0) \
|
||||
ed->data[3] = _tc->snd_rxt_bytes; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
|
||||
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
{ \
|
||||
.format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \
|
||||
.format_args = "t4i4i4i4", \
|
||||
.n_enum_strings = 5, \
|
||||
.enum_strings = { \
|
||||
"fast-rxt", \
|
||||
"rxt-timeout", \
|
||||
"first-rxt", \
|
||||
"recovered", \
|
||||
"congestion", \
|
||||
}, \
|
||||
.format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
|
||||
.format_args = "i4i4i4i4i4", \
|
||||
}; \
|
||||
DECLARE_ETD(_tc, _e, 4); \
|
||||
ed->data[0] = _sub_evt; \
|
||||
ed->data[1] = tcp_available_snd_space (_tc); \
|
||||
ed->data[2] = _tc->snd_congestion - _tc->iss; \
|
||||
ed->data[3] = _tc->snd_rxt_bytes; \
|
||||
DECLARE_ETD(_tc, _e, 5); \
|
||||
ed->data[0] = _tc->rcv_nxt - _tc->irs; \
|
||||
ed->data[1] = _tc->rcv_wnd; \
|
||||
ed->data[2] = _tc->snd_nxt - _tc->iss; \
|
||||
ed->data[3] = tcp_available_snd_wnd(_tc); \
|
||||
ed->data[4] = _tc->snd_wnd; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...) \
|
||||
{ \
|
||||
ELOG_TYPE_DECLARE (_e) = \
|
||||
{ \
|
||||
.format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
|
||||
.format_args = "i4i4i4i4i4", \
|
||||
}; \
|
||||
DECLARE_ETD(_tc, _e, 5); \
|
||||
ed->data[0] = _tc->snd_una - _tc->iss; \
|
||||
ed->data[1] = _tc->cwnd; \
|
||||
ed->data[2] = _tc->snd_wnd; \
|
||||
ed->data[3] = tcp_flight_size(_tc); \
|
||||
ed->data[4] = _tc->rcv_wnd; \
|
||||
}
|
||||
|
||||
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \
|
||||
@ -696,6 +696,13 @@ if (_av > 0) \
|
||||
ed->data[0] = _tc->snd_una - _tc->iss; \
|
||||
ed->data[1] = _tc->snd_una_max - _tc->iss; \
|
||||
}
|
||||
#else
|
||||
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
|
||||
#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Congestion control stats
|
||||
@ -744,13 +751,6 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
|
||||
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
|
||||
#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
|
||||
#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
|
||||
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
|
||||
#endif
|
||||
|
||||
#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
|
||||
/*
|
||||
* fd.io coding-style-patch-verification: ON
|
||||
|
@ -248,8 +248,8 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
|
||||
* then the TSval from the segment is copied to TS.Recent;
|
||||
* otherwise, the TSval is ignored.
|
||||
*/
|
||||
if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
|
||||
&& seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end))
|
||||
if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
|
||||
&& seq_leq (tc->rcv_las, seq_end))
|
||||
{
|
||||
ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
|
||||
tc->tsval_recent = tc->rcv_opts.tsval;
|
||||
@ -418,51 +418,53 @@ tcp_update_rto (tcp_connection_t * tc)
|
||||
tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
|
||||
}
|
||||
|
||||
/** Update RTT estimate and RTO timer
|
||||
/**
|
||||
* Update RTT estimate and RTO timer
|
||||
*
|
||||
* Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
|
||||
* timing. Middle boxes are known to fiddle with TCP options so we
|
||||
* should give higher priority to ACK timing.
|
||||
*
|
||||
* This should be called only if previously sent bytes have been acked.
|
||||
*
|
||||
* return 1 if valid rtt 0 otherwise
|
||||
*/
|
||||
static int
|
||||
tcp_update_rtt (tcp_connection_t * tc, u32 ack)
|
||||
{
|
||||
u32 mrtt = 0;
|
||||
u8 rtx_acked;
|
||||
|
||||
/* Determine if only rtx bytes are acked. */
|
||||
rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked;
|
||||
|
||||
/* Karn's rule, part 1. Don't use retransmitted segments to estimate
|
||||
* RTT because they're ambiguous. */
|
||||
if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq) && !rtx_acked)
|
||||
if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes)
|
||||
goto done;
|
||||
|
||||
if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
|
||||
{
|
||||
mrtt = tcp_time_now () - tc->rtt_ts;
|
||||
}
|
||||
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
|
||||
* snd_una, i.e., the left side of the send window:
|
||||
* seq_lt (tc->snd_una, ack). */
|
||||
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr
|
||||
&& tc->bytes_acked)
|
||||
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
|
||||
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
|
||||
{
|
||||
mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
|
||||
}
|
||||
|
||||
/* Ignore dubious measurements */
|
||||
if (mrtt == 0 || mrtt > TCP_RTT_MAX)
|
||||
goto done;
|
||||
|
||||
tcp_estimate_rtt (tc, mrtt);
|
||||
|
||||
done:
|
||||
|
||||
/* Allow measuring of a new RTT */
|
||||
tc->rtt_ts = 0;
|
||||
|
||||
/* If ACK moves left side of the wnd make sure boff is 0, even if mrtt is
|
||||
* not valid */
|
||||
if (tc->bytes_acked)
|
||||
tc->rto_boff = 0;
|
||||
|
||||
/* Ignore dubious measurements */
|
||||
if (mrtt == 0 || mrtt > TCP_RTT_MAX)
|
||||
return 0;
|
||||
|
||||
tcp_estimate_rtt (tc, mrtt);
|
||||
/* If we got here something must've been ACKed so make sure boff is 0,
|
||||
* even if mrrt is not valid since we update the rto lower */
|
||||
tc->rto_boff = 0;
|
||||
tcp_update_rto (tc);
|
||||
|
||||
return 0;
|
||||
@ -932,10 +934,11 @@ static void
|
||||
tcp_cc_recovery_exit (tcp_connection_t * tc)
|
||||
{
|
||||
/* Deflate rto */
|
||||
tcp_update_rto (tc);
|
||||
tc->rto_boff = 0;
|
||||
tcp_update_rto (tc);
|
||||
tc->snd_rxt_ts = 0;
|
||||
tcp_recovery_off (tc);
|
||||
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
|
||||
}
|
||||
|
||||
void
|
||||
@ -946,6 +949,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
|
||||
tc->rcv_dupacks = 0;
|
||||
tcp_fastrecovery_off (tc);
|
||||
tcp_fastrecovery_1_smss_off (tc);
|
||||
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -958,13 +962,14 @@ tcp_cc_congestion_undo (tcp_connection_t * tc)
|
||||
if (tcp_in_recovery (tc))
|
||||
tcp_cc_recovery_exit (tc);
|
||||
ASSERT (tc->rto_boff == 0);
|
||||
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
|
||||
/* TODO extend for fastrecovery */
|
||||
}
|
||||
|
||||
static u8
|
||||
tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
|
||||
{
|
||||
return (tcp_in_recovery (tc)
|
||||
return (tcp_in_recovery (tc) && tc->rto_boff == 1
|
||||
&& tc->snd_rxt_ts
|
||||
&& tcp_opts_tstamp (&tc->rcv_opts)
|
||||
&& timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
|
||||
@ -988,7 +993,6 @@ tcp_cc_recover (tcp_connection_t * tc)
|
||||
ASSERT (tc->rto_boff == 0);
|
||||
ASSERT (!tcp_in_cong_recovery (tc));
|
||||
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
|
||||
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2115,7 +2119,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
|
||||
new_tc0->flags |= TCP_CONN_SNDACK;
|
||||
|
||||
/* Update rtt with the syn-ack sample */
|
||||
new_tc0->bytes_acked = 1;
|
||||
tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
|
||||
TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
|
||||
}
|
||||
@ -2352,7 +2355,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
|
||||
}
|
||||
|
||||
/* Update rtt and rto */
|
||||
tc0->bytes_acked = 1;
|
||||
tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number);
|
||||
|
||||
/* Switch state to ESTABLISHED */
|
||||
|
@ -918,7 +918,24 @@ tcp_send_reset (tcp_connection_t * tc)
|
||||
opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
|
||||
ASSERT (opts_write_len == tc->snd_opts_len);
|
||||
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
|
||||
tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
|
||||
if (tc->c_is_ip4)
|
||||
{
|
||||
ip4_header_t *ih4;
|
||||
ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
|
||||
&tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
|
||||
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
|
||||
}
|
||||
else
|
||||
{
|
||||
int bogus = ~0;
|
||||
ip6_header_t *ih6;
|
||||
ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
|
||||
&tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
|
||||
th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
|
||||
ASSERT (!bogus);
|
||||
}
|
||||
tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
|
||||
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
|
||||
}
|
||||
|
||||
void
|
||||
@ -1324,7 +1341,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc)
|
||||
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
|
||||
tc->cwnd = tcp_loss_wnd (tc);
|
||||
tc->snd_congestion = tc->snd_una_max;
|
||||
|
||||
tc->rtt_ts = 0;
|
||||
tcp_recovery_on (tc);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user