Fix fifo ooo bugs and improve testing

Change-Id: If3c01e318bcb740ca5b240c63f712e2167082a80
Signed-off-by: Dave Barach <dave@barachs.net>
Signed-off-by: Florin Coras <fcoras@cisco.com>
This commit is contained in:
Dave Barach
2017-04-14 16:46:44 -04:00
committed by Florin Coras
parent fb5b2af2c0
commit 1f75cfd733
7 changed files with 603 additions and 95 deletions
+91 -35
View File
@@ -15,6 +15,36 @@
#include <svm/svm_fifo.h>
#define offset_lt(_a, _b) ((i32)((_a)-(_b)) < 0)
#define offset_leq(_a, _b) ((i32)((_a)-(_b)) <= 0)
u8 *
format_ooo_segment (u8 * s, va_list * args)
{
ooo_segment_t *seg = va_arg (*args, ooo_segment_t *);
s = format (s, "pos %u, len %u, next %d, prev %d",
seg->start, seg->length, seg->next, seg->prev);
return s;
}
u8 *
format_ooo_list (u8 * s, va_list * args)
{
svm_fifo_t *f = va_arg (*args, svm_fifo_t *);
u32 ooo_segment_index = f->ooos_list_head;
ooo_segment_t *seg;
while (ooo_segment_index != OOO_SEGMENT_INVALID_INDEX)
{
seg = pool_elt_at_index (f->ooo_segments, ooo_segment_index);
s = format (s, "\n %U", format_ooo_segment, seg);
ooo_segment_index = seg->next;
}
return s;
}
/** create an svm fifo, in the current heap. Fails vs blow up the process */
svm_fifo_t *
svm_fifo_create (u32 data_size_in_bytes)
@@ -47,7 +77,7 @@ ooo_segment_new (svm_fifo_t * f, u32 start, u32 length)
pool_get (f->ooo_segments, s);
s->fifo_position = start;
s->start = start;
s->length = length;
s->prev = s->next = OOO_SEGMENT_INVALID_INDEX;
@@ -88,14 +118,13 @@ static void
ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
{
ooo_segment_t *s, *new_s, *prev, *next, *it;
u32 new_index, position, end_offset, s_sof, s_eof, s_index;
u32 new_index, end_offset, s_sof, s_eof, s_index;
position = (f->tail + offset) % f->nitems;
end_offset = offset + length;
if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX)
{
s = ooo_segment_new (f, position, length);
s = ooo_segment_new (f, offset, length);
f->ooos_list_head = s - f->ooo_segments;
f->ooos_newest = f->ooos_list_head;
return;
@@ -104,26 +133,26 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
/* Find first segment that starts after new segment */
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
while (s->next != OOO_SEGMENT_INVALID_INDEX
&& ooo_segment_offset (f, s) <= offset)
&& offset_leq (ooo_segment_offset (f, s), offset))
s = pool_elt_at_index (f->ooo_segments, s->next);
s_index = s - f->ooo_segments;
s_sof = ooo_segment_offset (f, s);
s_eof = ooo_segment_end_offset (f, s);
prev = ooo_segment_get_prev (f, s);
/* No overlap, add before current segment */
if (end_offset < s_sof)
if (offset_lt (end_offset, s_sof)
&& (!prev || offset_lt (prev->start + prev->length, offset)))
{
new_s = ooo_segment_new (f, position, length);
new_s = ooo_segment_new (f, offset, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
s = pool_elt_at_index (f->ooo_segments, s_index);
if (s->prev != OOO_SEGMENT_INVALID_INDEX)
{
new_s->prev = s->prev;
prev = pool_elt_at_index (f->ooo_segments, new_s->prev);
prev->next = new_index;
}
@@ -139,9 +168,9 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
return;
}
/* No overlap, add after current segment */
else if (s_eof < offset)
else if (offset_lt (s_eof, offset))
{
new_s = ooo_segment_new (f, position, length);
new_s = ooo_segment_new (f, offset, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
@@ -150,7 +179,6 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
if (s->next != OOO_SEGMENT_INVALID_INDEX)
{
new_s->next = s->next;
next = pool_elt_at_index (f->ooo_segments, new_s->next);
next->prev = new_index;
}
@@ -167,7 +195,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
*/
/* Merge at head */
if (offset <= s_sof)
if (offset_leq (offset, s_sof))
{
/* If we have a previous, check if we overlap */
if (s->prev != OOO_SEGMENT_INVALID_INDEX)
@@ -176,26 +204,31 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
/* New segment merges prev and current. Remove previous and
* update position of current. */
if (ooo_segment_end_offset (f, prev) >= offset)
if (offset_leq (offset, ooo_segment_end_offset (f, prev)))
{
s->fifo_position = prev->fifo_position;
s->start = prev->start;
s->length = s_eof - ooo_segment_offset (f, prev);
ooo_segment_del (f, s->prev);
}
else
{
s->start = offset;
s->length = s_eof - ooo_segment_offset (f, s);
}
}
else
{
s->fifo_position = position;
s->start = offset;
s->length = s_eof - ooo_segment_offset (f, s);
}
/* The new segment's tail may cover multiple smaller ones */
if (s_eof < end_offset)
if (offset_lt (s_eof, end_offset))
{
/* Remove segments completely covered */
it = (s->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, s->next) : 0;
while (it && ooo_segment_end_offset (f, it) < end_offset)
while (it && offset_lt (ooo_segment_end_offset (f, it), end_offset))
{
next = (it->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, it->next) : 0;
@@ -207,7 +240,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
s->length = end_offset - ooo_segment_offset (f, s);
/* If partial overlap with last, merge */
if (it && ooo_segment_offset (f, it) < end_offset)
if (it && offset_lt (ooo_segment_offset (f, it), end_offset))
{
s->length +=
it->length - (ooo_segment_offset (f, it) - end_offset);
@@ -216,7 +249,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
}
}
/* Last but overlapping previous */
else if (s_eof <= end_offset)
else if (offset_leq (s_eof, end_offset))
{
s->length = end_offset - ooo_segment_offset (f, s);
}
@@ -247,7 +280,7 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
/* If last tail update overlaps one/multiple ooo segments, remove them */
diff = (f->nitems + f->tail - s->fifo_position) % f->nitems;
diff = (f->nitems + ((int) s->start - f->tail)) % f->nitems;
while (0 < diff && diff < n_bytes_enqueued)
{
/* Segment end is beyond the tail. Advance tail and be done */
@@ -262,7 +295,7 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
{
index = s - f->ooo_segments;
s = pool_elt_at_index (f->ooo_segments, s->next);
diff = (f->nitems + f->tail - s->fifo_position) % f->nitems;
diff = (f->nitems + ((int) s->start - f->tail)) % f->nitems;
ooo_segment_del (f, index);
}
/* End of search */
@@ -368,9 +401,20 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f,
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
u32 cursize, nitems;
u32 tail_plus_offset;
u32 normalized_offset;
int rv;
ASSERT (offset > 0);
/* Safety: don't wrap more than nitems/2 */
ASSERT ((f->nitems + offset - f->tail) % f->nitems < f->nitems / 2);
/* Users would do do well to avoid this */
if (PREDICT_FALSE (f->tail == (offset % f->nitems)))
{
rv = svm_fifo_enqueue_internal (f, pid, required_bytes, copy_from_here);
if (rv > 0)
return 0;
return -1;
}
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
@@ -384,24 +428,24 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f,
/* Number of bytes we're going to copy */
total_copy_bytes = required_bytes;
tail_plus_offset = (f->tail + offset) % nitems;
normalized_offset = offset % nitems;
/* Number of bytes in first copy segment */
first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes)
? (nitems - tail_plus_offset) : total_copy_bytes;
first_copy_bytes = ((nitems - normalized_offset) < total_copy_bytes)
? (nitems - normalized_offset) : total_copy_bytes;
clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes);
clib_memcpy (&f->data[normalized_offset], copy_from_here, first_copy_bytes);
/* Number of bytes in second copy segment, if any */
second_copy_bytes = total_copy_bytes - first_copy_bytes;
if (second_copy_bytes)
{
tail_plus_offset += first_copy_bytes;
tail_plus_offset %= nitems;
normalized_offset += first_copy_bytes;
normalized_offset %= nitems;
ASSERT (tail_plus_offset == 0);
ASSERT (normalized_offset == 0);
clib_memcpy (&f->data[tail_plus_offset],
clib_memcpy (&f->data[normalized_offset],
copy_from_here + first_copy_bytes, second_copy_bytes);
}
@@ -573,8 +617,8 @@ format_svm_fifo (u8 * s, va_list * args)
ooo_segment_t *seg;
u32 seg_index;
s =
format (s, "ooo pool %d active elts\n", pool_elts (f->ooo_segments));
s = format (s, "ooo pool %d active elts\n",
pool_elts (f->ooo_segments));
seg_index = f->ooos_list_head;
@@ -582,13 +626,25 @@ format_svm_fifo (u8 * s, va_list * args)
{
seg = pool_elt_at_index (f->ooo_segments, seg_index);
s = format (s, " pos %u, len %u next %d\n",
seg->fifo_position, seg->length, seg->next);
seg->start, seg->length, seg->next);
seg_index = seg->next;
}
}
return s;
}
u32
svm_fifo_number_ooo_segments (svm_fifo_t * f)
{
return pool_elts (f->ooo_segments);
}
ooo_segment_t *
svm_fifo_first_ooo_segment (svm_fifo_t * f)
{
return pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
}
/*
* fd.io coding-style-patch-verification: ON
*
+18 -3
View File
@@ -36,10 +36,13 @@ typedef struct
u32 next; /**< Next linked-list element pool index */
u32 prev; /**< Previous linked-list element pool index */
u32 fifo_position; /**< Start of segment, normalized*/
u32 start; /**< Start of segment, normalized*/
u32 length; /**< Length of segment */
} ooo_segment_t;
format_function_t format_ooo_segment;
format_function_t format_ooo_list;
#define OOO_SEGMENT_INVALID_INDEX ((u32)~0)
typedef struct
@@ -127,6 +130,8 @@ int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes,
int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes,
u8 * copy_here);
int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes);
u32 svm_fifo_number_ooo_segments (svm_fifo_t * f);
ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f);
format_function_t format_svm_fifo;
@@ -139,13 +144,23 @@ svm_fifo_newest_ooo_segment (svm_fifo_t * f)
always_inline u32
ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s)
{
return ((f->nitems + s->fifo_position - f->tail) % f->nitems);
// return ((f->nitems + s->fifo_position - f->tail) % f->nitems);
return s->start;
}
always_inline u32
ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s)
{
return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems);
// return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems);
return s->start + s->length;
}
always_inline ooo_segment_t *
ooo_segment_get_prev (svm_fifo_t * f, ooo_segment_t * s)
{
if (s->prev == OOO_SEGMENT_INVALID_INDEX)
return 0;
return pool_elt_at_index (f->ooo_segments, s->prev);
}
#endif /* __included_ssvm_fifo_h__ */
+1 -1
View File
@@ -447,7 +447,7 @@ format_tcp_state (u8 * s, va_list * args)
if (*state < TCP_N_STATES)
s = format (s, "%s", tcp_fsm_states[*state]);
else
s = format (s, "UNKNOWN");
s = format (s, "UNKNOWN (%d (0x%x))", *state, *state);
return s;
}
+1
View File
@@ -58,6 +58,7 @@ typedef enum _tcp_state
} tcp_state_t;
format_function_t format_tcp_state;
format_function_t format_tcp_flags;
/** TCP timers */
#define foreach_tcp_timer \
+1 -1
View File
@@ -40,7 +40,7 @@
#include <vnet/ip/ip.h>
#include <vnet/tcp/tcp.h>
static u8 *
u8 *
format_tcp_flags (u8 * s, va_list * args)
{
int flags = va_arg (*args, int);
+19 -9
View File
@@ -211,8 +211,6 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to)
always_inline int
tcp_segment_check_paws (tcp_connection_t * tc)
{
/* XXX normally test for timestamp should be lt instead of leq, but for
* local testing this is not enough */
return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent
&& timestamp_lt (tc->opt.tsval, tc->tsval_recent);
}
@@ -999,7 +997,7 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
u16 data_len)
{
stream_session_t *s0;
u32 offset, seq;
u32 offset;
int rv;
/* Pure ACK. Do nothing */
@@ -1009,8 +1007,9 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
}
s0 = stream_session_get (tc->c_s_index, tc->c_thread_index);
seq = vnet_buffer (b)->tcp.seq_number;
offset = seq - tc->rcv_nxt;
offset = vnet_buffer (b)->tcp.seq_number - tc->irs;
clib_warning ("ooo: offset %d len %d", offset, data_len);
rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset,
data_len, vlib_buffer_get_current (b));
@@ -1032,8 +1031,8 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
/* Get the newest segment from the fifo */
newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest);
end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest);
start = ooo_segment_offset (s0->server_rx_fifo, newest);
end = ooo_segment_end_offset (s0->server_rx_fifo, newest);
tcp_update_sack_list (tc, start, end);
}
@@ -1072,6 +1071,7 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
{
/* Old sequence numbers allowed through because they overlapped
* the rx window */
if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
{
error = TCP_ERROR_SEGMENT_OLD;
@@ -1181,6 +1181,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->thread_index, errors = 0;
tcp_main_t *tm = vnet_get_tcp_main ();
u8 is_fin = 0;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -1243,9 +1244,11 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_advance_bytes0 += sizeof (ip60[0]);
}
is_fin = (th0->flags & TCP_FLAG_FIN) != 0;
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
+ tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0;
+ tcp_is_syn (th0) + is_fin + n_data_bytes0;
/* TODO header prediction fast path */
@@ -1272,8 +1275,11 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_advance (b0, n_advance_bytes0);
error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0);
/* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
* dangling reference. */
/* 8: check the FIN bit */
if (tcp_fin (th0))
if (is_fin)
{
/* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
* wait for session to call close. To avoid lingering
@@ -2365,8 +2371,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH))
{
tcp_state_t state0 = tc0->state;
/* Overload tcp flags to store state */
vnet_buffer (b0)->tcp.flags = tc0->state;
clib_warning ("disp error state %U flags %U",
format_tcp_state, &state0,
format_tcp_flags, flags0);
}
}
else
+472 -46
View File
File diff suppressed because it is too large Load Diff