acl-plugin: create forward and return sessions in lieu of making a special per-packet session key
Using a separate session key has proven to be tricky for the following reasons: - it's a lot of storage to have what looks to be nearly identical to 5tuple, just maybe with some fields swapped - shuffling the fields from 5tuple adds to memory pressure - the fact that the fields do not coincide with the packet memory means for any staged processing we need to use up a lot of memory Thus, just add two entries into the bihash table pointing to the same session entry, so we could match the packets from either direction. With this we have the key layout of L3 info (which takes up the majority of space for IPv6 case) the same as in the packet, thus, opening up the possibility for other optimizations. Not having to create and store a separate session key should also give us a small performance win in itself. Also, add the routine to show the session bihash in a better way than a bunch of numbers. Alas, the memory usage in the bihash obviously doubles. Change-Id: I8fd2ed4714ad7fc447c4fa224d209bc0b736b371 Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
This commit is contained in:
committed by
Damjan Marion
parent
c6f186b23d
commit
a34c08c8c5
@@ -82,7 +82,7 @@ acl_fa_node_fn (vlib_main_t * vm,
|
||||
u32 pkts_restart_session_timer = 0;
|
||||
u32 trace_bitmap = 0;
|
||||
acl_main_t *am = &acl_main;
|
||||
fa_5tuple_t fa_5tuple, kv_sess;
|
||||
fa_5tuple_t fa_5tuple;
|
||||
clib_bihash_kv_40_8_t value_sess;
|
||||
vlib_node_runtime_t *error_node;
|
||||
u64 now = clib_cpu_time_now ();
|
||||
@@ -114,7 +114,6 @@ acl_fa_node_fn (vlib_main_t * vm,
|
||||
u32 match_acl_pos = ~0;
|
||||
u32 match_rule_index = ~0;
|
||||
u8 error0 = 0;
|
||||
u32 valid_new_sess;
|
||||
|
||||
/* speculatively enqueue b0 to the current next frame */
|
||||
bi0 = from[0];
|
||||
@@ -144,25 +143,15 @@ acl_fa_node_fn (vlib_main_t * vm,
|
||||
sw_if_index0)
|
||||
: (is_input * FA_POLICY_EPOCH_IS_INPUT);
|
||||
/*
|
||||
* Extract the L3/L4 matching info into a 5-tuple structure,
|
||||
* then create a session key whose layout is independent on forward or reverse
|
||||
* direction of the packet.
|
||||
* Extract the L3/L4 matching info into a 5-tuple structure.
|
||||
*/
|
||||
|
||||
acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input,
|
||||
is_l2_path,
|
||||
(fa_5tuple_opaque_t *) & fa_5tuple);
|
||||
fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
|
||||
valid_new_sess =
|
||||
acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0,
|
||||
&fa_5tuple, &kv_sess);
|
||||
// XXDEL fa_5tuple.pkt.is_input = is_input;
|
||||
fa_5tuple.pkt.mask_type_index_lsb = ~0;
|
||||
#ifdef FA_NODE_VERBOSE_DEBUG
|
||||
clib_warning
|
||||
("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
|
||||
kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
|
||||
kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
|
||||
clib_warning
|
||||
("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
|
||||
fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
|
||||
@@ -174,7 +163,7 @@ acl_fa_node_fn (vlib_main_t * vm,
|
||||
if (acl_fa_ifc_has_sessions (am, sw_if_index0))
|
||||
{
|
||||
if (acl_fa_find_session
|
||||
(am, sw_if_index0, &kv_sess, &value_sess))
|
||||
(am, sw_if_index0, &fa_5tuple, &value_sess))
|
||||
{
|
||||
trace_bitmap |= 0x80000000;
|
||||
error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
|
||||
@@ -272,26 +261,14 @@ acl_fa_node_fn (vlib_main_t * vm,
|
||||
|
||||
if (acl_fa_can_add_session (am, is_input, sw_if_index0))
|
||||
{
|
||||
if (PREDICT_TRUE (valid_new_sess))
|
||||
{
|
||||
fa_session_t *sess =
|
||||
acl_fa_add_session (am, is_input,
|
||||
sw_if_index0,
|
||||
now, &kv_sess,
|
||||
current_policy_epoch);
|
||||
acl_fa_track_session (am, is_input, sw_if_index0,
|
||||
now, sess, &fa_5tuple);
|
||||
pkts_new_session += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* ICMP packets with non-icmp_valid_new type will be
|
||||
* forwared without being dropped.
|
||||
*/
|
||||
action = 1;
|
||||
pkts_acl_permit += 1;
|
||||
}
|
||||
fa_session_t *sess =
|
||||
acl_fa_add_session (am, is_input, is_ip6,
|
||||
sw_if_index0,
|
||||
now, &fa_5tuple,
|
||||
current_policy_epoch);
|
||||
acl_fa_track_session (am, is_input, sw_if_index0,
|
||||
now, sess, &fa_5tuple);
|
||||
pkts_new_session += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -39,8 +39,16 @@ typedef union {
|
||||
u64 as_u64;
|
||||
struct {
|
||||
u16 port[2];
|
||||
u16 proto;
|
||||
u16 lsb_of_sw_if_index;
|
||||
union {
|
||||
struct {
|
||||
u8 proto;
|
||||
u8 is_input: 1;
|
||||
u8 is_slowpath: 1;
|
||||
u8 reserved0: 6;
|
||||
u16 lsb_of_sw_if_index;
|
||||
};
|
||||
u32 non_port_l4_data;
|
||||
};
|
||||
};
|
||||
} fa_session_l4_key_t;
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
|
||||
int l3_offset;
|
||||
int l4_offset;
|
||||
u16 ports[2];
|
||||
u16 proto;
|
||||
u8 proto;
|
||||
|
||||
if (is_l2_path)
|
||||
{
|
||||
@@ -307,6 +307,8 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
|
||||
|
||||
}
|
||||
p5tuple_pkt->l4.proto = proto;
|
||||
p5tuple_pkt->l4.is_input = is_input;
|
||||
|
||||
if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
|
||||
{
|
||||
p5tuple_pkt->pkt.l4_valid = 1;
|
||||
@@ -322,6 +324,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
|
||||
*(u8 *) get_ptr_to_offset (b0,
|
||||
l4_offset + offsetof (icmp46_header_t,
|
||||
code));
|
||||
p5tuple_pkt->l4.is_slowpath = 1;
|
||||
}
|
||||
else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto))
|
||||
{
|
||||
@@ -338,21 +341,12 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
|
||||
l4_offset + offsetof (tcp_header_t,
|
||||
flags));
|
||||
p5tuple_pkt->pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP);
|
||||
p5tuple_pkt->l4.is_slowpath = 0;
|
||||
}
|
||||
/*
|
||||
* FIXME: rather than the above conditional, here could
|
||||
* be a nice generic mechanism to extract two L4 values:
|
||||
*
|
||||
* have a per-protocol array of 4 elements like this:
|
||||
* u8 offset; to take the byte from, off L4 header
|
||||
* u8 mask; to mask it with, before storing
|
||||
*
|
||||
* this way we can describe UDP, TCP and ICMP[46] semantics,
|
||||
* and add a sort of FPM-type behavior for other protocols.
|
||||
*
|
||||
* Of course, is it faster ? and is it needed ?
|
||||
*
|
||||
*/
|
||||
else
|
||||
{
|
||||
p5tuple_pkt->l4.is_slowpath = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,26 @@ fa_session_get_shortest_timeout (acl_main_t * am)
|
||||
return timeout;
|
||||
}
|
||||
|
||||
static u8 *
|
||||
format_session_bihash_5tuple (u8 * s, va_list * args)
|
||||
{
|
||||
fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
|
||||
fa_full_session_id_t *sess = (void *) &p5t->pkt;
|
||||
|
||||
return format (s, "l3 %U -> %U"
|
||||
" l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_reserved0 %d port %d -> %d | sess id %d thread id %d epoch %04x",
|
||||
format_ip46_address, &p5t->addr[0],
|
||||
IP46_TYPE_ANY,
|
||||
format_ip46_address, &p5t->addr[1],
|
||||
IP46_TYPE_ANY,
|
||||
p5t->l4.lsb_of_sw_if_index,
|
||||
p5t->l4.proto, p5t->l4.is_input, p5t->l4.is_slowpath,
|
||||
p5t->l4.reserved0, p5t->l4.port[0], p5t->l4.port[1],
|
||||
sess->session_index, sess->thread_index,
|
||||
sess->intf_policy_epoch);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
acl_fa_verify_init_sessions (acl_main_t * am)
|
||||
{
|
||||
@@ -73,6 +93,8 @@ acl_fa_verify_init_sessions (acl_main_t * am)
|
||||
"ACL plugin FA session bihash",
|
||||
am->fa_conn_table_hash_num_buckets,
|
||||
am->fa_conn_table_hash_memory_size);
|
||||
clib_bihash_set_kvp_format_fn_40_8 (&am->fa_sessions_hash,
|
||||
format_session_bihash_5tuple);
|
||||
am->fa_sessions_hash_is_initialized = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,72 +67,6 @@ acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
|
||||
return it_has;
|
||||
}
|
||||
|
||||
/* Session keys match the packets received, and mirror the packets sent */
|
||||
always_inline u32
|
||||
acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6,
|
||||
u32 sw_if_index, fa_5tuple_t * p5tuple_pkt,
|
||||
fa_5tuple_t * p5tuple_sess)
|
||||
{
|
||||
int src_index = is_input ? 0 : 1;
|
||||
int dst_index = is_input ? 1 : 0;
|
||||
u32 valid_new_sess = 1;
|
||||
p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
|
||||
p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
|
||||
p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
|
||||
|
||||
if (PREDICT_TRUE (p5tuple_pkt->l4.proto != icmp_protos[is_ip6]))
|
||||
{
|
||||
p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
|
||||
p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
|
||||
static const u8 *icmp_valid_new[] =
|
||||
{ icmp4_valid_new, icmp6_valid_new };
|
||||
static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
|
||||
sizeof (icmp6_invmap)
|
||||
};
|
||||
static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
|
||||
sizeof (icmp6_valid_new)
|
||||
};
|
||||
int type =
|
||||
is_ip6 ? p5tuple_pkt->l4.port[0] - 128 : p5tuple_pkt->l4.port[0];
|
||||
|
||||
p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0];
|
||||
p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1];
|
||||
|
||||
/*
|
||||
* Invert ICMP type for valid icmp_invmap messages:
|
||||
* 1) input node with outbound ACL interface
|
||||
* 2) output node with inbound ACL interface
|
||||
*
|
||||
*/
|
||||
if ((is_input && acl_fa_ifc_has_out_acl (am, sw_if_index)) ||
|
||||
(!is_input && acl_fa_ifc_has_in_acl (am, sw_if_index)))
|
||||
{
|
||||
if (type >= 0 &&
|
||||
type <= icmp_invmap_size[is_ip6] && icmp_invmap[is_ip6][type])
|
||||
{
|
||||
p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
|
||||
* are allowed to create stateful ACL.
|
||||
* The other messages will be forwarded without creating a reflexive ACL.
|
||||
*/
|
||||
if (type < 0 ||
|
||||
type > icmp_valid_new_size[is_ip6] || !icmp_valid_new[is_ip6][type])
|
||||
{
|
||||
valid_new_sess = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return valid_new_sess;
|
||||
}
|
||||
|
||||
always_inline int
|
||||
fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
|
||||
{
|
||||
@@ -316,6 +250,100 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
|
||||
return 3;
|
||||
}
|
||||
|
||||
always_inline u64
|
||||
reverse_l4_u64_fastpath (u64 l4, int is_ip6)
|
||||
{
|
||||
fa_session_l4_key_t l4i = {.as_u64 = l4 };
|
||||
fa_session_l4_key_t l4o;
|
||||
|
||||
l4o.port[1] = l4i.port[0];
|
||||
l4o.port[0] = l4i.port[1];
|
||||
|
||||
l4o.non_port_l4_data = l4i.non_port_l4_data;
|
||||
l4o.is_input = 1 - l4i.is_input;
|
||||
return l4o.as_u64;
|
||||
}
|
||||
|
||||
always_inline u64
|
||||
reverse_l4_u64_slowpath (u64 l4, int is_ip6)
|
||||
{
|
||||
fa_session_l4_key_t l4i = {.as_u64 = l4 };
|
||||
fa_session_l4_key_t l4o;
|
||||
|
||||
if (l4i.proto == icmp_protos[is_ip6])
|
||||
{
|
||||
static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
|
||||
static const u8 *icmp_valid_new[] =
|
||||
{ icmp4_valid_new, icmp6_valid_new };
|
||||
static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
|
||||
sizeof (icmp6_invmap)
|
||||
};
|
||||
static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
|
||||
sizeof (icmp6_valid_new)
|
||||
};
|
||||
int type = is_ip6 ? l4i.port[0] - 128 : l4i.port[0];
|
||||
|
||||
l4o.non_port_l4_data = l4i.non_port_l4_data;
|
||||
l4o.port[0] = l4i.port[0];
|
||||
l4o.port[1] = l4i.port[1];
|
||||
|
||||
|
||||
/*
|
||||
* ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
|
||||
* are allowed to create stateful ACL.
|
||||
* The other messages will be forwarded without creating a reverse session.
|
||||
*/
|
||||
|
||||
if (type >= 0 && (type <= icmp_valid_new_size[is_ip6])
|
||||
&& (icmp_valid_new[is_ip6][type])
|
||||
&& (type <= icmp_invmap_size[is_ip6]) && icmp_invmap[is_ip6][type])
|
||||
{
|
||||
/*
|
||||
* we set the inverse direction and correct the port,
|
||||
* if it is okay to add the reverse session.
|
||||
* If not, then the same session will be added twice
|
||||
* to bihash, which is the same as adding just one session.
|
||||
*/
|
||||
l4o.is_input = 1 - l4i.is_input;
|
||||
l4o.port[0] = icmp_invmap[is_ip6][type] - 1;
|
||||
}
|
||||
|
||||
return l4o.as_u64;
|
||||
}
|
||||
else
|
||||
return reverse_l4_u64_fastpath (l4, is_ip6);
|
||||
}
|
||||
|
||||
always_inline u64
|
||||
reverse_l4_u64 (u64 l4, int is_ip6)
|
||||
{
|
||||
fa_session_l4_key_t l4i = {.as_u64 = l4 };
|
||||
|
||||
if (PREDICT_FALSE (l4i.is_slowpath))
|
||||
{
|
||||
return reverse_l4_u64_slowpath (l4, is_ip6);
|
||||
}
|
||||
else
|
||||
{
|
||||
return reverse_l4_u64_fastpath (l4, is_ip6);
|
||||
}
|
||||
}
|
||||
|
||||
always_inline void
|
||||
reverse_session_add_del (acl_main_t * am, const int is_ip6,
|
||||
clib_bihash_kv_40_8_t * pkv, int is_add)
|
||||
{
|
||||
clib_bihash_kv_40_8_t kv2;
|
||||
/* the first 4xu64 is two addresses, so just swap them */
|
||||
kv2.key[0] = pkv->key[2];
|
||||
kv2.key[1] = pkv->key[3];
|
||||
kv2.key[2] = pkv->key[0];
|
||||
kv2.key[3] = pkv->key[1];
|
||||
/* the last u64 needs special treatment (ports, etc.) */
|
||||
kv2.key[4] = reverse_l4_u64 (pkv->key[4], is_ip6);
|
||||
kv2.value = pkv->value;
|
||||
clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv2, is_add);
|
||||
}
|
||||
|
||||
always_inline void
|
||||
acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
|
||||
@@ -326,6 +354,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
|
||||
get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
|
||||
ASSERT (sess->thread_index == os_get_thread_index ());
|
||||
clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0);
|
||||
|
||||
reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0);
|
||||
|
||||
acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
|
||||
pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
|
||||
/* Deleting from timer structures not needed,
|
||||
@@ -362,9 +393,11 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
always_inline fa_session_t *
|
||||
acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
|
||||
fa_5tuple_t * p5tuple, u16 current_policy_epoch)
|
||||
acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
|
||||
u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple,
|
||||
u16 current_policy_epoch)
|
||||
{
|
||||
clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
|
||||
clib_bihash_kv_40_8_t kv;
|
||||
@@ -396,10 +429,11 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
|
||||
sess->link_prev_idx = ~0;
|
||||
sess->link_next_idx = ~0;
|
||||
|
||||
|
||||
|
||||
ASSERT (am->fa_sessions_hash_is_initialized == 1);
|
||||
clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
|
||||
|
||||
reverse_session_add_del (am, is_ip6, &kv, 1);
|
||||
|
||||
acl_fa_conn_list_add_session (am, f_sess_id, now);
|
||||
|
||||
vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
|
||||
|
||||
Reference in New Issue
Block a user