cnat: Add support for SNat ICMP

Type: feature

SNAT supports:
* echo request/reply, by allocating an identifier
  when translating echo requests
* ICMP errors, in the same manner as DNAT

Change-Id: I684e983b0181f95c5eace5a984d40084e5625fa4
Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
This commit is contained in:
Nathan Skrzypczak
2020-09-10 17:44:41 +02:00
committed by Dave Barach
parent ece39214bc
commit 613b2c3c78
7 changed files with 461 additions and 143 deletions

View File

@ -42,6 +42,30 @@ icmp_type_is_error_message (u8 icmp_type)
return 0;
}
/*
 * Tell whether an ICMPv4 message type is one of the two echo types.
 *
 * icmp_type: value of the ICMPv4 header type field
 * returns:   1 for ICMP4_echo_request or ICMP4_echo_reply, 0 otherwise
 */
static_always_inline u8
icmp_type_is_echo (u8 icmp_type)
{
  return (icmp_type == ICMP4_echo_request || icmp_type == ICMP4_echo_reply);
}
/*
 * Tell whether an ICMPv6 message type is one of the two echo types.
 *
 * icmp_type: value of the ICMPv6 header type field
 * returns:   1 for ICMP6_echo_request or ICMP6_echo_reply, 0 otherwise
 */
static_always_inline u8
icmp6_type_is_echo (u8 icmp_type)
{
  return (icmp_type == ICMP6_echo_request || icmp_type == ICMP6_echo_reply);
}
static_always_inline u8
icmp6_type_is_error_message (u8 icmp_type)
{
@ -170,20 +194,39 @@ cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
}
static_always_inline void
cnat_translation_icmp4 (ip4_header_t * outer_ip4, udp_header_t * outer_udp,
ip4_address_t outer_new_addr[VLIB_N_DIR],
u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip)
cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
                             ip4_address_t new_addr[VLIB_N_DIR],
                             u16 new_port[VLIB_N_DIR])
{
  /*
   * Translate an ICMPv4 echo request/reply:
   *  - the IPv4 header is rewritten by cnat_ip4_translate_l3 (ip4, new_addr)
   *  - the echo identifier is replaced by new_port[VLIB_RX]
   *  - the ICMP checksum is patched for the identifier change only
   */
  cnat_echo_header_t *echo_hdr = (cnat_echo_header_t *) (icmp + 1);
  ip_csum_t csum;
  u16 prev_id;

  cnat_ip4_translate_l3 (ip4, new_addr);

  prev_id = echo_hdr->identifier;
  echo_hdr->identifier = new_port[VLIB_RX];

  /* ip4_header_t.length is only borrowed to describe the changed member to
     ip_csum_update (the original marks it as a "cheat"); the echo header is
     not an ip4_header_t. */
  csum = icmp->checksum;
  csum = ip_csum_update (csum, prev_id, new_port[VLIB_RX],
                         ip4_header_t /* cheat */ ,
                         length /* changed member */ );
  icmp->checksum = ip_csum_fold (csum);
}
static_always_inline void
cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
icmp46_header_t * icmp,
ip4_address_t outer_new_addr[VLIB_N_DIR],
u16 outer_new_port[VLIB_N_DIR],
u8 snat_outer_ip)
{
icmp46_header_t *icmp = (icmp46_header_t *) outer_udp;
ip4_address_t new_addr[VLIB_N_DIR];
ip4_address_t old_addr[VLIB_N_DIR];
u16 new_port[VLIB_N_DIR];
u16 old_port[VLIB_N_DIR];
ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
if (!icmp_type_is_error_message (icmp->type))
return;
ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
udp_header_t *udp = (udp_header_t *) (ip4 + 1);
tcp_header_t *tcp = (tcp_header_t *) udp;
@ -287,10 +330,18 @@ cnat_translation_ip4 (const cnat_session_t * session,
}
else if (ip4->protocol == IP_PROTOCOL_ICMP)
{
/* SNAT only if src_addr was translated */
u8 snat_outer_ip =
(ip4->src_address.as_u32 == session->key.cs_ip[VLIB_RX].ip4.as_u32);
cnat_translation_icmp4 (ip4, udp, new_addr, new_port, snat_outer_ip);
icmp46_header_t *icmp = (icmp46_header_t *) udp;
if (icmp_type_is_error_message (icmp->type))
{
/* SNAT only if src_addr was translated */
u8 snat_outer_ip =
(ip4->src_address.as_u32 ==
session->key.cs_ip[VLIB_RX].ip4.as_u32);
cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
snat_outer_ip);
}
else if (icmp_type_is_echo (icmp->type))
cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
}
}
@ -358,11 +409,52 @@ cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
}
static_always_inline void
cnat_translation_icmp6 (ip6_header_t * outer_ip6, udp_header_t * outer_udp,
ip6_address_t outer_new_addr[VLIB_N_DIR],
u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip)
cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
                             ip6_address_t new_addr[VLIB_N_DIR],
                             u16 new_port[VLIB_N_DIR])
{
  /*
   * Translate an ICMPv6 echo request/reply:
   *  - the IPv6 header is rewritten by cnat_ip6_translate_l3 (ip6, new_addr)
   *  - the echo identifier is replaced by new_port[VLIB_RX]
   *  - the ICMPv6 checksum is patched for every address that has a
   *    replacement and for the identifier (the ICMPv6 checksum covers the
   *    IPv6 pseudo-header, RFC 4443)
   */
  /* Same order as the address fix-ups have always been applied: TX, then RX */
  const int dir_order[] = { VLIB_TX, VLIB_RX };
  cnat_echo_header_t *echo_hdr = (cnat_echo_header_t *) (icmp + 1);
  ip6_address_t prev_addr[VLIB_N_DIR];
  ip_csum_t csum;
  u16 prev_id;
  int i;

  /* Snapshot everything the checksum depends on before rewriting L3 */
  prev_id = echo_hdr->identifier;
  ip6_address_copy (&prev_addr[VLIB_TX], &ip6->dst_address);
  ip6_address_copy (&prev_addr[VLIB_RX], &ip6->src_address);
  csum = icmp->checksum;

  cnat_ip6_translate_l3 (ip6, new_addr);

  for (i = 0; i < 2; i++)
    {
      int dir = dir_order[i];

      /* No replacement address for this direction: nothing to patch */
      if (!has_ip6_address (&new_addr[dir]))
        continue;

      csum = ip_csum_add_even (csum, new_addr[dir].as_u64[0]);
      csum = ip_csum_add_even (csum, new_addr[dir].as_u64[1]);
      csum = ip_csum_sub_even (csum, prev_addr[dir].as_u64[0]);
      csum = ip_csum_sub_even (csum, prev_addr[dir].as_u64[1]);
    }

  echo_hdr->identifier = new_port[VLIB_RX];
  /* ip4_header_t.length is only borrowed to describe the changed member to
     ip_csum_update (the original marks it as a "cheat"). */
  csum = ip_csum_update (csum, prev_id, new_port[VLIB_RX],
                         ip4_header_t /* cheat */ ,
                         length /* changed member */ );
  icmp->checksum = ip_csum_fold (csum);
}
static_always_inline void
cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
icmp46_header_t * icmp,
ip6_address_t outer_new_addr[VLIB_N_DIR],
u16 outer_new_port[VLIB_N_DIR],
u8 snat_outer_ip)
{
icmp46_header_t *icmp = (icmp46_header_t *) outer_udp;
ip6_address_t new_addr[VLIB_N_DIR];
ip6_address_t old_addr[VLIB_N_DIR];
ip6_address_t outer_old_addr[VLIB_N_DIR];
@ -411,6 +503,7 @@ cnat_translation_icmp6 (ip6_header_t * outer_ip6, udp_header_t * outer_udp,
sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
}
/* Translate inner TCP / UDP */
if (ip6->protocol == IP_PROTOCOL_TCP)
{
inner_l4_old_sum = inner_l4_sum = tcp->checksum;
@ -494,10 +587,18 @@ cnat_translation_ip6 (const cnat_session_t * session,
}
else if (ip6->protocol == IP_PROTOCOL_ICMP6)
{
/* SNAT only if src_addr was translated */
u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
&session->key.cs_ip[VLIB_RX].ip6);
cnat_translation_icmp6 (ip6, udp, new_addr, new_port, snat_outer_ip);
icmp46_header_t *icmp = (icmp46_header_t *) udp;
if (icmp6_type_is_error_message (icmp->type))
{
/* SNAT only if src_addr was translated */
u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
&session->key.
cs_ip[VLIB_RX].ip6);
cnat_translation_icmp6_error (ip6, icmp, new_addr, new_port,
snat_outer_ip);
}
else if (icmp6_type_is_echo (icmp->type))
cnat_translation_icmp6_echo (ip6, icmp, new_addr, new_port);
}
}
@ -517,18 +618,32 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
{
icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
if (!icmp_type_is_error_message (icmp->type))
if (icmp_type_is_error_message (icmp->type))
{
ip4 = (ip4_header_t *) (icmp + 2); /* Use inner packet */
udp = (udp_header_t *) (ip4 + 1);
/* Swap dst & src for search as ICMP payload is reversed */
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
&ip4->dst_address);
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
&ip4->src_address);
session->key.cs_proto = ip4->protocol;
session->key.cs_port[VLIB_TX] = udp->src_port;
session->key.cs_port[VLIB_RX] = udp->dst_port;
}
else if (icmp_type_is_echo (icmp->type))
{
cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
&ip4->dst_address);
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
&ip4->src_address);
session->key.cs_proto = ip4->protocol;
session->key.cs_port[VLIB_TX] = echo->identifier;
session->key.cs_port[VLIB_RX] = echo->identifier;
}
else
goto error;
ip4 = (ip4_header_t *) (icmp + 2); /* Use inner packet */
udp = (udp_header_t *) (ip4 + 1);
/* Swap dst & src for search as ICMP payload is reversed */
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
&ip4->dst_address);
ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
&ip4->src_address);
session->key.cs_proto = ip4->protocol;
session->key.cs_port[VLIB_TX] = udp->src_port;
session->key.cs_port[VLIB_RX] = udp->dst_port;
}
else
{
@ -550,18 +665,32 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
{
icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
if (!icmp6_type_is_error_message (icmp->type))
if (icmp6_type_is_error_message (icmp->type))
{
ip6 = (ip6_header_t *) (icmp + 2); /* Use inner packet */
udp = (udp_header_t *) (ip6 + 1);
/* Swap dst & src for search as ICMP payload is reversed */
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
&ip6->dst_address);
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
&ip6->src_address);
session->key.cs_proto = ip6->protocol;
session->key.cs_port[VLIB_TX] = udp->src_port;
session->key.cs_port[VLIB_RX] = udp->dst_port;
}
else if (icmp6_type_is_echo (icmp->type))
{
cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
&ip6->dst_address);
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
&ip6->src_address);
session->key.cs_proto = ip6->protocol;
session->key.cs_port[VLIB_TX] = echo->identifier;
session->key.cs_port[VLIB_RX] = echo->identifier;
}
else
goto error;
ip6 = (ip6_header_t *) (icmp + 2); /* Use inner packet */
udp = (udp_header_t *) (ip6 + 1);
/* Swap dst & src for search as ICMP payload is reversed */
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
&ip6->dst_address);
ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
&ip6->src_address);
session->key.cs_proto = ip6->protocol;
session->key.cs_port[VLIB_TX] = udp->src_port;
session->key.cs_port[VLIB_RX] = udp->dst_port;
}
else
{

View File

@ -25,8 +25,9 @@ typedef enum cnat_snat_next_
/* Per-packet trace record written by cnat_snat_inline and printed by
   format_cnat_snat_trace. */
typedef struct cnat_snat_trace_
{
/* NOTE(review): this listing carries no +/- markers; `found` looks like the
   pre-change name of `found_session` -- confirm against the real file. */
u32 found;
/* Copy of the session; only filled when found_session or created_session
   is non-zero */
cnat_session_t session;
/* Set to !rv by cnat_snat_inline (presumably rv == 0 means the session
   lookup hit -- confirm against the caller) */
u32 found_session;
/* Non-zero when cnat_snat_inline created the session for this packet */
u32 created_session;
} cnat_snat_trace_t;
vlib_node_registration_t cnat_snat_ip4_node;
@ -39,8 +40,11 @@ format_cnat_snat_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
cnat_snat_trace_t *t = va_arg (*args, cnat_snat_trace_t *);
if (t->found)
if (t->found_session)
s = format (s, "found: %U", format_cnat_session, &t->session, 1);
else if (t->created_session)
s = format (s, "created: %U\n tr: %U",
format_cnat_session, &t->session, 1);
else
s = format (s, "not found");
return s;
@ -56,6 +60,7 @@ cnat_snat_inline (vlib_main_t * vm,
cnat_node_ctx_t * ctx, int rv, cnat_session_t * session)
{
cnat_main_t *cm = &cnat_main;
int created_session = 0;
ip4_header_t *ip4;
ip_protocol_t iproto;
ip6_header_t *ip6;
@ -126,10 +131,9 @@ cnat_snat_inline (vlib_main_t * vm,
&ip6->dst_address);
}
/* Port allocation, first try to use the original port, allocate one
if it is already used */
sport = udp0->src_port;
rv = cnat_allocate_port (cm, &sport);
sport = 0;
rv = cnat_allocate_port (&sport, iproto);
if (rv)
{
vlib_node_increment_counter (vm, cnat_snat_ip4_node.index,
@ -137,13 +141,16 @@ cnat_snat_inline (vlib_main_t * vm,
next0 = CNAT_SNAT_NEXT_DROP;
goto trace;
}
session->value.cs_port[VLIB_RX] = sport;
session->value.cs_port[VLIB_TX] = udp0->dst_port;
session->value.cs_port[VLIB_TX] = sport;
if (iproto == IP_PROTOCOL_TCP || iproto == IP_PROTOCOL_UDP)
session->value.cs_port[VLIB_TX] = udp0->dst_port;
session->value.cs_lbi = INDEX_INVALID;
session->value.flags =
CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
created_session = 1;
cnat_session_create (session, ctx, CNAT_SESSION_FLAG_HAS_SNAT);
}
@ -160,7 +167,9 @@ trace:
t = vlib_add_trace (vm, node, b, sizeof (*t));
if (NULL != session)
t->found_session = !rv;
t->created_session = created_session;
if (t->found_session || t->created_session)
clib_memcpy (&t->session, session, sizeof (t->session));
}
return next0;

View File

@ -70,7 +70,6 @@ cnat_vip_inline (vlib_main_t * vm,
cnat_node_ctx_t * ctx, int rv, cnat_session_t * session)
{
vlib_combined_counter_main_t *cntm = &cnat_translation_counters;
cnat_main_t *cm = &cnat_main;
const cnat_translation_t *ct = NULL;
ip4_header_t *ip4 = NULL;
ip_protocol_t iproto;
@ -201,7 +200,7 @@ cnat_vip_inline (vlib_main_t * vm,
&& (rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT)) {
sport = 0; /* force allocation */
session->value.flags |= CNAT_SESSION_FLAG_ALLOC_PORT;
rv = cnat_allocate_port (cm, &sport);
rv = cnat_allocate_port (&sport, iproto);
if (rv)
{
vlib_node_increment_counter (vm, cnat_vip_ip4_node.index,

View File

@ -128,7 +128,7 @@ cnat_session_free (cnat_session_t * session)
clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session;
/* age it */
if (session->value.flags & CNAT_SESSION_FLAG_ALLOC_PORT)
cnat_free_port (session->value.cs_port[VLIB_RX]);
cnat_free_port (session->value.cs_port[VLIB_RX], session->key.cs_proto);
if (!(session->value.flags & CNAT_SESSION_FLAG_NO_CLIENT))
cnat_client_free_by_ip (&session->key.cs_ip[VLIB_TX], session->key.cs_af);
cnat_timestamp_free (session->value.cs_ts_index);

View File

@ -80,9 +80,14 @@ cnat_types_init (vlib_main_t * vm)
CNAT_FIB_SOURCE_PRIORITY,
FIB_SOURCE_BH_SIMPLE);
clib_rwlock_init (&cnat_main.ts_lock);
clib_spinlock_init (&cnat_main.src_ports_lock);
clib_bitmap_validate (cnat_main.src_ports, UINT16_MAX);
vec_validate (cnat_main.src_ports, CNAT_N_SPORT_PROTO);
for (int i = 0; i < CNAT_N_SPORT_PROTO; i++)
{
clib_spinlock_init (&cnat_main.src_ports[i].lock);
clib_bitmap_validate (cnat_main.src_ports[i].bmap, UINT16_MAX);
}
throttle_init (&cnat_throttle, n_vlib_mains, 1e-3);
return (NULL);

View File

@ -49,6 +49,15 @@
#define MIN_SRC_PORT ((u16) 0xC000)
/*
 * Slot of each protocol in the per-protocol source-port allocator vector
 * (cnat_main.src_ports). Values are consecutive from 0 so they can be used
 * directly as vector indices.
 */
typedef enum
{
  CNAT_SPORT_PROTO_TCP = 0,     /* TCP source ports */
  CNAT_SPORT_PROTO_UDP,         /* UDP source ports */
  CNAT_SPORT_PROTO_ICMP,        /* ICMPv4 echo identifiers */
  CNAT_SPORT_PROTO_ICMP6,       /* ICMPv6 echo identifiers */
  CNAT_N_SPORT_PROTO            /* number of slots above; keep last */
} cnat_sport_proto_t;
typedef struct cnat_endpoint_t_
{
ip_address_t ce_ip;
@ -61,7 +70,11 @@ typedef struct cnat_endpoint_tuple_t_
cnat_endpoint_t src_ep;
} cnat_endpoint_tuple_t;
/*
 * Echo-specific fields of an ICMP / ICMPv6 echo request or reply. The
 * translation code overlays this on the bytes that immediately follow the
 * icmp46_header_t.
 */
typedef struct
{
  u16 identifier;               /* echo identifier; handled like a port by SNAT */
  u16 sequence;                 /* echo sequence number */
} cnat_echo_header_t;
typedef struct
{
@ -80,6 +93,15 @@ typedef struct
ip6_address_t ip_masks[129];
} cnat_snat_pfx_table_t;
/*
 * One SNAT source-port pool. cnat_main holds a vector of these, one per
 * cnat_sport_proto_t slot.
 */
typedef struct cnat_src_port_allocator_
{
  /* Allocation bitmap: bit N set means host-order port N is in use */
  clib_bitmap_t *bmap;
  /* Spinlock taken around every read or write of bmap */
  clib_spinlock_t lock;
} cnat_src_port_allocator_t;
typedef struct cnat_main_
{
/* Memory size of the session bihash */
@ -113,11 +135,8 @@ typedef struct cnat_main_
/* Lock for the timestamp pool */
clib_rwlock_t ts_lock;
/* Source ports bitmap for snat */
clib_bitmap_t *src_ports;
/* Lock for src_ports access */
clib_spinlock_t src_ports_lock;
/* Per proto source ports allocator for snat */
cnat_src_port_allocator_t *src_ports;
/* Ip4 Address to use for source NATing */
ip4_address_t snat_ip4;
@ -265,33 +284,59 @@ cnat_timestamp_free (u32 index)
clib_rwlock_writer_unlock (&cnat_main.ts_lock);
}
always_inline void
cnat_free_port (u16 port)
/*
 * Select the SNAT source-port allocator for an IP protocol.
 *
 * iproto:  IP protocol number of the flow
 * returns: pointer to the matching entry of cnat_main.src_ports for TCP,
 *          UDP, ICMP and ICMP6; 0 for any other protocol (no allocator)
 */
always_inline cnat_src_port_allocator_t *
cnat_get_src_port_allocator (ip_protocol_t iproto)
{
cnat_main_t *cm = &cnat_main;
/* NOTE(review): the next three lines use cm->src_ports as a bare bitmap,
   cm->src_ports_lock and an undeclared `port`; they look like the body of
   the pre-change cnat_free_port left in by a marker-less diff listing --
   confirm against the real header. */
clib_spinlock_lock (&cm->src_ports_lock);
clib_bitmap_set_no_check (cm->src_ports, port, 0);
clib_spinlock_unlock (&cm->src_ports_lock);
/* One allocator per cnat_sport_proto_t slot */
switch (iproto)
{
case IP_PROTOCOL_TCP:
return &cm->src_ports[CNAT_SPORT_PROTO_TCP];
case IP_PROTOCOL_UDP:
return &cm->src_ports[CNAT_SPORT_PROTO_UDP];
case IP_PROTOCOL_ICMP:
return &cm->src_ports[CNAT_SPORT_PROTO_ICMP];
case IP_PROTOCOL_ICMP6:
return &cm->src_ports[CNAT_SPORT_PROTO_ICMP6];
default:
/* Protocol without a port/identifier pool */
return 0;
}
}
/*
 * Give a source port (or ICMP echo identifier) back to the allocator of
 * the given protocol.
 *
 * port:   the value produced by cnat_allocate_port, i.e. in network byte
 *         order (this is what sessions store in cs_port[])
 * iproto: protocol whose pool owns the port; protocols without a pool are
 *         silently ignored
 */
always_inline void
cnat_free_port (u16 port, ip_protocol_t iproto)
{
  cnat_src_port_allocator_t *ca;

  ca = cnat_get_src_port_allocator (iproto);
  if (!ca)
    return;

  /* cnat_allocate_port marks the bit at the host-order port and only then
     converts the value to network order for the caller. Convert back before
     indexing, otherwise an unrelated bit is cleared on little-endian hosts
     and the allocated port is never released. */
  port = clib_net_to_host_u16 (port);

  clib_spinlock_lock (&ca->lock);
  clib_bitmap_set_no_check (ca->bmap, port, 0);
  clib_spinlock_unlock (&ca->lock);
}
always_inline int
cnat_allocate_port (cnat_main_t * cm, u16 * port)
cnat_allocate_port (u16 * port, ip_protocol_t iproto)
{
*port = clib_net_to_host_u16 (*port);
if (*port == 0)
*port = MIN_SRC_PORT;
clib_spinlock_lock (&cm->src_ports_lock);
if (clib_bitmap_get_no_check (cm->src_ports, *port))
cnat_src_port_allocator_t *ca;
ca = cnat_get_src_port_allocator (iproto);
if (!ca)
return -1;
clib_spinlock_lock (&ca->lock);
if (clib_bitmap_get_no_check (ca->bmap, *port))
{
*port = clib_bitmap_next_clear (cm->src_ports, *port);
*port = clib_bitmap_next_clear (ca->bmap, *port);
if (PREDICT_FALSE (*port >= UINT16_MAX))
*port = clib_bitmap_next_clear (cm->src_ports, MIN_SRC_PORT);
*port = clib_bitmap_next_clear (ca->bmap, MIN_SRC_PORT);
if (PREDICT_FALSE (*port >= UINT16_MAX))
return -1;
}
clib_bitmap_set_no_check (cm->src_ports, *port, 1);
clib_bitmap_set_no_check (ca->bmap, *port, 1);
*port = clib_host_to_net_u16 (*port);
clib_spinlock_unlock (&cm->src_ports_lock);
clib_spinlock_unlock (&ca->lock);
return 0;
}

File diff suppressed because it is too large Load Diff