From ece39214bcb05c535ba5de9af97b5f84f6911cba Mon Sep 17 00:00:00 2001 From: Nathan Skrzypczak Date: Tue, 8 Sep 2020 15:16:08 +0200 Subject: [PATCH] cnat: Ip ICMP error support Type: feature Add CNAT translation for ICMP 4 & 6 errors inner packet will be translated according to existing sessions. Change-Id: If118751988f44ef96b800878596296d1ab8ab6f8 Signed-off-by: Nathan Skrzypczak --- src/plugins/cnat/cnat_client.c | 20 +- src/plugins/cnat/cnat_node.h | 507 ++++++++++++++++++++++------- src/plugins/cnat/cnat_node_snat.c | 3 +- src/plugins/cnat/cnat_node_vip.c | 5 +- src/plugins/cnat/cnat_session.h | 5 + src/plugins/cnat/test/test_cnat.py | 182 ++++++++--- 6 files changed, 557 insertions(+), 165 deletions(-) diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c index 10d9966ad53..314000d785e 100644 --- a/src/plugins/cnat/cnat_client.c +++ b/src/plugins/cnat/cnat_client.c @@ -61,12 +61,10 @@ cnat_client_free_by_ip (ip46_address_t * ip, u8 af) cnat_client_t *cc; cc = (AF_IP4 == af ? cnat_client_ip4_find (&ip->ip4) : cnat_client_ip6_find (&ip->ip6)); - /* This can happen if the translation gets deleted - before the session */ - if (NULL == cc) - return; + ASSERT (NULL != cc); + if ((0 == cnat_client_uncnt_session (cc)) - && (cc->flags & CNAT_FLAG_EXPIRES)) + && (cc->flags & CNAT_FLAG_EXPIRES) && (0 == cc->tr_refcnt)) cnat_client_destroy (cc); } @@ -101,7 +99,6 @@ cnat_client_throttle_pool_process () /* *INDENT-ON* */ vec_foreach (ai, del_vec) { - /* Free session */ addr = pool_elt_at_index (cnat_client_db.throttle_pool[i], *ai); pool_put (cnat_client_db.throttle_pool[i], addr); } @@ -127,7 +124,7 @@ cnat_client_translation_deleted (index_t cci) ASSERT (!(cc->flags & CNAT_FLAG_EXPIRES)); cc->tr_refcnt--; - if (0 == cc->tr_refcnt) + if (0 == cc->tr_refcnt && 0 == cc->session_refcnt) cnat_client_destroy (cc); } @@ -171,6 +168,8 @@ cnat_client_add (const ip_address_t * ip, u8 flags) cci = cc - cnat_client_pool; cc->parent_cci = cci; cc->flags = flags; + cc->tr_refcnt = 0; + cc->session_refcnt = 0; ip_address_copy (&cc->cc_ip, ip); cnat_client_db_add (cc); @@ -238,9 +237,16 @@ cnat_client_dpo_interpose (const dpo_id_t * original, int cnat_client_purge (void) { + vlib_thread_main_t *tm = vlib_get_thread_main (); + int nthreads; + nthreads = tm->n_threads + 1; ASSERT (0 == hash_elts (cnat_client_db.crd_cip6)); ASSERT (0 == hash_elts (cnat_client_db.crd_cip4)); ASSERT (0 == pool_elts (cnat_client_pool)); + for (int i = 0; i < nthreads; i++) + { + ASSERT (0 == pool_elts (cnat_client_db.throttle_pool[i])); + } return (0); } diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h index 58e81c12b45..a3967960078 100644 --- a/src/plugins/cnat/cnat_node.h +++ b/src/plugins/cnat/cnat_node.h @@ -26,6 +26,42 @@ typedef uword (*cnat_node_sub_t) (vlib_main_t * vm, cnat_node_ctx_t * ctx, int rv, cnat_session_t * session); +static_always_inline u8 +icmp_type_is_error_message (u8 icmp_type) +{ + switch (icmp_type) + { + case ICMP4_destination_unreachable: + case ICMP4_time_exceeded: + case ICMP4_parameter_problem: + case ICMP4_source_quench: + case ICMP4_redirect: + case ICMP4_alternate_host_address: + return 1; + } + return 0; +} + +static_always_inline u8 +icmp6_type_is_error_message (u8 icmp_type) +{ + switch (icmp_type) + { + case ICMP6_destination_unreachable: + case ICMP6_time_exceeded: + case ICMP6_parameter_problem: + return 1; + } + return 0; +} + +static_always_inline u8 +cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2) +{ + return ((a1->as_u64[0] == a2->as_u64[0]) + && (a1->as_u64[1] == a2->as_u64[1])); +} + /** * Inline translation functions */ @@ -38,44 +74,52 @@ has_ip6_address (ip6_address_t * a) static_always_inline void cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp, - u16 * checksum, + ip_csum_t * sum, ip4_address_t new_addr[VLIB_N_DIR], u16 new_port[VLIB_N_DIR]) { u16 old_port[VLIB_N_DIR]; ip4_address_t old_addr[VLIB_N_DIR]; - ip_csum_t sum; + + /* Fastpath no checksum */ + if (PREDICT_TRUE (0 == *sum)) + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + return; + } old_port[VLIB_TX] = udp->dst_port; old_port[VLIB_RX] = udp->src_port; old_addr[VLIB_TX] = ip4->dst_address; old_addr[VLIB_RX] = ip4->src_address; - sum = *checksum; if (new_addr[VLIB_TX].as_u32) - sum = - ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32, - ip4_header_t, dst_address); + { + *sum = + ip_csum_update (*sum, old_addr[VLIB_TX].as_u32, + new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address); + } if (new_port[VLIB_TX]) { udp->dst_port = new_port[VLIB_TX]; - sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], - ip4_header_t /* cheat */ , - length /* changed member */ ); + *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); } if (new_addr[VLIB_RX].as_u32) - sum = - ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32, - ip4_header_t, src_address); - + { + *sum = + ip_csum_update (*sum, old_addr[VLIB_RX].as_u32, + new_addr[VLIB_RX].as_u32, ip4_header_t, src_address); + } if (new_port[VLIB_RX]) { udp->src_port = new_port[VLIB_RX]; - sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], - ip4_header_t /* cheat */ , - length /* changed member */ ); + *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); } - *checksum = ip_csum_fold (sum); } static_always_inline void @@ -125,6 +169,94 @@ cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index) } } +static_always_inline void +cnat_translation_icmp4 (ip4_header_t * outer_ip4, udp_header_t * outer_udp, + ip4_address_t outer_new_addr[VLIB_N_DIR], + u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip) +{ + icmp46_header_t *icmp = (icmp46_header_t *) outer_udp; + ip4_address_t new_addr[VLIB_N_DIR]; + ip4_address_t old_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + u16 old_port[VLIB_N_DIR]; + ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum; + + if (!icmp_type_is_error_message (icmp->type)) + return; + + ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2); + udp_header_t *udp = (udp_header_t *) (ip4 + 1); + tcp_header_t *tcp = (tcp_header_t *) udp; + + /* Swap inner ports */ + new_addr[VLIB_TX] = outer_new_addr[VLIB_RX]; + new_addr[VLIB_RX] = outer_new_addr[VLIB_TX]; + new_port[VLIB_TX] = outer_new_port[VLIB_RX]; + new_port[VLIB_RX] = outer_new_port[VLIB_TX]; + + old_addr[VLIB_TX] = ip4->dst_address; + old_addr[VLIB_RX] = ip4->src_address; + old_port[VLIB_RX] = udp->src_port; + old_port[VLIB_TX] = udp->dst_port; + + sum = icmp->checksum; + old_ip_sum = ip4->checksum; + + /* translate outer ip. */ + if (!snat_outer_ip) + outer_new_addr[VLIB_RX] = outer_ip4->src_address; + cnat_ip4_translate_l3 (outer_ip4, outer_new_addr); + + if (ip4->protocol == IP_PROTOCOL_TCP) + { + inner_l4_old_sum = inner_l4_sum = tcp->checksum; + cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port); + tcp->checksum = ip_csum_fold (inner_l4_sum); + } + else if (ip4->protocol == IP_PROTOCOL_UDP) + { + inner_l4_old_sum = inner_l4_sum = udp->checksum; + cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port); + udp->checksum = ip_csum_fold (inner_l4_sum); + } + else + return; + + /* UDP/TCP checksum changed */ + sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum, + ip4_header_t, checksum); + + /* UDP/TCP Ports changed */ + if (old_port[VLIB_TX] && new_port[VLIB_TX]) + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + + if (old_port[VLIB_RX] && new_port[VLIB_RX]) + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + + + cnat_ip4_translate_l3 (ip4, new_addr); + ip_csum_t new_ip_sum = ip4->checksum; + /* IP checksum changed */ + sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum); + + /* IP src/dst addr changed */ + if (new_addr[VLIB_TX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32, + ip4_header_t, dst_address); + + if (new_addr[VLIB_RX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32, + ip4_header_t, src_address); + + icmp->checksum = ip_csum_fold (sum); +} + static_always_inline void cnat_translation_ip4 (const cnat_session_t * session, ip4_header_t * ip4, udp_header_t * udp) @@ -140,27 +272,26 @@ cnat_translation_ip4 (const cnat_session_t * session, if (ip4->protocol == IP_PROTOCOL_TCP) { - if (PREDICT_FALSE (tcp->checksum)) - cnat_ip4_translate_l4 (ip4, udp, &tcp->checksum, new_addr, new_port); - else - { - udp->dst_port = new_port[VLIB_TX]; - udp->src_port = new_port[VLIB_RX]; - } + ip_csum_t sum = tcp->checksum; + cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port); + tcp->checksum = ip_csum_fold (sum); + cnat_ip4_translate_l3 (ip4, new_addr); cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); } else if (ip4->protocol == IP_PROTOCOL_UDP) { - if (PREDICT_FALSE (udp->checksum)) - cnat_ip4_translate_l4 (ip4, udp, &udp->checksum, new_addr, new_port); - else - { - udp->dst_port = new_port[VLIB_TX]; - udp->src_port = new_port[VLIB_RX]; - } + ip_csum_t sum = udp->checksum; + cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port); + udp->checksum = ip_csum_fold (sum); + cnat_ip4_translate_l3 (ip4, new_addr); + } + else if (ip4->protocol == IP_PROTOCOL_ICMP) + { + /* SNAT only if src_addr was translated */ + u8 snat_outer_ip = + (ip4->src_address.as_u32 == session->key.cs_ip[VLIB_RX].ip4.as_u32); + cnat_translation_icmp4 (ip4, udp, new_addr, new_port, snat_outer_ip); } - - cnat_ip4_translate_l3 (ip4, new_addr); } static_always_inline void @@ -174,20 +305,146 @@ cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR]) static_always_inline void cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp, - u16 * checksum, + ip_csum_t * sum, ip6_address_t new_addr[VLIB_N_DIR], u16 new_port[VLIB_N_DIR]) { u16 old_port[VLIB_N_DIR]; ip6_address_t old_addr[VLIB_N_DIR]; - ip_csum_t sum; + + /* Fastpath no checksum */ + if (PREDICT_TRUE (0 == *sum)) + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + return; + } old_port[VLIB_TX] = udp->dst_port; old_port[VLIB_RX] = udp->src_port; ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address); ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address); - sum = *checksum; + if (has_ip6_address (&new_addr[VLIB_TX])) + { + *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]); + *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]); + *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]); + *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]); + } + + if (new_port[VLIB_TX]) + { + udp->dst_port = new_port[VLIB_TX]; + *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + if (has_ip6_address (&new_addr[VLIB_RX])) + { + *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]); + *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]); + *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]); + *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]); + } + + if (new_port[VLIB_RX]) + { + udp->src_port = new_port[VLIB_RX]; + *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } +} + +static_always_inline void +cnat_translation_icmp6 (ip6_header_t * outer_ip6, udp_header_t * outer_udp, + ip6_address_t outer_new_addr[VLIB_N_DIR], + u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip) +{ + icmp46_header_t *icmp = (icmp46_header_t *) outer_udp; + ip6_address_t new_addr[VLIB_N_DIR]; + ip6_address_t old_addr[VLIB_N_DIR]; + ip6_address_t outer_old_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + u16 old_port[VLIB_N_DIR]; + ip_csum_t sum, inner_l4_sum, inner_l4_old_sum; + + if (!icmp6_type_is_error_message (icmp->type)) + return; + + ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2); + udp_header_t *udp = (udp_header_t *) (ip6 + 1); + tcp_header_t *tcp = (tcp_header_t *) udp; + + /* Swap inner ports */ + ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]); + ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]); + new_port[VLIB_TX] = outer_new_port[VLIB_RX]; + new_port[VLIB_RX] = outer_new_port[VLIB_TX]; + + ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address); + ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address); + old_port[VLIB_RX] = udp->src_port; + old_port[VLIB_TX] = udp->dst_port; + + sum = icmp->checksum; + /* Translate outer ip */ + ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address); + ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address); + if (!snat_outer_ip) + ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address); + cnat_ip6_translate_l3 (outer_ip6, outer_new_addr); + if (has_ip6_address (&outer_new_addr[VLIB_TX])) + { + sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]); + sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]); + } + + if (has_ip6_address (&outer_new_addr[VLIB_RX])) + { + sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]); + sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]); + } + + if (ip6->protocol == IP_PROTOCOL_TCP) + { + inner_l4_old_sum = inner_l4_sum = tcp->checksum; + cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port); + tcp->checksum = ip_csum_fold (inner_l4_sum); + } + else if (ip6->protocol == IP_PROTOCOL_UDP) + { + inner_l4_old_sum = inner_l4_sum = udp->checksum; + cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port); + udp->checksum = ip_csum_fold (inner_l4_sum); + } + else + return; + + /* UDP/TCP checksum changed */ + sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum, + ip4_header_t /* cheat */ , + checksum); + + /* UDP/TCP Ports changed */ + if (old_port[VLIB_TX] && new_port[VLIB_TX]) + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + + if (old_port[VLIB_RX] && new_port[VLIB_RX]) + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + + + cnat_ip6_translate_l3 (ip6, new_addr); + /* IP src/dst addr changed */ if (has_ip6_address (&new_addr[VLIB_TX])) { sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]); @@ -196,13 +453,6 @@ cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp, sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]); } - if (new_port[VLIB_TX]) - { - udp->dst_port = new_port[VLIB_TX]; - sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], - ip4_header_t /* cheat */ , - length /* changed member */ ); - } if (has_ip6_address (&new_addr[VLIB_RX])) { sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]); @@ -211,14 +461,7 @@ cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp, sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]); } - if (new_port[VLIB_RX]) - { - udp->src_port = new_port[VLIB_RX]; - sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], - ip4_header_t /* cheat */ , - length /* changed member */ ); - } - *checksum = ip_csum_fold (sum); + icmp->checksum = ip_csum_fold (sum); } static_always_inline void @@ -236,27 +479,26 @@ cnat_translation_ip6 (const cnat_session_t * session, if (ip6->protocol == IP_PROTOCOL_TCP) { - if (PREDICT_FALSE (tcp->checksum)) - cnat_ip6_translate_l4 (ip6, udp, &tcp->checksum, new_addr, new_port); - else - { - udp->dst_port = new_port[VLIB_TX]; - udp->src_port = new_port[VLIB_RX]; - } + ip_csum_t sum = tcp->checksum; + cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port); + tcp->checksum = ip_csum_fold (sum); + cnat_ip6_translate_l3 (ip6, new_addr); cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); } else if (ip6->protocol == IP_PROTOCOL_UDP) { - if (PREDICT_FALSE (udp->checksum)) - cnat_ip6_translate_l4 (ip6, udp, &udp->checksum, new_addr, new_port); - else - { - udp->dst_port = new_port[VLIB_TX]; - udp->src_port = new_port[VLIB_RX]; - } + ip_csum_t sum = udp->checksum; + cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port); + udp->checksum = ip_csum_fold (sum); + cnat_ip6_translate_l3 (ip6, new_addr); + } + else if (ip6->protocol == IP_PROTOCOL_ICMP6) + { + /* SNAT only if src_addr was translated */ + u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address, + &session->key.cs_ip[VLIB_RX].ip6); + cnat_translation_icmp6 (ip6, udp, new_addr, new_port, snat_outer_ip); } - - cnat_ip6_translate_l3 (ip6, new_addr); } static_always_inline void @@ -265,36 +507,80 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af, { udp_header_t *udp; cnat_session_t *session = (cnat_session_t *) bkey; + session->key.cs_af = af; + session->key.__cs_pad[0] = 0; + session->key.__cs_pad[1] = 0; if (AF_IP4 == af) { ip4_header_t *ip4; ip4 = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip4 + 1); - session->key.cs_af = AF_IP4; - session->key.__cs_pad[0] = 0; - session->key.__cs_pad[1] = 0; + if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1); + if (!icmp_type_is_error_message (icmp->type)) + goto error; + ip4 = (ip4_header_t *) (icmp + 2); /* Use inner packet */ + udp = (udp_header_t *) (ip4 + 1); + /* Swap dst & src for search as ICMP payload is reversed */ + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], + &ip4->dst_address); + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], + &ip4->src_address); + session->key.cs_proto = ip4->protocol; + session->key.cs_port[VLIB_TX] = udp->src_port; + session->key.cs_port[VLIB_RX] = udp->dst_port; + } + else + { + udp = (udp_header_t *) (ip4 + 1); + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], + &ip4->dst_address); + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], + &ip4->src_address); + session->key.cs_proto = ip4->protocol; + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; + } - ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], &ip4->dst_address); - ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], &ip4->src_address); - session->key.cs_port[VLIB_RX] = udp->src_port; - session->key.cs_port[VLIB_TX] = udp->dst_port; - session->key.cs_proto = ip4->protocol; } else { ip6_header_t *ip6; ip6 = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip6 + 1); - session->key.cs_af = AF_IP6; - session->key.__cs_pad[0] = 0; - session->key.__cs_pad[1] = 0; - - ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], &ip6->dst_address); - ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], &ip6->src_address); - session->key.cs_port[VLIB_RX] = udp->src_port; - session->key.cs_port[VLIB_TX] = udp->dst_port; - session->key.cs_proto = ip6->protocol; + if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6)) + { + icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1); + if (!icmp6_type_is_error_message (icmp->type)) + goto error; + ip6 = (ip6_header_t *) (icmp + 2); /* Use inner packet */ + udp = (udp_header_t *) (ip6 + 1); + /* Swap dst & src for search as ICMP payload is reversed */ + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], + &ip6->dst_address); + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], + &ip6->src_address); + session->key.cs_proto = ip6->protocol; + session->key.cs_port[VLIB_TX] = udp->src_port; + session->key.cs_port[VLIB_RX] = udp->dst_port; + } + else + { + udp = (udp_header_t *) (ip6 + 1); + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], + &ip6->dst_address); + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], + &ip6->src_address); + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; + session->key.cs_proto = ip6->protocol; + } } + return; + +error: + /* Ensure we dont find anything */ + session->key.cs_proto = 0; + return; } /** @@ -312,32 +598,6 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx, clib_bihash_kv_40_48_t rvalue; int rv; - /* create the reverse flow key */ - ip46_address_copy (&rsession->key.cs_ip[VLIB_RX], - &session->value.cs_ip[VLIB_TX]); - ip46_address_copy (&rsession->key.cs_ip[VLIB_TX], - &session->value.cs_ip[VLIB_RX]); - rsession->key.cs_proto = session->key.cs_proto; - rsession->key.__cs_pad[0] = 0; - rsession->key.__cs_pad[1] = 0; - rsession->key.cs_af = ctx->af; - rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX]; - rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX]; - - /* First search for existing reverse session */ - rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue); - if (!rv) - { - /* Reverse session already exists - corresponding client should also exist - we only need to refcnt the timestamp */ - cnat_session_t *found_rsession = (cnat_session_t *) & rvalue; - session->value.cs_ts_index = found_rsession->value.cs_ts_index; - cnat_timestamp_inc_refcnt (session->value.cs_ts_index); - clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1 /* is_add */ ); - goto create_rsession; - } - session->value.cs_ts_index = cnat_timestamp_new (ctx->now); clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1); @@ -382,10 +642,31 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx, } else { + /* Refcount reverse session */ cnat_client_cnt_session (cc); } -create_rsession: + /* create the reverse flow key */ + ip46_address_copy (&rsession->key.cs_ip[VLIB_RX], + &session->value.cs_ip[VLIB_TX]); + ip46_address_copy (&rsession->key.cs_ip[VLIB_TX], + &session->value.cs_ip[VLIB_RX]); + rsession->key.cs_proto = session->key.cs_proto; + rsession->key.__cs_pad[0] = 0; + rsession->key.__cs_pad[1] = 0; + rsession->key.cs_af = ctx->af; + rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX]; + rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX]; + + /* First search for existing reverse session */ + rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue); + if (!rv) + { + /* Reverse session already exists + cleanup before creating for refcnts */ + cnat_session_t *found_rsession = (cnat_session_t *) & rvalue; + cnat_session_free (found_rsession); + } /* add the reverse flow */ ip46_address_copy (&rsession->value.cs_ip[VLIB_RX], &session->key.cs_ip[VLIB_TX]); diff --git a/src/plugins/cnat/cnat_node_snat.c b/src/plugins/cnat/cnat_node_snat.c index cc1421be084..aaa9e162ef0 100644 --- a/src/plugins/cnat/cnat_node_snat.c +++ b/src/plugins/cnat/cnat_node_snat.c @@ -81,7 +81,8 @@ cnat_snat_inline (vlib_main_t * vm, vnet_feature_next (&arc_next0, b); next0 = arc_next0; - if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP) + if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP + && iproto != IP_PROTOCOL_ICMP && iproto != IP_PROTOCOL_ICMP6) { /* Dont translate */ goto trace; diff --git a/src/plugins/cnat/cnat_node_vip.c b/src/plugins/cnat/cnat_node_vip.c index 574b72fe27b..10f228f974d 100644 --- a/src/plugins/cnat/cnat_node_vip.c +++ b/src/plugins/cnat/cnat_node_vip.c @@ -95,7 +95,8 @@ cnat_vip_inline (vlib_main_t * vm, cc = cnat_client_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]); - if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP) + if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP + && iproto != IP_PROTOCOL_ICMP && iproto != IP_PROTOCOL_ICMP6) { /* Dont translate & follow the fib programming */ next0 = cc->cc_parent.dpoi_next_node; @@ -214,6 +215,7 @@ cnat_vip_inline (vlib_main_t * vm, } session->value.cs_lbi = dpo0->dpoi_index; + /* refcnt session in current client */ cnat_client_cnt_session (cc); cnat_session_create (session, ctx, rsession_flags); created_session = 1; @@ -222,7 +224,6 @@ cnat_vip_inline (vlib_main_t * vm, vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi; } - if (AF_IP4 == ctx->af) cnat_translation_ip4 (session, ip4, udp0); else diff --git a/src/plugins/cnat/cnat_session.h b/src/plugins/cnat/cnat_session.h index 9e1e89342a8..4699dcc4fcf 100644 --- a/src/plugins/cnat/cnat_session.h +++ b/src/plugins/cnat/cnat_session.h @@ -146,6 +146,11 @@ extern u64 cnat_session_scan (vlib_main_t * vm, f64 start_time, int i); */ extern int cnat_session_purge (void); +/** + * Free a session & update refcounts + */ +extern void cnat_session_free (cnat_session_t * session); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/cnat/test/test_cnat.py b/src/plugins/cnat/test/test_cnat.py index 518d7335edc..34cd8b58240 100644 --- a/src/plugins/cnat/test/test_cnat.py +++ b/src/plugins/cnat/test/test_cnat.py @@ -7,8 +7,11 @@ from vpp_ip import DpoProto from scapy.packet import Raw from scapy.layers.l2 import Ether -from scapy.layers.inet import IP, UDP, TCP -from scapy.layers.inet6 import IPv6 +from scapy.layers.inet import IP, UDP, TCP, ICMP +from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror +from scapy.layers.inet6 import IPv6, IPerror6, ICMPv6DestUnreach + +import struct from ipaddress import ip_address, ip_network, \ IPv4Address, IPv6Address, IPv4Network, IPv6Network @@ -39,6 +42,10 @@ class Ep(object): return {'addr': self.ip, 'port': self.port} + @property + def isV6(self): + return ":" in self.ip + def __str__(self): return ("%s:%d" % (self.ip, self.port)) @@ -180,10 +187,10 @@ class TestCNatTranslation(VppTestCase): i.admin_down() super(TestCNatTranslation, self).tearDown() - def cnat_create_translation(self, vip, nbr, isV6=False): - ip_v = "ip6" if isV6 else "ip4" + def cnat_create_translation(self, vip, nbr): + ip_v = "ip6" if vip.isV6 else "ip4" dep = Ep(getattr(self.pg1.remote_hosts[nbr], ip_v), 4000 + nbr) - sep = Ep("::", 0) if isV6 else Ep("0.0.0.0", 0) + sep = Ep("::", 0) if vip.isV6 else Ep("0.0.0.0", 0) t1 = VppCNatTranslation( self, vip.l4p, vip, [EpTuple(sep, dep), EpTuple(sep, dep)]) @@ -341,7 +348,7 @@ class TestCNatTranslation(VppTestCase): trs = [] for nbr, vip in enumerate(vips): - trs.append(self.cnat_create_translation(vip, nbr, isV6=isV6)) + trs.append(self.cnat_create_translation(vip, nbr)) self.logger.info(self.vapi.cli("sh cnat client")) self.logger.info(self.vapi.cli("sh cnat translation")) @@ -372,8 +379,10 @@ class TestCNatTranslation(VppTestCase): n_tries += 1 sessions = self.vapi.cnat_session_dump() self.sleep(2) + print(self.vapi.cli("show cnat session verbose")) self.assertTrue(n_tries < 100) + self.vapi.cli("test cnat scanner off") # # load some flows again and purge @@ -398,6 +407,109 @@ class TestCNatTranslation(VppTestCase): self.vapi.cnat_session_purge() self.assertFalse(self.vapi.cnat_session_dump()) + def test_icmp(self): + vips = [ + Ep("30.0.0.1", 5555), + Ep("30.0.0.2", 5554), + Ep("30.0.0.2", 5553, UDP), + Ep("30::1", 6666), + Ep("30::2", 5553, UDP), + ] + sport = 1234 + + self.pg0.generate_remote_hosts(len(vips)) + self.pg0.configure_ipv6_neighbors() + self.pg0.configure_ipv4_neighbors() + + self.pg1.generate_remote_hosts(len(vips)) + self.pg1.configure_ipv6_neighbors() + self.pg1.configure_ipv4_neighbors() + + self.vapi.cli("test cnat scanner off") + trs = [] + for nbr, vip in enumerate(vips): + trs.append(self.cnat_create_translation(vip, nbr)) + + self.logger.info(self.vapi.cli("sh cnat client")) + self.logger.info(self.vapi.cli("sh cnat translation")) + + for nbr, vip in enumerate(vips): + if vip.isV6: + client_addr = self.pg0.remote_hosts[0].ip6 + remote_addr = self.pg1.remote_hosts[nbr].ip6 + remote2_addr = self.pg2.remote_hosts[0].ip6 + else: + client_addr = self.pg0.remote_hosts[0].ip4 + remote_addr = self.pg1.remote_hosts[nbr].ip4 + remote2_addr = self.pg2.remote_hosts[0].ip4 + IP46 = IPv6 if vip.isV6 else IP + # from client to vip + p1 = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_hosts[0].mac) / + IP46(src=client_addr, dst=vip.ip) / + vip.l4p(sport=sport, dport=vip.port) / + Raw()) + + rxs = self.send_and_expect(self.pg0, + p1 * N_PKTS, + self.pg1) + + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual(rx[IP46].dst, remote_addr) + self.assertEqual(rx[vip.l4p].dport, 4000 + nbr) + self.assertEqual(rx[IP46].src, client_addr) + self.assertEqual(rx[vip.l4p].sport, sport) + + InnerIP = rxs[0][IP46] + + ICMP46 = ICMPv6DestUnreach if vip.isV6 else ICMP + ICMPelem = ICMPv6DestUnreach(code=1) if vip.isV6 else ICMP(type=11) + # from vip to client, ICMP error + p1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IP46(src=remote_addr, dst=client_addr) / + ICMPelem / InnerIP) + + rxs = self.send_and_expect(self.pg1, + p1 * N_PKTS, + self.pg0) + + TCPUDPError = TCPerror if vip.l4p == TCP else UDPerror + IP46error = IPerror6 if vip.isV6 else IPerror + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual(rx[IP46].src, vip.ip) + self.assertEqual(rx[ICMP46][IP46error].src, client_addr) + self.assertEqual(rx[ICMP46][IP46error].dst, vip.ip) + self.assertEqual(rx[ICMP46][IP46error] + [TCPUDPError].sport, sport) + self.assertEqual(rx[ICMP46][IP46error] + [TCPUDPError].dport, vip.port) + + # from other remote to client, ICMP error + # outside shouldn't be NAT-ed + p1 = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP46(src=remote2_addr, dst=client_addr) / + ICMPelem / InnerIP) + + rxs = self.send_and_expect(self.pg1, + p1 * N_PKTS, + self.pg0) + + TCPUDPError = TCPerror if vip.l4p == TCP else UDPerror + IP46error = IPerror6 if vip.isV6 else IPerror + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual(rx[IP46].src, remote2_addr) + self.assertEqual(rx[ICMP46][IP46error].src, client_addr) + self.assertEqual(rx[ICMP46][IP46error].dst, vip.ip) + self.assertEqual(rx[ICMP46][IP46error] + [TCPUDPError].sport, sport) + self.assertEqual(rx[ICMP46][IP46error] + [TCPUDPError].dport, vip.port) + + self.vapi.cnat_session_purge() + def test_cnat6(self): # """ CNat Translation ipv6 """ vips = [ @@ -478,7 +590,7 @@ class TestCNatSourceNAT(VppTestCase): def cnat_test_sourcenat(self, srcNatAddr, l4p=TCP, isV6=False): ip_v = "ip6" if isV6 else "ip4" - ip_class = IPv6 if isV6 else IP + IP46 = IPv6 if isV6 else IP sports = [1234, 1235, 1236] dports = [6661, 6662, 6663] @@ -493,14 +605,17 @@ class TestCNatSourceNAT(VppTestCase): t1 = self.cnat_set_snat_address(srcNatAddr, self.pg0, isV6) for nbr, remote_host in enumerate(self.pg1.remote_hosts): + if isV6: + client_addr = self.pg0.remote_hosts[0].ip6 + remote_addr = self.pg1.remote_hosts[nbr].ip6 + else: + client_addr = self.pg0.remote_hosts[0].ip4 + remote_addr = self.pg1.remote_hosts[nbr].ip4 # from pods to outside network p1 = ( - Ether( - dst=self.pg0.local_mac, - src=self.pg0.remote_hosts[0].mac) / - ip_class( - src=getattr(self.pg0.remote_hosts[0], ip_v), - dst=getattr(remote_host, ip_v)) / + Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_hosts[0].mac) / + IP46(src=client_addr, dst=remote_addr) / l4p(sport=sports[nbr], dport=dports[nbr]) / Raw()) @@ -510,21 +625,16 @@ class TestCNatSourceNAT(VppTestCase): self.pg1) for rx in rxs: self.assert_packet_checksums_valid(rx) - self.assertEqual( - rx[ip_class].dst, - getattr(remote_host, ip_v)) + self.assertEqual(rx[IP46].dst, remote_addr) self.assertEqual(rx[l4p].dport, dports[nbr]) - self.assertEqual( - rx[ip_class].src, - srcNatAddr) + self.assertEqual(rx[IP46].src, srcNatAddr) sport = rx[l4p].sport # from outside to pods p2 = ( - Ether( - dst=self.pg1.local_mac, - src=self.pg1.remote_hosts[nbr].mac) / - ip_class(src=getattr(remote_host, ip_v), dst=srcNatAddr) / + Ether(dst=self.pg1.local_mac, + src=self.pg1.remote_hosts[nbr].mac) / + IP46(src=remote_addr, dst=srcNatAddr) / l4p(sport=dports[nbr], dport=sport) / Raw()) @@ -535,18 +645,14 @@ class TestCNatSourceNAT(VppTestCase): for rx in rxs: self.assert_packet_checksums_valid(rx) - self.assertEqual( - rx[ip_class].dst, - getattr(self.pg0.remote_hosts[0], ip_v)) + self.assertEqual(rx[IP46].dst, client_addr) self.assertEqual(rx[l4p].dport, sports[nbr]) self.assertEqual(rx[l4p].sport, dports[nbr]) - self.assertEqual( - rx[ip_class].src, - getattr(remote_host, ip_v)) + self.assertEqual(rx[IP46].src, remote_addr) # add remote host to exclude list subnet_mask = 100 if isV6 else 16 - subnet = getattr(remote_host, ip_v) + "/" + str(subnet_mask) + subnet = "%s/%d" % (remote_addr, subnet_mask) exclude_subnet = ip_network(subnet, strict=False) t1.cnat_exclude_subnet(exclude_subnet) @@ -558,13 +664,9 @@ class TestCNatSourceNAT(VppTestCase): self.pg1) for rx in rxs: self.assert_packet_checksums_valid(rx) - self.assertEqual( - rx[ip_class].dst, - getattr(remote_host, ip_v)) + self.assertEqual(rx[IP46].dst, remote_addr) self.assertEqual(rx[l4p].dport, dports[nbr]) - self.assertEqual( - rx[ip_class].src, - getattr(self.pg0.remote_hosts[0], ip_v)) + self.assertEqual(rx[IP46].src, client_addr) # remove remote host from exclude list t1.cnat_exclude_subnet(exclude_subnet, isAdd=False) @@ -577,13 +679,9 @@ class TestCNatSourceNAT(VppTestCase): for rx in rxs: self.assert_packet_checksums_valid(rx) - self.assertEqual( - rx[ip_class].dst, - getattr(remote_host, ip_v)) + self.assertEqual(rx[IP46].dst, remote_addr) self.assertEqual(rx[l4p].dport, dports[nbr]) - self.assertEqual( - rx[ip_class].src, - srcNatAddr) + self.assertEqual(rx[IP46].src, srcNatAddr) def test_cnat6_sourcenat(self): # """ CNat Source Nat ipv6 """