From 29f3c7d2ecac2f9d80bb33e91bd5d1f9d434768a Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 19 May 2020 07:17:19 +0000 Subject: [PATCH] cnat: Destination based NAT Type: feature Signed-off-by: Neale Ranns Change-Id: I64a99a4fbc674212944247793fd5c1fb701408cb --- MAINTAINERS | 6 + src/plugins/cnat/CMakeLists.txt | 28 ++ src/plugins/cnat/FEATURE.yaml | 17 + src/plugins/cnat/bihash_40_48.h | 112 ++++++ src/plugins/cnat/cnat.api | 136 +++++++ src/plugins/cnat/cnat.rst | 81 ++++ src/plugins/cnat/cnat_api.c | 317 +++++++++++++++ src/plugins/cnat/cnat_client.c | 407 +++++++++++++++++++ src/plugins/cnat/cnat_client.h | 226 +++++++++++ src/plugins/cnat/cnat_error.def | 19 + src/plugins/cnat/cnat_node.h | 535 +++++++++++++++++++++++++ src/plugins/cnat/cnat_node_snat.c | 237 +++++++++++ src/plugins/cnat/cnat_node_vip.c | 308 ++++++++++++++ src/plugins/cnat/cnat_scanner.c | 113 ++++++ src/plugins/cnat/cnat_session.c | 269 +++++++++++++ src/plugins/cnat/cnat_session.h | 157 ++++++++ src/plugins/cnat/cnat_snat.c | 252 ++++++++++++ src/plugins/cnat/cnat_snat.h | 74 ++++ src/plugins/cnat/cnat_translation.c | 432 ++++++++++++++++++++ src/plugins/cnat/cnat_translation.h | 204 ++++++++++ src/plugins/cnat/cnat_types.c | 149 +++++++ src/plugins/cnat/cnat_types.h | 281 +++++++++++++ src/plugins/cnat/test/test_cnat.py | 596 ++++++++++++++++++++++++++++ src/vnet/ip/ip_types.c | 35 ++ src/vnet/lisp-cp/control.c | 35 -- 25 files changed, 4991 insertions(+), 35 deletions(-) create mode 100644 src/plugins/cnat/CMakeLists.txt create mode 100644 src/plugins/cnat/FEATURE.yaml create mode 100644 src/plugins/cnat/bihash_40_48.h create mode 100644 src/plugins/cnat/cnat.api create mode 100644 src/plugins/cnat/cnat.rst create mode 100644 src/plugins/cnat/cnat_api.c create mode 100644 src/plugins/cnat/cnat_client.c create mode 100644 src/plugins/cnat/cnat_client.h create mode 100644 src/plugins/cnat/cnat_error.def create mode 100644 src/plugins/cnat/cnat_node.h create mode 100644 
src/plugins/cnat/cnat_node_snat.c create mode 100644 src/plugins/cnat/cnat_node_vip.c create mode 100644 src/plugins/cnat/cnat_scanner.c create mode 100644 src/plugins/cnat/cnat_session.c create mode 100644 src/plugins/cnat/cnat_session.h create mode 100644 src/plugins/cnat/cnat_snat.c create mode 100644 src/plugins/cnat/cnat_snat.h create mode 100644 src/plugins/cnat/cnat_translation.c create mode 100644 src/plugins/cnat/cnat_translation.h create mode 100644 src/plugins/cnat/cnat_types.c create mode 100644 src/plugins/cnat/cnat_types.h create mode 100644 src/plugins/cnat/test/test_cnat.py diff --git a/MAINTAINERS b/MAINTAINERS index 017557bbda4..e928186cf65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -682,6 +682,12 @@ I: urpf M: Neale Ranns F: src/plugins/urpf +Plugin - CNat +I: cnat +M: Nathan Skrzypczak +M: Neale Ranns +F: src/plugins/cnat + VPP Config Tooling I: vpp_config M: John DeNisco diff --git a/src/plugins/cnat/CMakeLists.txt b/src/plugins/cnat/CMakeLists.txt new file mode 100644 index 00000000000..b37b02cfc16 --- /dev/null +++ b/src/plugins/cnat/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2018 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_vpp_plugin(cnat + SOURCES + cnat_api.c + cnat_client.c + cnat_node_snat.c + cnat_node_vip.c + cnat_scanner.c + cnat_session.c + cnat_translation.c + cnat_types.c + cnat_snat.c + + API_FILES + cnat.api +) diff --git a/src/plugins/cnat/FEATURE.yaml b/src/plugins/cnat/FEATURE.yaml new file mode 100644 index 00000000000..9deda2e94cc --- /dev/null +++ b/src/plugins/cnat/FEATURE.yaml @@ -0,0 +1,17 @@ +--- +name: Cloud NAT +maintainer: Nathan Skrzypczak +features: + - Destination based address/port translation + - Conditional sourceNATing based on prefix exclusions + +description: "This plugin is intended to complement the VPP's plugin_nat for + Cloud use-cases. It allows for source/destination address/port + translation based on multiple criteria. It is intended to be modular + enough so that one could write a use-case optimised translation function + without having to deal with actually re-writing packets or maintaining + sessions. + This plugin supports multithreading. Workers share a unique bihash where + sessions are stored." +state: development +properties: [API, CLI, MULTITHREAD] diff --git a/src/plugins/cnat/bihash_40_48.h b/src/plugins/cnat/bihash_40_48.h new file mode 100644 index 00000000000..df345cec357 --- /dev/null +++ b/src/plugins/cnat/bihash_40_48.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#undef BIHASH_TYPE +#undef BIHASH_KVP_PER_PAGE +#undef BIHASH_32_64_SVM +#undef BIHASH_ENABLE_STATS +#undef BIHASH_KVP_AT_BUCKET_LEVEL +#undef BIHASH_LAZY_INSTANTIATE +#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES + +#define BIHASH_TYPE _40_48 +#define BIHASH_KVP_PER_PAGE 2 +#define BIHASH_KVP_AT_BUCKET_LEVEL 1 +#define BIHASH_LAZY_INSTANTIATE 1 +#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 2 + +#ifndef __included_bihash_40_48_h__ +#define __included_bihash_40_48_h__ + +#include +#include +#include +#include +#include + +typedef struct +{ + u64 key[5]; + u64 value[6]; +} clib_bihash_kv_40_48_t; + +static inline int +clib_bihash_is_free_40_48 (const clib_bihash_kv_40_48_t * v) +{ + /* Free values are clib_memset to 0xff, check a bit... */ + if (v->key[0] == ~0ULL && v->value[0] == ~0ULL) + return 1; + return 0; +} + +static inline u64 +clib_bihash_hash_40_48 (const clib_bihash_kv_40_48_t * v) +{ +#ifdef clib_crc32c_uses_intrinsics + return clib_crc32c ((u8 *) v->key, 40); +#else + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]; + return clib_xxhash (tmp); +#endif +} + +static inline u8 * +format_bihash_kvp_40_48 (u8 * s, va_list * args) +{ + clib_bihash_kv_40_48_t *v = va_arg (*args, clib_bihash_kv_40_48_t *); + + s = + format (s, + "key %llu %llu %llu %llu %llu value %llu %llu %llu %llu %llu %u", + v->key[0], v->key[1], v->key[2], v->key[3], v->key[4], + v->value[0], v->value[1], v->value[2], v->value[3], v->value[4], + v->value[5]); + return s; +} + +static inline int +clib_bihash_key_compare_40_48 (u64 * a, u64 * b) +{ +#if defined (CLIB_HAVE_VEC512) + u64x8 v; + v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b); + return (u64x8_is_zero_mask (v) & 0x1f) == 0; +#elif defined (CLIB_HAVE_VEC256) + u64x4 v = { a[4] ^ b[4], 0, 0, 0 }; + v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); + return u64x4_is_all_zero (v); +#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) + u64x2 v = { a[4] ^ b[4], 0 }; + 
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); + v |= u64x2_load_unaligned (a + 2) ^ u64x2_load_unaligned (b + 2); + return u64x2_is_all_zero (v); +#else + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4])) == 0; +#endif +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_40_48_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat.api b/src/plugins/cnat/cnat.api new file mode 100644 index 00000000000..10af9b9f8d7 --- /dev/null +++ b/src/plugins/cnat/cnat.api @@ -0,0 +1,136 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \file + This file defines the vpp control-plane API messages + used to control the CNat plugin +*/ + +option version = "0.1.0"; +import "vnet/ip/ip_types.api"; +import "vnet/fib/fib_types.api"; + +enum cnat_translation_flags:u8 +{ + CNAT_TRANSLATION_ALLOC_PORT = 1, +}; + +typedef cnat_endpoint +{ + vl_api_address_t addr; + u16 port; +}; + +typedef cnat_endpoint_tuple +{ + vl_api_cnat_endpoint_t dst_ep; + vl_api_cnat_endpoint_t src_ep; +}; + +typedef cnat_translation +{ + vl_api_cnat_endpoint_t vip; + u32 id; + vl_api_ip_proto_t ip_proto; + u8 is_real_ip; + u8 flags; + u8 n_paths; + vl_api_cnat_endpoint_tuple_t paths[n_paths]; +}; + +define cnat_translation_update +{ + u32 client_index; + u32 context; + vl_api_cnat_translation_t translation; +}; + +define cnat_translation_update_reply +{ + u32 context; + i32 retval; + u32 id; +}; + +autoreply define cnat_translation_del +{ + u32 client_index; + u32 context; + u32 id; +}; + +define cnat_translation_details +{ + u32 context; + vl_api_cnat_translation_t translation; +}; + +define cnat_translation_dump +{ + u32 client_index; + u32 context; +}; + +autoreply define cnat_session_purge +{ + u32 client_index; + u32 context; +}; + +typedef cnat_session +{ + vl_api_cnat_endpoint_t src; + vl_api_cnat_endpoint_t dst; + vl_api_cnat_endpoint_t new; + vl_api_ip_proto_t ip_proto; + f64 timestamp; +}; + +define cnat_session_details +{ + u32 context; + vl_api_cnat_session_t session; +}; + +define cnat_session_dump +{ + u32 client_index; + u32 context; +}; + +autoreply define cnat_set_snat_addresses +{ + u32 client_index; + u32 context; + vl_api_ip4_address_t snat_ip4; + vl_api_ip6_address_t snat_ip6; +}; + +autoreply define cnat_add_del_snat_prefix +{ + u32 client_index; + u32 context; + u8 is_add; + vl_api_prefix_t prefix; +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat.rst b/src/plugins/cnat/cnat.rst new file 
mode 100644 index 00000000000..f1b3deff55e --- /dev/null +++ b/src/plugins/cnat/cnat.rst @@ -0,0 +1,81 @@ +.. _dev_cnat: + +.. toctree:: + +Cloud NAT +========= + +Overview +________ + +This plugin covers specific NAT use-cases that come mostly +from the container networking world. Contrary to the +NAT concepts used for e.g. a home gateway, there is no notion +of 'outside' and 'inside'. We handle Virtual (or Real) IPs and +translations of the packets destined to them. + +Terminology & Usage +___________________ + +Setting up the NAT will consist in the creation of a translation +that has several backends. A translation is a 3-tuple containing: +a fully qualified IP address, a port and a protocol. All packets +destined to it (ip, port) will then choose one of the backends, +and follow its rewrite rules. + +A backend consists of four rewrite components (source & destination +address, source & destination port) that shall be applied to packets +on the way in, and reverted on the way back. + +Backends are equally load-balanced with a flow hash. The choice +of a backend for a flow will trigger the creation of a NAT session, +that will store the packet rewrite to do and the one to undo +until the flow is reset or a timeout is reached. + +Translating Addresses +--------------------- + +In this example, all packets destined to 30.0.0.2:80 will be +rewritten so that their destination IP is 20.0.0.1 and destination +port 8080. Here 30.0.0.2 has to be a virtual IP, it cannot be +assigned to an interface. + +.. code-block:: console + + cnat translation add proto TCP vip 30.0.0.2 80 to ->20.0.0.1 8080 + + +If 30.0.0.2 is the address of an interface, we can use the following +to do the same translation, and additionally change the source +address to 1.2.3.4 + +.. code-block:: console + + cnat translation add proto TCP real 30.0.0.2 80 to 1.2.3.4->20.0.0.1 8080 + +To show existing translations and sessions you can use + +.. 
code-block:: console + + cnat show session verbose + cnat show translation + + +SourceNATing outgoing traffic +----------------------------- + +An independent part of the plugin allows changing the source address +of outgoing traffic on a per-interface basis. + +.. code-block:: console + + cnat snat with 30::1 + cnat snat exclude 20::/100 + ex_ctl _calico_master cnat snat exclude 10::/100 + ex_ctl _calico_master set interface feature tap0 ip6-cnat-snat arc ip6-unicast + + + +Extending the NAT +_________________ + diff --git a/src/plugins/cnat/cnat_api.c b/src/plugins/cnat/cnat_api.c new file mode 100644 index 00000000000..014f75c8682 --- /dev/null +++ b/src/plugins/cnat/cnat_api.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +/* define message IDs */ +#include +#include +#include + +/** + * Base message ID fot the plugin + */ +static u32 cnat_base_msg_id; + +#define REPLY_MSG_ID_BASE cnat_base_msg_id + +#include + +static void +cnat_endpoint_decode (const vl_api_cnat_endpoint_t * in, + cnat_endpoint_t * out) +{ + ip_address_decode2 (&in->addr, &out->ce_ip); + out->ce_port = clib_net_to_host_u16 (in->port); +} + +static void +cnat_endpoint_tuple_decode (const vl_api_cnat_endpoint_tuple_t * in, + cnat_endpoint_tuple_t * out) +{ + cnat_endpoint_decode (&in->src_ep, &out->src_ep); + cnat_endpoint_decode (&in->dst_ep, &out->dst_ep); +} + +static void +cnat_endpoint_encode (const cnat_endpoint_t * in, + vl_api_cnat_endpoint_t * out) +{ + ip_address_encode2 (&in->ce_ip, &out->addr); + out->port = clib_net_to_host_u16 (in->ce_port); +} + +static void +vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t + * mp) +{ + vl_api_cnat_translation_update_reply_t *rmp; + cnat_endpoint_t vip; + cnat_endpoint_tuple_t *paths = NULL, *path; + ip_protocol_t ip_proto; + u32 id = ~0; + u8 flags; + int rv = 0; + u8 pi; + + rv = ip_proto_decode (mp->translation.ip_proto, &ip_proto); + + if (rv) + goto done; + + vec_validate (paths, mp->translation.n_paths - 1); + + for (pi = 0; pi < mp->translation.n_paths; pi++) + { + path = &paths[pi]; + cnat_endpoint_tuple_decode (&mp->translation.paths[pi], path); + } + cnat_endpoint_decode (&mp->translation.vip, &vip); + + flags = mp->translation.flags; + if (!mp->translation.is_real_ip) + flags |= CNAT_FLAG_EXCLUSIVE; + id = cnat_translation_update (&vip, ip_proto, paths, flags); + + vec_free (paths); + +done: + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_CNAT_TRANSLATION_UPDATE_REPLY, + ({ + rmp->id = htonl (id); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_cnat_translation_del_t_handler (vl_api_cnat_translation_del_t 
* mp) +{ + vl_api_cnat_translation_del_reply_t *rmp; + int rv; + + rv = cnat_translation_delete (ntohl (mp->id)); + + REPLY_MACRO (VL_API_CNAT_TRANSLATION_DEL_REPLY); +} + +typedef struct cnat_dump_walk_ctx_t_ +{ + vl_api_registration_t *rp; + u32 context; +} cnat_dump_walk_ctx_t; + +static walk_rc_t +cnat_translation_send_details (u32 cti, void *args) +{ + vl_api_cnat_translation_details_t *mp; + cnat_dump_walk_ctx_t *ctx; + cnat_ep_trk_t *trk; + vl_api_cnat_endpoint_tuple_t *path; + size_t msg_size; + cnat_translation_t *ct; + u8 n_paths; + + ctx = args; + ct = cnat_translation_get (cti); + n_paths = vec_len (ct->ct_paths); + msg_size = sizeof (*mp) + sizeof (mp->translation.paths[0]) * n_paths; + + mp = vl_msg_api_alloc_zero (msg_size); + mp->_vl_msg_id = ntohs (VL_API_CNAT_TRANSLATION_DETAILS + cnat_base_msg_id); + + /* fill in the message */ + mp->context = ctx->context; + mp->translation.n_paths = n_paths; + mp->translation.id = htonl (cti); + cnat_endpoint_encode (&ct->ct_vip, &mp->translation.vip); + mp->translation.ip_proto = ip_proto_encode (ct->ct_proto); + + path = mp->translation.paths; + vec_foreach (trk, ct->ct_paths) + { + cnat_endpoint_encode (&trk->ct_ep[VLIB_TX], &path->dst_ep); + cnat_endpoint_encode (&trk->ct_ep[VLIB_RX], &path->src_ep); + path++; + } + + vl_api_send_msg (ctx->rp, (u8 *) mp); + + return (WALK_CONTINUE); +} + +static void +vl_api_cnat_translation_dump_t_handler (vl_api_cnat_translation_dump_t * mp) +{ + vl_api_registration_t *rp; + + rp = vl_api_client_index_to_registration (mp->client_index); + if (rp == 0) + return; + + cnat_dump_walk_ctx_t ctx = { + .rp = rp, + .context = mp->context, + }; + + cnat_translation_walk (cnat_translation_send_details, &ctx); +} + +static void +ip_address2_from_46 (const ip46_address_t * nh, + ip_address_family_t af, ip_address_t * ip) +{ + ip_addr_46 (ip) = *nh; + ip_addr_version (ip) = af; +} + +static walk_rc_t +cnat_session_send_details (const cnat_session_t * session, void *args) +{ + 
vl_api_cnat_session_details_t *mp; + cnat_dump_walk_ctx_t *ctx; + cnat_endpoint_t ep; + + ctx = args; + + mp = vl_msg_api_alloc_zero (sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_CNAT_SESSION_DETAILS + cnat_base_msg_id); + + /* fill in the message */ + mp->context = ctx->context; + + ip_address2_from_46 (&session->value.cs_ip[VLIB_TX], session->key.cs_af, + &ep.ce_ip); + ep.ce_port = clib_host_to_net_u16 (session->value.cs_port[VLIB_TX]); + cnat_endpoint_encode (&ep, &mp->session.new); + + ip_address2_from_46 (&session->key.cs_ip[VLIB_RX], session->key.cs_af, + &ep.ce_ip); + ep.ce_port = clib_host_to_net_u16 (session->key.cs_port[VLIB_RX]); + cnat_endpoint_encode (&ep, &mp->session.src); + + ip_address2_from_46 (&session->key.cs_ip[VLIB_TX], session->key.cs_af, + &ep.ce_ip); + ep.ce_port = clib_host_to_net_u16 (session->key.cs_port[VLIB_TX]); + cnat_endpoint_encode (&ep, &mp->session.dst); + + mp->session.ip_proto = ip_proto_encode (session->key.cs_proto); + + vl_api_send_msg (ctx->rp, (u8 *) mp); + + return (WALK_CONTINUE); +} + +static void +vl_api_cnat_session_dump_t_handler (vl_api_cnat_session_dump_t * mp) +{ + vl_api_registration_t *rp; + + rp = vl_api_client_index_to_registration (mp->client_index); + if (rp == 0) + return; + + cnat_dump_walk_ctx_t ctx = { + .rp = rp, + .context = mp->context, + }; + + cnat_session_walk (cnat_session_send_details, &ctx); +} + +static void +vl_api_cnat_session_purge_t_handler (vl_api_cnat_session_purge_t * mp) +{ + vl_api_cnat_session_purge_reply_t *rmp; + int rv; + + cnat_client_throttle_pool_process (); + rv = cnat_session_purge (); + rv |= cnat_translation_purge (); + + REPLY_MACRO (VL_API_CNAT_SESSION_PURGE_REPLY); +} + +static void +vl_api_cnat_set_snat_addresses_t_handler (vl_api_cnat_set_snat_addresses_t + * mp) +{ + vl_api_cnat_set_snat_addresses_reply_t *rmp; + int rv = 0; + + ip4_address_decode (mp->snat_ip4, &cnat_main.snat_ip4); + ip6_address_decode (mp->snat_ip6, &cnat_main.snat_ip6); + + REPLY_MACRO 
(VL_API_CNAT_SET_SNAT_ADDRESSES_REPLY); +} + +static void + vl_api_cnat_add_del_snat_prefix_t_handler + (vl_api_cnat_add_del_snat_prefix_t * mp) +{ + vl_api_cnat_add_del_snat_prefix_reply_t *rmp; + ip_prefix_t pfx; + int rv; + + ip_prefix_decode2 (&mp->prefix, &pfx); + if (mp->is_add) + rv = cnat_add_snat_prefix (&pfx); + else + rv = cnat_del_snat_prefix (&pfx); + + REPLY_MACRO (VL_API_CNAT_ADD_DEL_SNAT_PREFIX_REPLY); +} + +#include + +static clib_error_t * +cnat_api_init (vlib_main_t * vm) +{ + /* Ask for a correctly-sized block of API message decode slots */ + cnat_base_msg_id = setup_message_id_table (); + + return 0; +} + +VLIB_INIT_FUNCTION (cnat_api_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "CNat Translate", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c new file mode 100644 index 00000000000..10d9966ad53 --- /dev/null +++ b/src/plugins/cnat/cnat_client.c @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +cnat_client_t *cnat_client_pool; + +cnat_client_db_t cnat_client_db; + +dpo_type_t cnat_client_dpo; + +static_always_inline u8 +cnat_client_is_clone (cnat_client_t * cc) +{ + return (FIB_NODE_INDEX_INVALID == cc->cc_fei); +} + +static void +cnat_client_db_remove (cnat_client_t * cc) +{ + if (ip_addr_version (&cc->cc_ip) == AF_IP4) + hash_unset (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32); + else + hash_unset_mem_free (&cnat_client_db.crd_cip6, &ip_addr_v6 (&cc->cc_ip)); +} + +static void +cnat_client_destroy (cnat_client_t * cc) +{ + ASSERT (!cnat_client_is_clone (cc)); + if (!(cc->flags & CNAT_FLAG_EXCLUSIVE)) + { + ASSERT (fib_entry_is_sourced (cc->cc_fei, cnat_fib_source)); + fib_table_entry_delete_index (cc->cc_fei, cnat_fib_source); + ASSERT (!fib_entry_is_sourced (cc->cc_fei, cnat_fib_source)); + } + cnat_client_db_remove (cc); + dpo_reset (&cc->cc_parent); + pool_put (cnat_client_pool, cc); +} + +void +cnat_client_free_by_ip (ip46_address_t * ip, u8 af) +{ + cnat_client_t *cc; + cc = (AF_IP4 == af ? + cnat_client_ip4_find (&ip->ip4) : cnat_client_ip6_find (&ip->ip6)); + /* This can happen if the translation gets deleted + before the session */ + if (NULL == cc) + return; + if ((0 == cnat_client_uncnt_session (cc)) + && (cc->flags & CNAT_FLAG_EXPIRES)) + cnat_client_destroy (cc); +} + +void +cnat_client_throttle_pool_process () +{ + /* This processes ips stored in the throttle pool + to update session refcounts + and should be called before cnat_client_free_by_ip */ + vlib_thread_main_t *tm = vlib_get_thread_main (); + cnat_client_t *cc; + int nthreads; + u32 *del_vec = NULL, *ai; + ip_address_t *addr; + nthreads = tm->n_threads + 1; + for (int i = 0; i < nthreads; i++) + { + vec_reset_length (del_vec); + clib_spinlock_lock (&cnat_client_db.throttle_pool_lock[i]); + /* *INDENT-OFF* */ + pool_foreach(addr, cnat_client_db.throttle_pool[i], ({ + cc = (AF_IP4 == addr->version ? 
+ cnat_client_ip4_find (&ip_addr_v4(addr)) : + cnat_client_ip6_find (&ip_addr_v6(addr))); + /* Client might not already be created */ + if (NULL != cc) + { + cnat_client_cnt_session (cc); + vec_add1(del_vec, addr - cnat_client_db.throttle_pool[i]); + } + })); + /* *INDENT-ON* */ + vec_foreach (ai, del_vec) + { + /* Free session */ + addr = pool_elt_at_index (cnat_client_db.throttle_pool[i], *ai); + pool_put (cnat_client_db.throttle_pool[i], addr); + } + clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock[i]); + } +} + +void +cnat_client_translation_added (index_t cci) +{ + cnat_client_t *cc; + cc = cnat_client_get (cci); + ASSERT (!(cc->flags & CNAT_FLAG_EXPIRES)); + cc->tr_refcnt++; +} + +void +cnat_client_translation_deleted (index_t cci) +{ + cnat_client_t *cc; + + cc = cnat_client_get (cci); + ASSERT (!(cc->flags & CNAT_FLAG_EXPIRES)); + cc->tr_refcnt--; + + if (0 == cc->tr_refcnt) + cnat_client_destroy (cc); +} + +static void +cnat_client_db_add (cnat_client_t * cc) +{ + index_t cci; + + cci = cc - cnat_client_pool; + + if (ip_addr_version (&cc->cc_ip) == AF_IP4) + hash_set (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32, cci); + else + hash_set_mem_alloc (&cnat_client_db.crd_cip6, + &ip_addr_v6 (&cc->cc_ip), cci); +} + + +index_t +cnat_client_add (const ip_address_t * ip, u8 flags) +{ + cnat_client_t *cc; + dpo_id_t tmp = DPO_INVALID; + fib_node_index_t fei; + dpo_proto_t dproto; + fib_prefix_t pfx; + index_t cci; + u32 fib_flags; + + /* check again if we need this client */ + cc = (AF_IP4 == ip->version ? 
+ cnat_client_ip4_find (&ip->ip.ip4) : + cnat_client_ip6_find (&ip->ip.ip6)); + + if (NULL != cc) + return (cc - cnat_client_pool); + + + pool_get_aligned (cnat_client_pool, cc, CLIB_CACHE_LINE_BYTES); + cc->cc_locks = 1; + cci = cc - cnat_client_pool; + cc->parent_cci = cci; + cc->flags = flags; + + ip_address_copy (&cc->cc_ip, ip); + cnat_client_db_add (cc); + + ip_address_to_fib_prefix (&cc->cc_ip, &pfx); + + dproto = fib_proto_to_dpo (pfx.fp_proto); + dpo_set (&tmp, cnat_client_dpo, dproto, cci); + dpo_stack (cnat_client_dpo, dproto, &cc->cc_parent, drop_dpo_get (dproto)); + + fib_flags = FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT; + fib_flags |= (flags & CNAT_FLAG_EXCLUSIVE) ? + FIB_ENTRY_FLAG_EXCLUSIVE : FIB_ENTRY_FLAG_INTERPOSE; + + fei = fib_table_entry_special_dpo_add (CNAT_FIB_TABLE, + &pfx, cnat_fib_source, fib_flags, + &tmp); + + cc = pool_elt_at_index (cnat_client_pool, cci); + cc->cc_fei = fei; + + return (cci); +} + +void +cnat_client_learn (const cnat_learn_arg_t * l) +{ + /* RPC call to add a client from the dataplane */ + index_t cci; + cnat_client_t *cc; + cci = cnat_client_add (&l->addr, CNAT_FLAG_EXPIRES); + cc = pool_elt_at_index (cnat_client_pool, cci); + cnat_client_cnt_session (cc); + /* Process throttled calls if any */ + cnat_client_throttle_pool_process (); +} + +/** + * Interpose a policy DPO + */ +static void +cnat_client_dpo_interpose (const dpo_id_t * original, + const dpo_id_t * parent, dpo_id_t * clone) +{ + cnat_client_t *cc, *cc_clone; + + pool_get_zero (cnat_client_pool, cc_clone); + cc = cnat_client_get (original->dpoi_index); + + cc_clone->cc_fei = FIB_NODE_INDEX_INVALID; + cc_clone->parent_cci = cc->parent_cci; + cc_clone->flags = cc->flags; + ip_address_copy (&cc_clone->cc_ip, &cc->cc_ip); + + /* stack the clone on the FIB provided parent */ + dpo_stack (cnat_client_dpo, original->dpoi_proto, &cc_clone->cc_parent, + parent); + + /* return the clone */ + dpo_set (clone, + cnat_client_dpo, + original->dpoi_proto, cc_clone - 
cnat_client_pool); +} + +int +cnat_client_purge (void) +{ + ASSERT (0 == hash_elts (cnat_client_db.crd_cip6)); + ASSERT (0 == hash_elts (cnat_client_db.crd_cip4)); + ASSERT (0 == pool_elts (cnat_client_pool)); + return (0); +} + +u8 * +format_cnat_client (u8 * s, va_list * args) +{ + index_t cci = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + + cnat_client_t *cc = pool_elt_at_index (cnat_client_pool, cci); + + s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d", cci, + format_ip_address, &cc->cc_ip, + cc->tr_refcnt, cc->session_refcnt); + if (cc->flags & CNAT_FLAG_EXPIRES) + s = format (s, " expires"); + + if (cc->flags & CNAT_FLAG_EXCLUSIVE) + s = format (s, " exclusive"); + + if (cnat_client_is_clone (cc)) + s = format (s, "\n%Uclone of [%d]\n%U%U", + format_white_space, indent + 2, cc->parent_cci, + format_white_space, indent + 2, + format_dpo_id, &cc->cc_parent, indent + 4); + + return (s); +} + + +static clib_error_t * +cnat_client_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + index_t cci; + + cci = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &cci)) + ; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + if (INDEX_INVALID == cci) + { + /* *INDENT-OFF* */ + pool_foreach_index(cci, cnat_client_pool, ({ + vlib_cli_output(vm, "%U", format_cnat_client, cci, 0); + })) + /* *INDENT-ON* */ + + vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool)); + vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps)); + } + else + { + vlib_cli_output (vm, "Invalid policy ID:%d", cci); + } + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_client_show_cmd_node, static) = { + .path = "show cnat client", + .function = cnat_client_show, + .short_help = "show cnat client", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +const static char *const cnat_client_dpo_ip4_nodes[] = { + 
"ip4-cnat-tx", + NULL, +}; + +const static char *const cnat_client_dpo_ip6_nodes[] = { + "ip6-cnat-tx", + NULL, +}; + +const static char *const *const cnat_client_dpo_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = cnat_client_dpo_ip4_nodes, + [DPO_PROTO_IP6] = cnat_client_dpo_ip6_nodes, +}; + +static void +cnat_client_dpo_lock (dpo_id_t * dpo) +{ + cnat_client_t *cc; + + cc = cnat_client_get (dpo->dpoi_index); + + cc->cc_locks++; +} + +static void +cnat_client_dpo_unlock (dpo_id_t * dpo) +{ + cnat_client_t *cc; + + cc = cnat_client_get (dpo->dpoi_index); + + cc->cc_locks--; + + if (0 == cc->cc_locks) + { + ASSERT (cnat_client_is_clone (cc)); + pool_put (cnat_client_pool, cc); + } +} + +u8 * +format_cnat_client_dpo (u8 * s, va_list * ap) +{ + index_t cci = va_arg (*ap, index_t); + u32 indent = va_arg (*ap, u32); + + s = format (s, "%U", format_cnat_client, cci, indent); + + return (s); +} + +const static dpo_vft_t cnat_client_dpo_vft = { + .dv_lock = cnat_client_dpo_lock, + .dv_unlock = cnat_client_dpo_unlock, + .dv_format = format_cnat_client_dpo, + .dv_mk_interpose = cnat_client_dpo_interpose, +}; + +static clib_error_t * +cnat_client_init (vlib_main_t * vm) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + int nthreads = tm->n_threads + 1; + int i; + cnat_client_dpo = dpo_register_new_type (&cnat_client_dpo_vft, + cnat_client_dpo_nodes); + + cnat_client_db.crd_cip6 = hash_create_mem (0, + sizeof (ip6_address_t), + sizeof (uword)); + + vec_validate (cnat_client_db.throttle_pool, nthreads); + vec_validate (cnat_client_db.throttle_pool_lock, nthreads); + for (i = 0; i < nthreads; i++) + clib_spinlock_init (&cnat_client_db.throttle_pool_lock[i]); + + return (NULL); +} + +VLIB_INIT_FUNCTION (cnat_client_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h new file mode 100644 index 00000000000..9bc622dcc2c --- 
/dev/null +++ b/src/plugins/cnat/cnat_client.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CNAT_CLIENT_H__ +#define __CNAT_CLIENT_H__ + +#include + +/** + * A client is a representation of an IP address behind the NAT. + * A client thus sends packets to a VIP. + * Clients are learned in the Data-plane when they send packets, + * but, since they make additions to the FIB they must be programmed + * in the main thread. They are aged out when they become idle. + * + * A client interposes in the FIB graph for the prefix corresponding + * to the client (e.g. client's-IP/32). As a result this client object + * is cloned as the interpose DPO. The clones are removed when the lock + * count drops to zero. The originals are removed when the client ages. + * At forwarding time the client performs the reverse translation and + * then ships the packet to where the FIB would send it. 
+ */ +typedef struct cnat_client_t_ +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** + * the client's IP address + */ + ip_address_t cc_ip; + + /** + * How to send packets to this client post translation + */ + dpo_id_t cc_parent; + + /** + * the FIB entry this client sources + */ + fib_node_index_t cc_fei; + + /** + * number of DPO locks + */ + u32 cc_locks; + + /** + * Translations refcount for cleanup + */ + u32 tr_refcnt; + + /** + * Session refcount for cleanup + */ + u32 session_refcnt; + + /** + * Parent cnat_client index if cloned via interpose + * or own index if vanilla client. + * Used to get translations & update session_refcnt + */ + index_t parent_cci; + + /** + * Client flags + */ + u8 flags; +} cnat_client_t; + +extern u8 *format_cnat_client (u8 * s, va_list * args); +extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af); + +extern cnat_client_t *cnat_client_pool; +extern dpo_type_t cnat_client_dpo; + +#define CC_INDEX_INVALID ((u32)(~0)) + +static_always_inline cnat_client_t * +cnat_client_get (index_t i) +{ + return (pool_elt_at_index (cnat_client_pool, i)); +} + +typedef struct cnat_learn_arg_t_ +{ + ip_address_t addr; +} cnat_learn_arg_t; + +/** + * A translation that references this VIP was deleted + */ +extern void cnat_client_translation_deleted (index_t cci); + +/** + * A translation that references this VIP was added + */ +extern void cnat_client_translation_added (index_t cci); +/** + * Called in the main thread by RPC from the workers to learn a + * new client + */ +extern void cnat_client_learn (const cnat_learn_arg_t * l); + +extern index_t cnat_client_add (const ip_address_t * ip, u8 flags); + +/** + * Check all the clients were purged by translation & session purge + */ +extern int cnat_client_purge (void); + +/** + * CNat Client (dpo) flags + */ +typedef enum +{ + /* IP already present in the FIB, need to interpose dpo */ + CNAT_FLAG_EXCLUSIVE = (1 << 1), + /* Prune this entry */ + CNAT_FLAG_EXPIRES = (1 << 2), +} 
cnat_entry_flag_t; + + +extern void cnat_client_throttle_pool_process (); + +/** + * DB of clients + */ +typedef struct cnat_client_db_t_ +{ + uword *crd_cip4; + uword *crd_cip6; + /* Pool of addresses that have been throttled + and need to be refcounted before calling + cnat_client_free_by_ip */ + ip_address_t **throttle_pool; + clib_spinlock_t *throttle_pool_lock; +} cnat_client_db_t; + +extern cnat_client_db_t cnat_client_db; + +/** + * Find a client from an IP4 address + */ +static_always_inline cnat_client_t * +cnat_client_ip4_find (const ip4_address_t * ip) +{ + uword *p; + + p = hash_get (cnat_client_db.crd_cip4, ip->as_u32); + + if (p) + return (pool_elt_at_index (cnat_client_pool, p[0])); + + return (NULL); +} + +static_always_inline u32 +cnat_client_ip4_find_index (const ip4_address_t * ip) +{ + uword *p; + + p = hash_get (cnat_client_db.crd_cip4, ip->as_u32); + + if (p) + return p[0]; + + return -1; +} + +/** + * Find a client from an IP6 address + */ +static_always_inline cnat_client_t * +cnat_client_ip6_find (const ip6_address_t * ip) +{ + uword *p; + + p = hash_get_mem (cnat_client_db.crd_cip6, ip); + + if (p) + return (pool_elt_at_index (cnat_client_pool, p[0])); + + return (NULL); +} + +/** + * Add a session refcnt to this client + */ +static_always_inline u32 +cnat_client_cnt_session (cnat_client_t * cc) +{ + cnat_client_t *ccp = cnat_client_get (cc->parent_cci); + return clib_atomic_add_fetch (&ccp->session_refcnt, 1); +} + +/** + * Del a session refcnt to this client + */ +static_always_inline u32 +cnat_client_uncnt_session (cnat_client_t * cc) +{ + cnat_client_t *ccp = cnat_client_get (cc->parent_cci); + return clib_atomic_sub_fetch (&ccp->session_refcnt, 1); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/cnat/cnat_error.def b/src/plugins/cnat/cnat_error.def new file mode 100644 index 00000000000..f7809d890ca --- /dev/null +++ 
b/src/plugins/cnat/cnat_error.def @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +cnat_error (NONE, "no error") +cnat_error (EXHAUSTED_PORTS, "no more free ports") + + diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h new file mode 100644 index 00000000000..58e81c12b45 --- /dev/null +++ b/src/plugins/cnat/cnat_node.h @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CNAT_NODE_H__ +#define __CNAT_NODE_H__ + +#include +#include +#include + +typedef uword (*cnat_node_sub_t) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * b, + cnat_node_ctx_t * ctx, int rv, + cnat_session_t * session); + +/** + * Inline translation functions + */ + +static_always_inline u8 +has_ip6_address (ip6_address_t * a) +{ + return ((0 != a->as_u64[0]) || (0 != a->as_u64[1])); +} + +static_always_inline void +cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp, + u16 * checksum, + ip4_address_t new_addr[VLIB_N_DIR], + u16 new_port[VLIB_N_DIR]) +{ + u16 old_port[VLIB_N_DIR]; + ip4_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_port[VLIB_TX] = udp->dst_port; + old_port[VLIB_RX] = udp->src_port; + old_addr[VLIB_TX] = ip4->dst_address; + old_addr[VLIB_RX] = ip4->src_address; + + sum = *checksum; + if (new_addr[VLIB_TX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32, + ip4_header_t, dst_address); + if (new_port[VLIB_TX]) + { + udp->dst_port = new_port[VLIB_TX]; + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + if (new_addr[VLIB_RX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32, + ip4_header_t, src_address); + + if (new_port[VLIB_RX]) + { + udp->src_port = new_port[VLIB_RX]; + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + *checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR]) +{ + ip4_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_addr[VLIB_TX] = ip4->dst_address; + old_addr[VLIB_RX] = ip4->src_address; + + sum = ip4->checksum; + if (new_addr[VLIB_TX].as_u32) + { + ip4->dst_address = new_addr[VLIB_TX]; + sum = + ip_csum_update (sum, 
old_addr[VLIB_TX].as_u32, + new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address); + } + if (new_addr[VLIB_RX].as_u32) + { + ip4->src_address = new_addr[VLIB_RX]; + sum = + ip_csum_update (sum, old_addr[VLIB_RX].as_u32, + new_addr[VLIB_RX].as_u32, ip4_header_t, src_address); + } + ip4->checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index) +{ + cnat_main_t *cm = &cnat_main; + if (PREDICT_FALSE (tcp_fin (tcp))) + { + cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT); + } + + if (PREDICT_FALSE (tcp_rst (tcp))) + { + cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT); + } + + if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp))) + { + cnat_timestamp_set_lifetime (index, cm->tcp_max_age); + } +} + +static_always_inline void +cnat_translation_ip4 (const cnat_session_t * session, + ip4_header_t * ip4, udp_header_t * udp) +{ + tcp_header_t *tcp = (tcp_header_t *) udp; + ip4_address_t new_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + + new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4; + new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4; + new_port[VLIB_TX] = session->value.cs_port[VLIB_TX]; + new_port[VLIB_RX] = session->value.cs_port[VLIB_RX]; + + if (ip4->protocol == IP_PROTOCOL_TCP) + { + if (PREDICT_FALSE (tcp->checksum)) + cnat_ip4_translate_l4 (ip4, udp, &tcp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); + } + else if (ip4->protocol == IP_PROTOCOL_UDP) + { + if (PREDICT_FALSE (udp->checksum)) + cnat_ip4_translate_l4 (ip4, udp, &udp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + } + + cnat_ip4_translate_l3 (ip4, new_addr); +} + +static_always_inline void +cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR]) +{ + if 
(has_ip6_address (&new_addr[VLIB_TX])) + ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]); + if (has_ip6_address (&new_addr[VLIB_RX])) + ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]); +} + +static_always_inline void +cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp, + u16 * checksum, + ip6_address_t new_addr[VLIB_N_DIR], + u16 new_port[VLIB_N_DIR]) +{ + u16 old_port[VLIB_N_DIR]; + ip6_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_port[VLIB_TX] = udp->dst_port; + old_port[VLIB_RX] = udp->src_port; + ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address); + ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address); + + sum = *checksum; + if (has_ip6_address (&new_addr[VLIB_TX])) + { + sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]); + } + + if (new_port[VLIB_TX]) + { + udp->dst_port = new_port[VLIB_TX]; + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + if (has_ip6_address (&new_addr[VLIB_RX])) + { + sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]); + } + + if (new_port[VLIB_RX]) + { + udp->src_port = new_port[VLIB_RX]; + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + *checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_translation_ip6 (const cnat_session_t * session, + ip6_header_t * ip6, udp_header_t * udp) +{ + tcp_header_t *tcp = (tcp_header_t *) udp; + ip6_address_t new_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + + ip6_address_copy (&new_addr[VLIB_TX], 
&session->value.cs_ip[VLIB_TX].ip6); + ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6); + new_port[VLIB_TX] = session->value.cs_port[VLIB_TX]; + new_port[VLIB_RX] = session->value.cs_port[VLIB_RX]; + + if (ip6->protocol == IP_PROTOCOL_TCP) + { + if (PREDICT_FALSE (tcp->checksum)) + cnat_ip6_translate_l4 (ip6, udp, &tcp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); + } + else if (ip6->protocol == IP_PROTOCOL_UDP) + { + if (PREDICT_FALSE (udp->checksum)) + cnat_ip6_translate_l4 (ip6, udp, &udp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + } + + cnat_ip6_translate_l3 (ip6, new_addr); +} + +static_always_inline void +cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af, + clib_bihash_kv_40_48_t * bkey) +{ + udp_header_t *udp; + cnat_session_t *session = (cnat_session_t *) bkey; + if (AF_IP4 == af) + { + ip4_header_t *ip4; + ip4 = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip4 + 1); + session->key.cs_af = AF_IP4; + session->key.__cs_pad[0] = 0; + session->key.__cs_pad[1] = 0; + + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], &ip4->dst_address); + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], &ip4->src_address); + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; + session->key.cs_proto = ip4->protocol; + } + else + { + ip6_header_t *ip6; + ip6 = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip6 + 1); + session->key.cs_af = AF_IP6; + session->key.__cs_pad[0] = 0; + session->key.__cs_pad[1] = 0; + + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], &ip6->dst_address); + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], &ip6->src_address); + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; 
+ session->key.cs_proto = ip6->protocol; + } +} + +/** + * Create NAT sessions + */ + +static_always_inline void +cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx, + u8 rsession_flags) +{ + cnat_client_t *cc; + clib_bihash_kv_40_48_t rkey; + cnat_session_t *rsession = (cnat_session_t *) & rkey; + clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session; + clib_bihash_kv_40_48_t rvalue; + int rv; + + /* create the reverse flow key */ + ip46_address_copy (&rsession->key.cs_ip[VLIB_RX], + &session->value.cs_ip[VLIB_TX]); + ip46_address_copy (&rsession->key.cs_ip[VLIB_TX], + &session->value.cs_ip[VLIB_RX]); + rsession->key.cs_proto = session->key.cs_proto; + rsession->key.__cs_pad[0] = 0; + rsession->key.__cs_pad[1] = 0; + rsession->key.cs_af = ctx->af; + rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX]; + rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX]; + + /* First search for existing reverse session */ + rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue); + if (!rv) + { + /* Reverse session already exists + corresponding client should also exist + we only need to refcnt the timestamp */ + cnat_session_t *found_rsession = (cnat_session_t *) & rvalue; + session->value.cs_ts_index = found_rsession->value.cs_ts_index; + cnat_timestamp_inc_refcnt (session->value.cs_ts_index); + clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1 /* is_add */ ); + goto create_rsession; + } + + session->value.cs_ts_index = cnat_timestamp_new (ctx->now); + clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1); + + /* is this the first time we've seen this source address */ + cc = (AF_IP4 == ctx->af ? 
+ cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) : + cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6)); + + if (NULL == cc) + { + u64 r0 = 17; + if (AF_IP4 == ctx->af) + r0 = (u64) session->value.cs_ip[VLIB_RX].ip4.as_u32; + else + { + r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[0]; + r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[1]; + } + + /* Rate limit */ + if (!throttle_check (&cnat_throttle, ctx->thread_index, r0, ctx->seed)) + { + cnat_learn_arg_t l; + l.addr.version = ctx->af; + ip46_address_copy (&l.addr.ip, &session->value.cs_ip[VLIB_RX]); + /* fire client create to the main thread */ + vl_api_rpc_call_main_thread (cnat_client_learn, + (u8 *) & l, sizeof (l)); + } + else + { + /* Will still need to count those for session refcnt */ + ip_address_t *addr; + clib_spinlock_lock (&cnat_client_db.throttle_pool_lock + [ctx->thread_index]); + pool_get (cnat_client_db.throttle_pool[ctx->thread_index], addr); + addr->version = ctx->af; + ip46_address_copy (&addr->ip, &session->value.cs_ip[VLIB_RX]); + clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock + [ctx->thread_index]); + } + } + else + { + cnat_client_cnt_session (cc); + } + +create_rsession: + /* add the reverse flow */ + ip46_address_copy (&rsession->value.cs_ip[VLIB_RX], + &session->key.cs_ip[VLIB_TX]); + ip46_address_copy (&rsession->value.cs_ip[VLIB_TX], + &session->key.cs_ip[VLIB_RX]); + rsession->value.cs_ts_index = session->value.cs_ts_index; + rsession->value.cs_lbi = INDEX_INVALID; + rsession->value.flags = rsession_flags; + rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX]; + rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX]; + + clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1); +} + +always_inline uword +cnat_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + cnat_node_sub_t cnat_sub, + ip_address_family_t af, u8 do_trace) +{ + u32 n_left, *from, thread_index; + vlib_buffer_t 
*bufs[VLIB_FRAME_SIZE]; + vlib_buffer_t **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next; + f64 now; + u64 seed; + + thread_index = vm->thread_index; + from = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; + next = nexts; + vlib_get_buffers (vm, from, bufs, n_left); + now = vlib_time_now (vm); + seed = throttle_seed (&cnat_throttle, thread_index, vlib_time_now (vm)); + cnat_session_t *session[4]; + clib_bihash_kv_40_48_t bkey[4], bvalue[4]; + u64 hash[4]; + int rv[4]; + + cnat_node_ctx_t ctx = { now, seed, thread_index, af, do_trace }; + + if (n_left >= 8) + { + /* Kickstart our state */ + cnat_session_make_key (b[3], af, &bkey[3]); + cnat_session_make_key (b[2], af, &bkey[2]); + cnat_session_make_key (b[1], af, &bkey[1]); + cnat_session_make_key (b[0], af, &bkey[0]); + + hash[3] = clib_bihash_hash_40_48 (&bkey[3]); + hash[2] = clib_bihash_hash_40_48 (&bkey[2]); + hash[1] = clib_bihash_hash_40_48 (&bkey[1]); + hash[0] = clib_bihash_hash_40_48 (&bkey[0]); + } + + while (n_left >= 8) + { + if (n_left >= 12) + { + vlib_prefetch_buffer_header (b[11], LOAD); + vlib_prefetch_buffer_header (b[10], LOAD); + vlib_prefetch_buffer_header (b[9], LOAD); + vlib_prefetch_buffer_header (b[8], LOAD); + } + + rv[3] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[3], &bkey[3], + &bvalue[3]); + session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]); + next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]); + + rv[2] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[2], &bkey[2], + &bvalue[2]); + session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]); + next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]); + + rv[1] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[1], &bkey[1], + &bvalue[1]); + session[1] = (cnat_session_t *) (rv[1] ? 
&bkey[1] : &bvalue[1]); + next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]); + + rv[0] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[0], &bkey[0], + &bvalue[0]); + session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]); + next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]); + + cnat_session_make_key (b[7], af, &bkey[3]); + cnat_session_make_key (b[6], af, &bkey[2]); + cnat_session_make_key (b[5], af, &bkey[1]); + cnat_session_make_key (b[4], af, &bkey[0]); + + hash[3] = clib_bihash_hash_40_48 (&bkey[3]); + hash[2] = clib_bihash_hash_40_48 (&bkey[2]); + hash[1] = clib_bihash_hash_40_48 (&bkey[1]); + hash[0] = clib_bihash_hash_40_48 (&bkey[0]); + + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]); + + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]); + + b += 4; + next += 4; + n_left -= 4; + } + + while (n_left > 0) + { + cnat_session_make_key (b[0], af, &bkey[0]); + rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db, + &bkey[0], &bvalue[0]); + + session[0] = (cnat_session_t *) (rv[0] ? 
&bkey[0] : &bvalue[0]); + next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]); + + b++; + next++; + n_left--; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + + return frame->n_vectors; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/cnat/cnat_node_snat.c b/src/plugins/cnat/cnat_node_snat.c new file mode 100644 index 00000000000..cc1421be084 --- /dev/null +++ b/src/plugins/cnat/cnat_node_snat.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +typedef enum cnat_snat_next_ +{ + CNAT_SNAT_NEXT_DROP, + CNAT_SNAT_N_NEXT, +} cnat_snat_next_t; + +typedef struct cnat_snat_trace_ +{ + u32 found; + cnat_session_t session; +} cnat_snat_trace_t; + +vlib_node_registration_t cnat_snat_ip4_node; +vlib_node_registration_t cnat_snat_ip6_node; + +static u8 * +format_cnat_snat_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + cnat_snat_trace_t *t = va_arg (*args, cnat_snat_trace_t *); + + if (t->found) + s = format (s, "found: %U", format_cnat_session, &t->session, 1); + else + s = format (s, "not found"); + return s; +} + +/* CNat sub for source NAT as a feature arc on ip[46]-unicast + This node's sub shouldn't apply to the same flows as + cnat_vip_inline */ +always_inline uword +cnat_snat_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * b, + cnat_node_ctx_t * ctx, int rv, cnat_session_t * session) +{ + cnat_main_t *cm = &cnat_main; + ip4_header_t *ip4; + ip_protocol_t iproto; + ip6_header_t *ip6; + udp_header_t *udp0; + u32 arc_next0; + u16 next0; + u16 sport; + + if (AF_IP4 == ctx->af) + { + ip4 = vlib_buffer_get_current (b); + iproto = ip4->protocol; + udp0 = (udp_header_t *) (ip4 + 1); + } + else + { + ip6 = vlib_buffer_get_current (b); + iproto = ip6->protocol; + udp0 = (udp_header_t *) (ip6 + 1); + } + + /* By default don't follow previous next0 */ + vnet_feature_next (&arc_next0, b); + next0 = arc_next0; + + if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP) + { + /* Dont translate */ + goto trace; + } + + if (!rv) + { + /* session table hit */ + cnat_timestamp_update (session->value.cs_ts_index, ctx->now); + } + else + { + ip46_address_t ip46_dst_address; + if (AF_IP4 == ctx->af) + ip46_address_set_ip4 (&ip46_dst_address, &ip4->dst_address); + else + ip46_address_set_ip6 (&ip46_dst_address, &ip6->dst_address); + rv = 
cnat_search_snat_prefix (&ip46_dst_address, ctx->af); + if (!rv) + { + /* Prefix table hit, we shouldn't source NAT */ + goto trace; + } + /* New flow, create the sessions if necessary. session will be a snat + session, and rsession will be a dnat session + Note: packet going through this path are going to the outside, + so they will never hit the NAT again (they are not going towards + a VIP) */ + if (AF_IP4 == ctx->af) + { + ip46_address_set_ip4 (&session->value.cs_ip[VLIB_RX], + &cm->snat_ip4); + ip46_address_set_ip4 (&session->value.cs_ip[VLIB_TX], + &ip4->dst_address); + } + else + { + ip46_address_set_ip6 (&session->value.cs_ip[VLIB_RX], + &cm->snat_ip6); + ip46_address_set_ip6 (&session->value.cs_ip[VLIB_TX], + &ip6->dst_address); + } + + /* Port allocation, first try to use the original port, allocate one + if it is already used */ + sport = udp0->src_port; + rv = cnat_allocate_port (cm, &sport); + if (rv) + { + vlib_node_increment_counter (vm, cnat_snat_ip4_node.index, + CNAT_ERROR_EXHAUSTED_PORTS, 1); + next0 = CNAT_SNAT_NEXT_DROP; + goto trace; + } + + session->value.cs_port[VLIB_RX] = sport; + session->value.cs_port[VLIB_TX] = udp0->dst_port; + session->value.cs_lbi = INDEX_INVALID; + session->value.flags = + CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT; + + cnat_session_create (session, ctx, CNAT_SESSION_FLAG_HAS_SNAT); + } + + + if (AF_IP4 == ctx->af) + cnat_translation_ip4 (session, ip4, udp0); + else + cnat_translation_ip6 (session, ip6, udp0); + +trace: + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + { + cnat_snat_trace_t *t; + + t = vlib_add_trace (vm, node, b, sizeof (*t)); + + if (NULL != session) + clib_memcpy (&t->session, session, sizeof (t->session)); + } + return next0; +} + +VLIB_NODE_FN (cnat_snat_ip4_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP4, + 1 
/* do_trace */ ); + return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP4, + 0 /* do_trace */ ); +} + +VLIB_NODE_FN (cnat_snat_ip6_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP6, + 1 /* do_trace */ ); + return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP6, + 0 /* do_trace */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (cnat_snat_ip4_node) = +{ + .name = "ip4-cnat-snat", + .vector_size = sizeof (u32), + .format_trace = format_cnat_snat_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = CNAT_N_ERROR, + .error_strings = cnat_error_strings, + .n_next_nodes = CNAT_SNAT_N_NEXT, + .next_nodes = + { + [CNAT_SNAT_NEXT_DROP] = "ip4-drop", + } +}; + +VLIB_REGISTER_NODE (cnat_snat_ip6_node) = +{ + .name = "ip6-cnat-snat", + .vector_size = sizeof (u32), + .format_trace = format_cnat_snat_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = CNAT_N_ERROR, + .error_strings = cnat_error_strings, + .n_next_nodes = CNAT_SNAT_N_NEXT, + .next_nodes = + { + [CNAT_SNAT_NEXT_DROP] = "ip6-drop", + } +}; +/* *INDENT-ON* */ + + +VNET_FEATURE_INIT (cnat_snat_ip4_node, static) = +{ +.arc_name = "ip4-unicast",.node_name = "ip4-cnat-snat",}; + +VNET_FEATURE_INIT (cnat_snat_ip6_node, static) = +{ +.arc_name = "ip6-unicast",.node_name = "ip6-cnat-snat",}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_node_vip.c b/src/plugins/cnat/cnat_node_vip.c new file mode 100644 index 00000000000..afabed08f1a --- /dev/null +++ b/src/plugins/cnat/cnat_node_vip.c @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +typedef struct cnat_translation_trace_t_ +{ + cnat_session_t session; + cnat_translation_t tr; + u32 found_session; + u32 created_session; + u32 has_tr; +} cnat_translation_trace_t; + +typedef enum cnat_translation_next_t_ +{ + CNAT_TRANSLATION_NEXT_DROP, + CNAT_TRANSLATION_NEXT_LOOKUP, + CNAT_TRANSLATION_N_NEXT, +} cnat_translation_next_t; + +vlib_node_registration_t cnat_vip_ip4_node; +vlib_node_registration_t cnat_vip_ip6_node; + +static u8 * +format_cnat_translation_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + cnat_translation_trace_t *t = + va_arg (*args, cnat_translation_trace_t *); + + if (t->found_session) + s = format (s, "found: %U", format_cnat_session, &t->session, 1); + else if (t->created_session) + s = format (s, "created: %U\n tr: %U", + format_cnat_session, &t->session, 1, + format_cnat_translation, + &t->tr, 0); + else if (t->has_tr) + s = format (s, "tr pass: %U", format_cnat_translation, + &t->tr, 0); + else + s = format (s, "not found"); + return s; +} + +/* CNat sub for NAT behind a fib entry (VIP or interposed real IP) */ +always_inline uword +cnat_vip_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * b, + cnat_node_ctx_t * ctx, int rv, cnat_session_t * session) +{ + vlib_combined_counter_main_t *cntm = &cnat_translation_counters; + cnat_main_t *cm = &cnat_main; + const cnat_translation_t *ct = NULL; + ip4_header_t 
*ip4; + ip_protocol_t iproto; + ip6_header_t *ip6; + udp_header_t *udp0; + cnat_client_t *cc; + u16 next0; + index_t cti; + int created_session = 0; + if (AF_IP4 == ctx->af) + { + ip4 = vlib_buffer_get_current (b); + iproto = ip4->protocol; + udp0 = (udp_header_t *) (ip4 + 1); + } + else + { + ip6 = vlib_buffer_get_current (b); + iproto = ip6->protocol; + udp0 = (udp_header_t *) (ip6 + 1); + } + + cc = cnat_client_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]); + + if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP) + { + /* Dont translate & follow the fib programming */ + next0 = cc->cc_parent.dpoi_next_node; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index; + goto trace; + } + + ct = cnat_find_translation (cc->parent_cci, + clib_host_to_net_u16 (udp0->dst_port), + iproto); + + if (!rv) + { + /* session table hit */ + cnat_timestamp_update (session->value.cs_ts_index, ctx->now); + + if (NULL != ct) + { + /* Translate & follow the translation given LB */ + next0 = ct->ct_lb.dpoi_next_node; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi; + } + else if (session->value.flags & CNAT_SESSION_FLAG_HAS_SNAT) + { + /* The return needs DNAT, so we need an additionnal + * lookup after translation */ + next0 = CNAT_TRANSLATION_NEXT_LOOKUP; + } + else + { + /* Translate & follow the fib programming */ + next0 = cc->cc_parent.dpoi_next_node; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index; + } + } + else + { + if (NULL == ct) + { + /* Dont translate & Follow the fib programming */ + vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index; + next0 = cc->cc_parent.dpoi_next_node; + goto trace; + } + + /* New flow, create the sessions */ + const load_balance_t *lb0; + cnat_ep_trk_t *trk0; + u32 hash_c0, bucket0; + u32 rsession_flags = 0; + const dpo_id_t *dpo0; + + lb0 = load_balance_get (ct->ct_lb.dpoi_index); + if (!lb0->lb_n_buckets) + { + /* Dont translate & Follow the fib programming */ + vnet_buffer 
(b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index; + next0 = cc->cc_parent.dpoi_next_node; + goto trace; + } + + /* session table miss */ + hash_c0 = (AF_IP4 == ctx->af ? + ip4_compute_flow_hash (ip4, lb0->lb_hash_config) : + ip6_compute_flow_hash (ip6, lb0->lb_hash_config)); + bucket0 = hash_c0 & lb0->lb_n_buckets_minus_1; + dpo0 = load_balance_get_fwd_bucket (lb0, bucket0); + + /* add the session */ + trk0 = &ct->ct_paths[bucket0]; + + ip46_address_copy (&session->value.cs_ip[VLIB_TX], + &trk0->ct_ep[VLIB_TX].ce_ip.ip); + if (ip_address_is_zero (&trk0->ct_ep[VLIB_RX].ce_ip)) + { + if (AF_IP4 == ctx->af) + ip46_address_set_ip4 (&session->value.cs_ip[VLIB_RX], + &ip4->src_address); + else + ip46_address_set_ip6 (&session->value.cs_ip[VLIB_RX], + &ip6->src_address); + } + else + { + /* We source NAT with the translation */ + rsession_flags |= CNAT_SESSION_FLAG_HAS_SNAT; + ip46_address_copy (&session->value.cs_ip[VLIB_RX], + &trk0->ct_ep[VLIB_RX].ce_ip.ip); + } + session->value.cs_port[VLIB_TX] = + clib_host_to_net_u16 (trk0->ct_ep[VLIB_TX].ce_port); + session->value.cs_port[VLIB_RX] = + clib_host_to_net_u16 (trk0->ct_ep[VLIB_RX].ce_port); + + session->value.flags = 0; + if (!session->value.cs_port[VLIB_RX]) + { + u16 sport; + sport = udp0->src_port; + /* Allocate a port only if asked and if we actually sNATed */ + if ((ct->flags & CNAT_TRANSLATION_FLAG_ALLOCATE_PORT) + && (rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT)) { + sport = 0; /* force allocation */ + session->value.flags |= CNAT_SESSION_FLAG_ALLOC_PORT; + rv = cnat_allocate_port (cm, &sport); + if (rv) + { + vlib_node_increment_counter (vm, cnat_vip_ip4_node.index, + CNAT_ERROR_EXHAUSTED_PORTS, 1); + next0 = CNAT_TRANSLATION_NEXT_DROP; + goto trace; + } + } + + session->value.cs_port[VLIB_RX] = sport; + } + session->value.cs_lbi = dpo0->dpoi_index; + + cnat_client_cnt_session (cc); + cnat_session_create (session, ctx, rsession_flags); + created_session = 1; + + next0 = ct->ct_lb.dpoi_next_node; + 
vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi; + } + + + if (AF_IP4 == ctx->af) + cnat_translation_ip4 (session, ip4, udp0); + else + cnat_translation_ip6 (session, ip6, udp0); + + if (NULL != ct) + { + cti = ct - cnat_translation_pool; + vlib_increment_combined_counter (cntm, ctx->thread_index, cti, 1, + vlib_buffer_length_in_chain (vm, b)); + } + +trace: + if (PREDICT_FALSE (ctx->do_trace)) + { + cnat_translation_trace_t *t; + + t = vlib_add_trace (vm, node, b, sizeof (*t)); + + t->found_session = !rv; + t->created_session = created_session; + if (t->found_session || t->created_session) + clib_memcpy (&t->session, session, sizeof (t->session)); + t->has_tr = (NULL != ct); + if (t->has_tr) + clib_memcpy (&t->tr, ct, sizeof (cnat_translation_t)); + } + return next0; +} + +VLIB_NODE_FN (cnat_vip_ip4_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP4, + 1 /* do_trace */ ); + return cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP4, + 0 /* do_trace */ ); +} + +VLIB_NODE_FN (cnat_vip_ip6_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + return cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP6, + 1 /* do_trace */ ); + return cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP6, + 0 /* do_trace */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (cnat_vip_ip4_node) = +{ + .name = "ip4-cnat-tx", + .vector_size = sizeof (u32), + .format_trace = format_cnat_translation_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = CNAT_TRANSLATION_N_NEXT, + .next_nodes = + { + [CNAT_TRANSLATION_NEXT_DROP] = "ip4-drop", + [CNAT_TRANSLATION_NEXT_LOOKUP] = "ip4-lookup", + } +}; +VLIB_REGISTER_NODE (cnat_vip_ip6_node) = +{ + .name = "ip6-cnat-tx", + .vector_size = sizeof 
(u32), + .format_trace = format_cnat_translation_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = CNAT_TRANSLATION_N_NEXT, + .next_nodes = + { + [CNAT_TRANSLATION_NEXT_DROP] = "ip6-drop", + [CNAT_TRANSLATION_NEXT_LOOKUP] = "ip6-lookup", + } +}; +/* *INDENT-ON* */ + diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c new file mode 100644 index 00000000000..f5af327bffe --- /dev/null +++ b/src/plugins/cnat/cnat_scanner.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +typedef enum cnat_scanner_cmd_t_ +{ + CNAT_SCANNER_OFF, + CNAT_SCANNER_ON, +} cnat_scanner_cmd_t; + +static uword +cnat_scanner_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + uword event_type, *event_data = 0; + cnat_main_t *cm = &cnat_main; + f64 start_time; + int enabled = 1, i = 0; + + while (1) + { + if (enabled) + vlib_process_wait_for_event_or_clock (vm, cm->scanner_timeout); + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + start_time = vlib_time_now (vm); + + switch (event_type) + { + /* timer expired */ + case ~0: + break; + case CNAT_SCANNER_OFF: + enabled = 0; + break; + case CNAT_SCANNER_ON: + enabled = 1; + break; + default: + ASSERT (0); + } + + cnat_client_throttle_pool_process (); + i = cnat_session_scan (vm, start_time, i); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (cnat_scanner_process_node) = { + .function = cnat_scanner_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "cnat-scanner-process", +}; +/* *INDENT-ON* */ + +static clib_error_t * +cnat_scanner_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * c) +{ + cnat_scanner_cmd_t cmd; + + cmd = CNAT_SCANNER_ON; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on")) + cmd = CNAT_SCANNER_ON; + else if (unformat (input, "off")) + cmd = CNAT_SCANNER_OFF; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + vlib_process_signal_event (vm, cnat_scanner_process_node.index, cmd, 0); + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_scanner_cmd_node, static) = { + .path = "test cnat scanner", + .function = cnat_scanner_cmd, + .short_help = "test cnat scanner", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ 
diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c new file mode 100644 index 00000000000..7f95e1bc501 --- /dev/null +++ b/src/plugins/cnat/cnat_session.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + + +clib_bihash_40_48_t cnat_session_db; + + +typedef struct cnat_session_walk_ctx_t_ +{ + cnat_session_walk_cb_t cb; + void *ctx; +} cnat_session_walk_ctx_t; + +static int +cnat_session_walk_cb (BVT (clib_bihash_kv) * kv, void *arg) +{ + cnat_session_t *session = (cnat_session_t *) kv; + cnat_session_walk_ctx_t *ctx = arg; + + ctx->cb (session, ctx->ctx); + + return (BIHASH_WALK_CONTINUE); +} + +void +cnat_session_walk (cnat_session_walk_cb_t cb, void *ctx) +{ + cnat_session_walk_ctx_t wctx = { + .cb = cb, + .ctx = ctx, + }; + BV (clib_bihash_foreach_key_value_pair) (&cnat_session_db, + cnat_session_walk_cb, &wctx); +} + +typedef struct cnat_session_purge_walk_t_ +{ + clib_bihash_kv_40_48_t *keys; +} cnat_session_purge_walk_ctx_t; + +static int +cnat_session_purge_walk (BVT (clib_bihash_kv) * key, void *arg) +{ + cnat_session_purge_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->keys, *key); + + return (BIHASH_WALK_CONTINUE); +} + +u8 * +format_cnat_session (u8 * s, va_list * args) +{ + cnat_session_t *sess = va_arg (*args, cnat_session_t *); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + f64 ts = 0; + if 
(!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index)) + ts = cnat_timestamp_exp (sess->value.cs_ts_index); + + s = + format (s, + "session:[%U;%d -> %U;%d, %U] => %U;%d -> %U;%d lb:%d age:%f", + format_ip46_address, &sess->key.cs_ip[VLIB_RX], IP46_TYPE_ANY, + clib_host_to_net_u16 (sess->key.cs_port[VLIB_RX]), + format_ip46_address, &sess->key.cs_ip[VLIB_TX], IP46_TYPE_ANY, + clib_host_to_net_u16 (sess->key.cs_port[VLIB_TX]), + format_ip_protocol, sess->key.cs_proto, format_ip46_address, + &sess->value.cs_ip[VLIB_RX], IP46_TYPE_ANY, + clib_host_to_net_u16 (sess->value.cs_port[VLIB_RX]), + format_ip46_address, &sess->value.cs_ip[VLIB_TX], IP46_TYPE_ANY, + clib_host_to_net_u16 (sess->value.cs_port[VLIB_TX]), + sess->value.cs_lbi, ts); + + return (s); +} + +static clib_error_t * +cnat_session_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u8 verbose = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + vlib_cli_output (vm, "CNat Sessions: now:%f\n%U\n", + vlib_time_now (vm), + BV (format_bihash), &cnat_session_db, verbose); + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_session_show_cmd_node, static) = { + .path = "show cnat session", + .function = cnat_session_show, + .short_help = "show cnat session", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +void +cnat_session_free (cnat_session_t * session) +{ + clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session; + /* age it */ + if (session->value.flags & CNAT_SESSION_FLAG_ALLOC_PORT) + cnat_free_port (session->value.cs_port[VLIB_RX]); + if (!(session->value.flags & CNAT_SESSION_FLAG_NO_CLIENT)) + cnat_client_free_by_ip (&session->key.cs_ip[VLIB_TX], session->key.cs_af); + cnat_timestamp_free (session->value.cs_ts_index); + + clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 0 
/* is_add */ ); +} + +int +cnat_session_purge (void) +{ + /* flush all the session from the DB */ + cnat_session_purge_walk_ctx_t ctx = { }; + clib_bihash_kv_40_48_t *key; + + BV (clib_bihash_foreach_key_value_pair) (&cnat_session_db, + cnat_session_purge_walk, &ctx); + + vec_foreach (key, ctx.keys) cnat_session_free ((cnat_session_t *) key); + + vec_free (ctx.keys); + + return (0); +} + +u64 +cnat_session_scan (vlib_main_t * vm, f64 start_time, int i) +{ + BVT (clib_bihash) * h = &cnat_session_db; + int j, k; + + /* Don't scan the l2 fib if it hasn't been instantiated yet */ + if (alloc_arena (h) == 0) + return 0.0; + + for (i = 0; i < h->nbuckets; i++) + { + /* allow no more than 100us without a pause */ + if ((vlib_time_now (vm) - start_time) > 10e-5) + return (i); + + if (i < (h->nbuckets - 3)) + { + BVT (clib_bihash_bucket) * b = + BV (clib_bihash_get_bucket) (h, i + 3); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); + b = BV (clib_bihash_get_bucket) (h, i + 1); + if (!BV (clib_bihash_bucket_is_empty) (b)) + { + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); + CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); + } + } + + BVT (clib_bihash_bucket) * b = BV (clib_bihash_get_bucket) (h, i); + if (BV (clib_bihash_bucket_is_empty) (b)) + continue; + BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1 << b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key[0] == ~0ULL && v->kvp[k].value[0] == ~0ULL) + continue; + + cnat_session_t *session = (cnat_session_t *) & v->kvp[k]; + + if (start_time > + cnat_timestamp_exp (session->value.cs_ts_index)) + { + /* age it */ + cnat_session_free (session); + + /* + * Note: we may have just freed the bucket's backing + * storage, so check right here... 
+ */ + if (BV (clib_bihash_bucket_is_empty) (b)) + goto doublebreak; + } + } + v++; + } + doublebreak: + ; + } + + /* start again */ + return (0); +} + +static clib_error_t * +cnat_session_init (vlib_main_t * vm) +{ + cnat_main_t *cm = &cnat_main; + BV (clib_bihash_init) (&cnat_session_db, + "CNat Session DB", cm->session_hash_buckets, + cm->session_hash_memory); + BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session); + + return (NULL); +} + +VLIB_INIT_FUNCTION (cnat_session_init); + +static clib_error_t * +cnat_timestamp_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + cnat_timestamp_t *ts; + clib_rwlock_reader_lock (&cnat_main.ts_lock); + /* *INDENT-OFF* */ + pool_foreach (ts, cnat_timestamps, ({ + vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u", + ts - cnat_timestamps, + ts->last_seen, ts->lifetime, ts->refcnt); + })); + /* *INDENT-ON* */ + clib_rwlock_reader_unlock (&cnat_main.ts_lock); + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = { + .path = "show cnat timestamp", + .function = cnat_timestamp_show, + .short_help = "show cnat timestamp", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_session.h b/src/plugins/cnat/cnat_session.h new file mode 100644 index 00000000000..9e1e89342a8 --- /dev/null +++ b/src/plugins/cnat/cnat_session.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __CNAT_SESSION_H__
#define __CNAT_SESSION_H__

/*
 * NOTE(review): the header names of the #include lines below are missing in
 * this copy of the patch; restore them before building.
 */
#include

#include
#include
#include


/**
 * A session represents the memory of a translation.
 * In the tx direction (from behind to in front of the NAT), the
 * session is preserved so subsequent packets follow the same path
 * even if the translation has been updated. In the tx direction
 * the session represents the swap from the VIP to the server address.
 * In the RX direction the swap is from the server address/port to VIP.
 *
 * A session exists only as key and value in the bihash, there is no
 * pool for this object. If there were a pool, one would need to be
 * concerned about what worker is using it.
 */
typedef struct cnat_session_t_
{
  /**
   * this key sits in the same memory location as the 'key' in the bihash kvp
   */
  struct
  {
    /**
     * IP 4/6 address in the rx/tx direction
     */
    ip46_address_t cs_ip[VLIB_N_DIR];

    /**
     * ports in rx/tx
     */
    u16 cs_port[VLIB_N_DIR];

    /**
     * The IP protocol; only TCP and UDP are supported
     */
    ip_protocol_t cs_proto;

    /**
     * The address family describing the IP addresses
     */
    u8 cs_af;

    /**
     * spare space
     */
    u8 __cs_pad[2];
  } key;
  /**
   * this value sits in the same memory location as the 'value' in the
   * bihash kvp
   */
  struct
  {
    /**
     * The IP address to translate to.
     */
    ip46_address_t cs_ip[VLIB_N_DIR];

    /**
     * the port to translate to.
     */
    u16 cs_port[VLIB_N_DIR];

    /**
     * The load balance object to use to forward
     */
    index_t cs_lbi;

    /**
     * Index of the timestamp recording when this session was last used
     */
    u32 cs_ts_index;
    /**
     * Session flags: a bitmask of cnat_session_flag_t values
     */
    u32 flags;
  } value;
} cnat_session_t;

/**
 * Bits stored in cnat_session_t.value.flags
 */
typedef enum cnat_session_flag_t_
{
  /* the translation that created the session source NATs the flow */
  CNAT_SESSION_FLAG_HAS_SNAT = (1 << 0),
  /* cs_port[VLIB_RX] was allocated and is freed with the session */
  CNAT_SESSION_FLAG_ALLOC_PORT = (1 << 1),
  /* freeing the session does not release a client */
  CNAT_SESSION_FLAG_NO_CLIENT = (1 << 2),
} cnat_session_flag_t;

extern u8 *format_cnat_session (u8 * s, va_list * args);

/**
 * Ensure the session object correctly overlays the bihash key/value pair
 */
STATIC_ASSERT (STRUCT_OFFSET_OF (cnat_session_t, key) ==
               STRUCT_OFFSET_OF (clib_bihash_kv_40_48_t, key),
               "key overlaps");
STATIC_ASSERT (STRUCT_OFFSET_OF (cnat_session_t, value) ==
               STRUCT_OFFSET_OF (clib_bihash_kv_40_48_t, value),
               "value overlaps");
STATIC_ASSERT (sizeof (cnat_session_t) == sizeof (clib_bihash_kv_40_48_t),
               "session kvp");

/**
 * The DB of sessions
 */
extern clib_bihash_40_48_t cnat_session_db;

/**
 * Callback function invoked during a walk of all sessions
 */
typedef walk_rc_t (*cnat_session_walk_cb_t) (const cnat_session_t *
                                             session, void *ctx);

/**
 * Walk/visit each of the cnat sessions
 */
extern void cnat_session_walk (cnat_session_walk_cb_t cb, void *ctx);

/**
 * Scan the session DB for expired sessions.
 * i is the bucket index to start from; the return value is meant to be
 * passed back as i on the next call.
 */
extern u64 cnat_session_scan (vlib_main_t * vm, f64 start_time, int i);

/**
 * Purge all the sessions
 */
extern int cnat_session_purge (void);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */

#endif
diff --git a/src/plugins/cnat/cnat_snat.c b/src/plugins/cnat/cnat_snat.c
new file mode 100644
index 00000000000..2f6a6314c5b
--- /dev/null
+++ b/src/plugins/cnat/cnat_snat.c
@@ -0,0 +1,252 @@
/*
 * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +static void +cnat_compute_prefix_lengths_in_search_order (cnat_snat_pfx_table_t * + table, ip_address_family_t af) +{ + int i; + vec_reset_length (table->meta[af].prefix_lengths_in_search_order); + /* Note: bitmap reversed so this is in fact a longest prefix match */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, table->meta[af].non_empty_dst_address_length_bitmap, + ({ + int dst_address_length = 128 - i; + vec_add1 (table->meta[af].prefix_lengths_in_search_order, dst_address_length); + })); + /* *INDENT-ON* */ +} + +int +cnat_add_snat_prefix (ip_prefix_t * pfx) +{ + /* All packets destined to this prefix won't be source-NAT-ed */ + cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table; + clib_bihash_kv_24_8_t kv; + ip6_address_t *mask; + u64 af = ip_prefix_version (pfx);; + + mask = &table->ip_masks[pfx->len]; + if (AF_IP4 == af) + { + kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0]; + kv.key[1] = 0; + } + else + { + kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0]; + kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1]; + } + kv.key[2] = ((u64) af << 32) | pfx->len; + clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 1 /* is_add */ ); + + table->meta[af].dst_address_length_refcounts[pfx->len]++; + table->meta[af].non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap, + 128 - pfx->len, 1); + 
cnat_compute_prefix_lengths_in_search_order (table, af); + return 0; +} + +int +cnat_del_snat_prefix (ip_prefix_t * pfx) +{ + cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table; + clib_bihash_kv_24_8_t kv, val; + ip6_address_t *mask; + u64 af = ip_prefix_version (pfx);; + + mask = &table->ip_masks[pfx->len]; + if (AF_IP4 == af) + { + kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0]; + kv.key[1] = 0; + } + else + { + kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0]; + kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1]; + } + kv.key[2] = ((u64) af << 32) | pfx->len; + + if (clib_bihash_search_24_8 (&table->ip_hash, &kv, &val)) + { + return 1; + } + clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 0 /* is_add */ ); + /* refcount accounting */ + ASSERT (table->meta[af].dst_address_length_refcounts[pfx->len] > 0); + if (--table->meta[af].dst_address_length_refcounts[pfx->len] == 0) + { + table->meta[af].non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap, + 128 - pfx->len, 0); + cnat_compute_prefix_lengths_in_search_order (table, af); + } + return 0; +} + +u8 * +format_cnat_snat_prefix (u8 * s, va_list * args) +{ + clib_bihash_kv_24_8_t *kv = va_arg (*args, clib_bihash_kv_24_8_t *); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + u32 af = kv->key[2] >> 32; + u32 len = kv->key[2] & 0xffffffff; + if (AF_IP4 == af) + s = format (s, "%U/%d", format_ip4_address, &kv->key[0], len); + else + s = format (s, "%U/%d", format_ip6_address, &kv->key[0], len); + return (s); +} + +static clib_error_t * +cnat_set_snat (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip_address_t addr; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_ip_address, &addr)) + { + if (ip_addr_version (&addr) == AF_IP4) + clib_memcpy (&cnat_main.snat_ip4, &ip_addr_v4 (&addr), + sizeof (ip4_address_t)); + else + clib_memcpy 
(&cnat_main.snat_ip6, &ip_addr_v6 (&addr), + sizeof (ip6_address_t)); + } + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_set_snat_command, static) = +{ + .path = "cnat snat with", + .short_help = "cnat snat with [ip]", + .function = cnat_set_snat, +}; +/* *INDENT-ON* */ + +static clib_error_t * +cnat_snat_exclude (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip_prefix_t pfx; + u8 is_add = 1; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_ip_prefix, &pfx)) + ; + else if (unformat (input, "del")) + is_add = 0; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + if (is_add) + rv = cnat_add_snat_prefix (&pfx); + else + rv = cnat_del_snat_prefix (&pfx); + + if (rv) + { + return (clib_error_return (0, "error %d", rv, input)); + } + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_snat_exclude_command, static) = +{ + .path = "cnat snat exclude", + .short_help = "cnat snat exclude [ip]", + .function = cnat_snat_exclude, +}; +/* *INDENT-ON* */ + +static clib_error_t * +cnat_show_snat (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table; + vlib_cli_output (vm, "Source NAT\nip4: %U\nip6: %U\n", + format_ip4_address, &cnat_main.snat_ip4, + format_ip6_address, &cnat_main.snat_ip6); + vlib_cli_output (vm, "Prefixes:\n%U\n", + format_bihash_24_8, &table->ip_hash, 1); + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_show_snat_command, static) = +{ + .path = "show cnat snat", + .short_help = "show cnat snat", + .function = cnat_show_snat, +}; +/* *INDENT-ON* */ + +static clib_error_t * +cnat_snat_init (vlib_main_t * vm) +{ + cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table; + cnat_main_t *cm 
= &cnat_main; + int i; + for (i = 0; i < ARRAY_LEN (table->ip_masks); i++) + { + u32 j, i0, i1; + + i0 = i / 32; + i1 = i % 32; + + for (j = 0; j < i0; j++) + table->ip_masks[i].as_u32[j] = ~0; + + if (i1) + table->ip_masks[i].as_u32[i0] = + clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1)); + } + clib_bihash_init_24_8 (&table->ip_hash, "snat prefixes", + cm->snat_hash_buckets, cm->snat_hash_memory); + clib_bihash_set_kvp_format_fn_24_8 (&table->ip_hash, + format_cnat_snat_prefix); + + return (NULL); +} + +VLIB_INIT_FUNCTION (cnat_snat_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_snat.h b/src/plugins/cnat/cnat_snat.h new file mode 100644 index 00000000000..97bad8b01d0 --- /dev/null +++ b/src/plugins/cnat/cnat_snat.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CNAT_SNAT_H__ +#define __CNAT_SNAT_H__ + +#include + +always_inline int +cnat_search_snat_prefix (ip46_address_t * addr, ip_address_family_t af) +{ + /* Returns 0 if addr matches any of the listed prefixes */ + cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table; + clib_bihash_kv_24_8_t kv, val; + int i, n_p, rv; + n_p = vec_len (table->meta[af].prefix_lengths_in_search_order); + if (AF_IP4 == af) + { + kv.key[0] = addr->ip4.as_u32; + kv.key[1] = 0; + } + else + { + kv.key[0] = addr->as_u64[0]; + kv.key[1] = addr->as_u64[1]; + } + + /* + * start search from a mask length same length or shorter. + * we don't want matches longer than the mask passed + */ + i = 0; + for (; i < n_p; i++) + { + int dst_address_length = + table->meta[af].prefix_lengths_in_search_order[i]; + ip6_address_t *mask = &table->ip_masks[dst_address_length]; + + ASSERT (dst_address_length >= 0 && dst_address_length <= 128); + /* As lengths are decreasing, masks are increasingly specific. */ + kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = ((u64) af << 32) | dst_address_length; + rv = clib_bihash_search_inline_2_24_8 (&table->ip_hash, &kv, &val); + if (rv == 0) + return 0; + } + return -1; +} + +extern int cnat_add_snat_prefix (ip_prefix_t * pfx); +extern int cnat_del_snat_prefix (ip_prefix_t * pfx); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/cnat/cnat_translation.c b/src/plugins/cnat/cnat_translation.c new file mode 100644 index 00000000000..f680a162ec8 --- /dev/null +++ b/src/plugins/cnat/cnat_translation.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +cnat_translation_t *cnat_translation_pool; +clib_bihash_8_8_t cnat_translation_db; + +static fib_node_type_t cnat_translation_fib_node_type; + +vlib_combined_counter_main_t cnat_translation_counters = { + .name = "cnat-translation", + .stat_segment_name = "/net/cnat-translation", +}; + +static void +cnat_tracker_release (cnat_ep_trk_t * trk) +{ + fib_entry_untrack (trk->ct_fei, trk->ct_sibling); +} + +static void +cnat_tracker_track (index_t cti, + const cnat_endpoint_tuple_t * path, cnat_ep_trk_t * trk) +{ + fib_prefix_t pfx; + + ip_address_to_fib_prefix (&path->dst_ep.ce_ip, &pfx); + + clib_memcpy (&trk->ct_ep[VLIB_TX], &path->dst_ep, + sizeof (trk->ct_ep[VLIB_TX])); + clib_memcpy (&trk->ct_ep[VLIB_RX], &path->src_ep, + sizeof (trk->ct_ep[VLIB_RX])); + + trk->ct_fei = fib_entry_track (CNAT_FIB_TABLE, + &pfx, + cnat_translation_fib_node_type, + cti, &trk->ct_sibling); + + fib_entry_contribute_forwarding (trk->ct_fei, + fib_forw_chain_type_from_fib_proto + (pfx.fp_proto), &trk->ct_dpo); +} + +void +cnat_add_translation_to_db (index_t cci, u16 port, ip_protocol_t proto, + index_t cti) +{ + clib_bihash_kv_8_8_t bkey; + u64 key; + + key = (proto << 16) | port; + key = key << 32 | (u32) cci; + + bkey.key = key; + bkey.value = cti; + + clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 1); +} + +void +cnat_remove_translation_from_db (index_t cci, u16 port, ip_protocol_t proto) +{ + clib_bihash_kv_8_8_t bkey; + u64 key; + + key = (proto << 16) | port; + key = 
key << 32 | (u32) cci; + + bkey.key = key; + + clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 0); +} + +static void +cnat_translation_stack (cnat_translation_t * ct) +{ + fib_protocol_t fproto; + cnat_ep_trk_t *trk; + dpo_proto_t dproto; + index_t lbi; + + fproto = ip_address_family_to_fib_proto (ct->ct_vip.ce_ip.version); + dproto = fib_proto_to_dpo (fproto); + + lbi = load_balance_create (vec_len (ct->ct_paths), + fib_proto_to_dpo (fproto), IP_FLOW_HASH_DEFAULT); + + vec_foreach (trk, ct->ct_paths) + load_balance_set_bucket (lbi, trk - ct->ct_paths, &trk->ct_dpo); + + dpo_set (&ct->ct_lb, DPO_LOAD_BALANCE, dproto, lbi); + dpo_stack (cnat_client_dpo, dproto, &ct->ct_lb, &ct->ct_lb); +} + +int +cnat_translation_delete (u32 id) +{ + cnat_translation_t *ct; + cnat_ep_trk_t *trk; + + if (pool_is_free_index (cnat_translation_pool, id)) + return (VNET_API_ERROR_NO_SUCH_ENTRY); + + ct = pool_elt_at_index (cnat_translation_pool, id); + + dpo_reset (&ct->ct_lb); + + vec_foreach (trk, ct->ct_paths) cnat_tracker_release (trk); + + cnat_remove_translation_from_db (ct->ct_cci, ct->ct_vip.ce_port, + ct->ct_proto); + cnat_client_translation_deleted (ct->ct_cci); + pool_put (cnat_translation_pool, ct); + + return (0); +} + +u32 +cnat_translation_update (const cnat_endpoint_t * vip, + ip_protocol_t proto, + const cnat_endpoint_tuple_t * paths, u8 flags) +{ + const cnat_endpoint_tuple_t *path; + const cnat_client_t *cc; + cnat_translation_t *ct; + cnat_ep_trk_t *trk; + index_t cci; + + /* do we know of this ep's vip */ + cci = cnat_client_add (&vip->ce_ip, flags); + cc = cnat_client_get (cci); + + ct = cnat_find_translation (cc->parent_cci, vip->ce_port, proto); + + if (NULL == ct) + { + pool_get_zero (cnat_translation_pool, ct); + + clib_memcpy (&ct->ct_vip, vip, sizeof (*vip)); + ct->ct_proto = proto; + ct->ct_cci = cci; + ct->index = ct - cnat_translation_pool; + + cnat_add_translation_to_db (cci, ct->ct_vip.ce_port, ct->ct_proto, + ct->index); + 
cnat_client_translation_added (cci); + + vlib_validate_combined_counter (&cnat_translation_counters, ct->index); + vlib_zero_combined_counter (&cnat_translation_counters, ct->index); + } + ct->flags = flags; + + vec_foreach (trk, ct->ct_paths) + { + cnat_tracker_release (trk); + } + + vec_reset_length (ct->ct_paths); + + vec_foreach (path, paths) + { + vec_add2 (ct->ct_paths, trk, 1); + + cnat_tracker_track (ct->index, path, trk); + } + + cnat_translation_stack (ct); + + return (ct->index); +} + +void +cnat_translation_walk (cnat_translation_walk_cb_t cb, void *ctx) +{ + u32 api; + + /* *INDENT-OFF* */ + pool_foreach_index(api, cnat_translation_pool, + ({ + if (!cb(api, ctx)) + break; + })); + /* *INDENT-ON* */ +} + +static u8 * +format_cnat_ep_trk (u8 * s, va_list * args) +{ + cnat_ep_trk_t *ck = va_arg (*args, cnat_ep_trk_t *); + u32 indent = va_arg (*args, u32); + + s = format (s, "%U->%U", format_cnat_endpoint, &ck->ct_ep[VLIB_RX], + format_cnat_endpoint, &ck->ct_ep[VLIB_TX]); + s = format (s, "\n%Ufib-entry:%d", format_white_space, indent, ck->ct_fei); + s = format (s, "\n%U%U", + format_white_space, indent, format_dpo_id, &ck->ct_dpo, 6); + + return (s); +} + +u8 * +format_cnat_translation (u8 * s, va_list * args) +{ + cnat_translation_t *ct = va_arg (*args, cnat_translation_t *); + cnat_ep_trk_t *ck; + + s = format (s, "[%d] ", ct->index); + s = format (s, "%U %U", format_cnat_endpoint, &ct->ct_vip, + format_ip_protocol, ct->ct_proto); + + vec_foreach (ck, ct->ct_paths) + s = format (s, "\n%U", format_cnat_ep_trk, ck, 2); + + /* If printing a trace, the LB object might be deleted */ + if (!pool_is_free_index (load_balance_pool, ct->ct_lb.dpoi_index)) + { + s = format (s, "\n via:"); + s = format (s, "\n%U%U", + format_white_space, 2, format_dpo_id, &ct->ct_lb, 2); + } + + return (s); +} + +static clib_error_t * +cnat_translation_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + index_t cti; + cnat_translation_t *ct; + + cti = 
INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &cti)) + ; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + if (INDEX_INVALID == cti) + { + /* *INDENT-OFF* */ + pool_foreach_index(cti, cnat_translation_pool, + ({ + ct = pool_elt_at_index (cnat_translation_pool, cti); + vlib_cli_output(vm, "%U", format_cnat_translation, ct); + })); + /* *INDENT-ON* */ + } + else + { + vlib_cli_output (vm, "Invalid policy ID:%d", cti); + } + + return (NULL); +} + +int +cnat_translation_purge (void) +{ + /* purge all the translations */ + index_t tri, *trp, *trs = NULL; + + /* *INDENT-OFF* */ + pool_foreach_index(tri, cnat_translation_pool, + ({ + vec_add1(trs, tri); + })); + /* *INDENT-ON* */ + + vec_foreach (trp, trs) cnat_translation_delete (*trp); + + ASSERT (0 == pool_elts (cnat_translation_pool)); + + vec_free (trs); + + return (0); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_translation_show_cmd_node, static) = { + .path = "show cnat translation", + .function = cnat_translation_show, + .short_help = "show cnat translation ", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static fib_node_t * +cnat_translation_get_node (fib_node_index_t index) +{ + cnat_translation_t *ct = cnat_translation_get (index); + return (&(ct->ct_node)); +} + +static cnat_translation_t * +cnat_translation_get_from_node (fib_node_t * node) +{ + return ((cnat_translation_t *) (((char *) node) - + STRUCT_OFFSET_OF (cnat_translation_t, + ct_node))); +} + +static void +cnat_translation_last_lock_gone (fib_node_t * node) +{ + /**/} + +/* + * A back walk has reached this ABF policy + */ +static fib_node_back_walk_rc_t +cnat_translation_back_walk_notify (fib_node_t * node, + fib_node_back_walk_ctx_t * ctx) +{ + /* + * re-stack the fmask on the n-eos of the via + */ + cnat_translation_t *ct = cnat_translation_get_from_node (node); + + cnat_translation_stack (ct); + + return 
(FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * The translation's graph node virtual function table + */ +static const fib_node_vft_t cnat_translation_vft = { + .fnv_get = cnat_translation_get_node, + .fnv_last_lock = cnat_translation_last_lock_gone, + .fnv_back_walk = cnat_translation_back_walk_notify, +}; + +static clib_error_t * +cnat_translation_cli_add_del (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 del_index = INDEX_INVALID; + ip_protocol_t proto = IP_PROTOCOL_TCP; + cnat_endpoint_t vip; + u8 flags = CNAT_FLAG_EXCLUSIVE; + cnat_endpoint_tuple_t tmp, *paths = NULL, *path; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "add")) + del_index = INDEX_INVALID; + else if (unformat (input, "del %d", &del_index)) + ; + else if (unformat (input, "proto %U", unformat_ip_protocol, &proto)) + ; + else if (unformat (input, "vip %U", unformat_cnat_ep, &vip)) + flags = CNAT_FLAG_EXCLUSIVE; + else if (unformat (input, "real %U", unformat_cnat_ep, &vip)) + flags = 0; + else if (unformat (input, "to %U", unformat_cnat_ep_tuple, &tmp)) + { + pool_get (paths, path); + clib_memcpy (path, &tmp, sizeof (cnat_endpoint_tuple_t)); + } + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + if (INDEX_INVALID == del_index) + cnat_translation_update (&vip, proto, paths, flags); + else + cnat_translation_delete (del_index); + + pool_free (paths); + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cnat_translation_cli_add_del_command, static) = +{ + .path = "cnat translation", + .short_help = "cnat translation [add|del] proto [TCP|UDP] [vip|real] [ip] [port] [to [ip] [port]->[ip] [port]]", + .function = cnat_translation_cli_add_del, +}; +/* *INDENT-ON* */ + +static clib_error_t * +cnat_translation_init (vlib_main_t * vm) +{ + cnat_main_t *cm = &cnat_main; + cnat_translation_fib_node_type = + fib_node_register_new_type (&cnat_translation_vft); + + 
clib_bihash_init_8_8 (&cnat_translation_db, "CNat translation DB", + cm->translation_hash_buckets, + cm->translation_hash_memory); + + return (NULL); +} + +VLIB_INIT_FUNCTION (cnat_translation_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_translation.h b/src/plugins/cnat/cnat_translation.h new file mode 100644 index 00000000000..748487a908a --- /dev/null +++ b/src/plugins/cnat/cnat_translation.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CNAT_TRANSLATION_H__ +#define __CNAT_TRANSLATION_H__ + +#include +#include +#include + +/** + * Counters for each translation + */ +extern vlib_combined_counter_main_t cnat_translation_counters; + +/** + * Data used to track an EP in the FIB + */ +typedef struct cnat_ep_trk_t_ +{ + /** + * The EP being tracked + */ + cnat_endpoint_t ct_ep[VLIB_N_DIR]; + + /** + * The FIB entry for the EP + */ + fib_node_index_t ct_fei; + + /** + * The sibling on the entry's child list + */ + u32 ct_sibling; + + /** + * The forwarding contributed by the entry + */ + dpo_id_t ct_dpo; +} cnat_ep_trk_t; + +typedef enum cnat_translation_flag_t_ +{ + CNAT_TRANSLATION_FLAG_ALLOCATE_PORT = (1 << 0), +} cnat_translation_flag_t; + +/** + * A Translation represents the translation of a VEP to one of a set + * of real server addresses + */ +typedef struct cnat_translation_t_ +{ + /** + * Linkage into the FIB graph + */ + fib_node_t ct_node; + + /** + * The LB used to forward to the backends + */ + dpo_id_t ct_lb; + + /** + * The Virtual end point + */ + cnat_endpoint_t ct_vip; + + /** + * The vector of tracked back-ends + */ + cnat_ep_trk_t *ct_paths; + + /** + * The ip protocol for the translation + */ + ip_protocol_t ct_proto; + + /** + * The client object this translation belongs on + */ + index_t ct_cci; + + /** + * Own index (if copied for trace) + */ + index_t index; + + /** + * Translation flags + */ + u8 flags; +} cnat_translation_t; + +extern cnat_translation_t *cnat_translation_pool; + +extern u8 *format_cnat_translation (u8 * s, va_list * args); + +/** + * create or update a translation + * + * @param vip The Virtual Endpoint + * @param ip_proto The ip protocol to translate + * @param backends the backends to choose from + * + * @return the ID of the translation. 
used to delete and gather stats + */ +extern u32 cnat_translation_update (const cnat_endpoint_t * vip, + ip_protocol_t ip_proto, + const cnat_endpoint_tuple_t * + backends, u8 flags); + +/** + * Add a translation to the bihash + * + * @param cci the ID of the parent client + * @param port the translation port + * @param proto the translation proto + * @param cti the translation index to be used as value + */ +extern void cnat_add_translation_to_db (index_t cci, u16 port, + ip_protocol_t proto, index_t cti); + +/** + * Remove a translation from the bihash + * + * @param cci the ID of the parent client + * @param port the translation port + * @param proto the translation proto + */ +extern void cnat_remove_translation_from_db (index_t cci, u16 port, + ip_protocol_t proto); + +/** + * Delete a translation + * + * @param id the ID as returned from the create + */ +extern int cnat_translation_delete (u32 id); + +/** + * Callback function invoked during a walk of all translations + */ +typedef walk_rc_t (*cnat_translation_walk_cb_t) (index_t index, void *ctx); + +/** + * Walk/visit each of the translations + */ +extern void cnat_translation_walk (cnat_translation_walk_cb_t cb, void *ctx); + +/** + * Purge all the translations + */ +extern int cnat_translation_purge (void); + +/* + * Data plane functions + */ +extern clib_bihash_8_8_t cnat_translation_db; +/** + * Get a translation from its index in cnat_translation_pool + * + * @param cti the translation's pool index + * + * @return the translation at that index + */ +static_always_inline cnat_translation_t * +cnat_translation_get (index_t cti) +{ + return (pool_elt_at_index (cnat_translation_pool, cti)); +} +/** + * Look a translation up in the cnat_translation_db bihash + * + * The 64 bit key holds the proto from bit 48 upwards, the port in bits + * 32-47 and cti in the low 32 bits. + * + * @param cti the index placed in the low 32 bits of the key. + * NOTE(review): cnat_add_translation_to_db documents its + * first argument as the parent client's ID (cci) - confirm + * that this is what callers pass here + * @param port the translation port + * @param proto the translation proto + * + * @return the translation whose pool index is stored under the key, or + * NULL when the search does not find the key + */ +static_always_inline cnat_translation_t * +cnat_find_translation (index_t cti, u16 port, ip_protocol_t proto) +{ + clib_bihash_kv_8_8_t bkey, bvalue; + u64 key; + int rv; + + key = (proto << 16) | port; + key = key << 32 | (u32) cti; + + bkey.key = key; + rv = clib_bihash_search_inline_2_8_8 (&cnat_translation_db, &bkey, &bvalue); + if (!rv) + return (pool_elt_at_index (cnat_translation_pool, bvalue.value)); + + return (NULL); +} + +/* + * fd.io
coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c new file mode 100644 index 00000000000..1f2287e1de3 --- /dev/null +++ b/src/plugins/cnat/cnat_types.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +cnat_main_t cnat_main; +fib_source_t cnat_fib_source; +cnat_timestamp_t *cnat_timestamps; +throttle_t cnat_throttle; + +char *cnat_error_strings[] = { +#define cnat_error(n,s) s, +#include +#undef cnat_error +}; + +uword +unformat_cnat_ep (unformat_input_t * input, va_list * args) +{ + cnat_endpoint_t *a = va_arg (*args, cnat_endpoint_t *); + int port = 0; + + clib_memset (a, 0, sizeof (*a)); + if (unformat (input, "%U %d", unformat_ip_address, &a->ce_ip, &port)) + ; + else if (unformat_user (input, unformat_ip_address, &a->ce_ip)) + ; + else if (unformat (input, "%d", &port)) + ; + else + return 0; + a->ce_port = (u16) port; + return 1; +} + +uword +unformat_cnat_ep_tuple (unformat_input_t * input, va_list * args) +{ + cnat_endpoint_tuple_t *a = va_arg (*args, cnat_endpoint_tuple_t *); + if (unformat (input, "%U->%U", unformat_cnat_ep, &a->src_ep, + unformat_cnat_ep, &a->dst_ep)) + ; + else if (unformat (input, "->%U", unformat_cnat_ep, &a->dst_ep)) + ; + else if (unformat (input, "%U->", unformat_cnat_ep, &a->src_ep)) + ; + else + 
return 0; + return 1; +} + +u8 * +format_cnat_endpoint (u8 * s, va_list * args) +{ + cnat_endpoint_t *cep = va_arg (*args, cnat_endpoint_t *); + + s = format (s, "%U;%d", format_ip_address, &cep->ce_ip, cep->ce_port); + + return (s); +} + +static clib_error_t * +cnat_types_init (vlib_main_t * vm) +{ + vlib_thread_main_t *tm = &vlib_thread_main; + u32 n_vlib_mains = tm->n_vlib_mains; + cnat_fib_source = fib_source_allocate ("cnat", + CNAT_FIB_SOURCE_PRIORITY, + FIB_SOURCE_BH_SIMPLE); + + clib_rwlock_init (&cnat_main.ts_lock); + clib_spinlock_init (&cnat_main.src_ports_lock); + clib_bitmap_validate (cnat_main.src_ports, UINT16_MAX); + throttle_init (&cnat_throttle, n_vlib_mains, 1e-3); + + return (NULL); +} + +static clib_error_t * +cnat_config (vlib_main_t * vm, unformat_input_t * input) +{ + cnat_main_t *cm = &cnat_main; + + cm->session_hash_memory = CNAT_DEFAULT_SESSION_MEMORY; + cm->session_hash_buckets = CNAT_DEFAULT_SESSION_BUCKETS; + cm->translation_hash_memory = CNAT_DEFAULT_TRANSLATION_MEMORY; + cm->translation_hash_buckets = CNAT_DEFAULT_TRANSLATION_BUCKETS; + cm->snat_hash_memory = CNAT_DEFAULT_SNAT_MEMORY; + cm->snat_hash_buckets = CNAT_DEFAULT_SNAT_BUCKETS; + cm->scanner_timeout = CNAT_DEFAULT_SCANNER_TIMEOUT; + cm->session_max_age = CNAT_DEFAULT_SESSION_MAX_AGE; + cm->tcp_max_age = CNAT_DEFAULT_TCP_MAX_AGE; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "session-db-buckets %u", &cm->session_hash_buckets)) + ; + else if (unformat (input, "session-db-memory %U", + unformat_memory_size, &cm->session_hash_memory)) + ; + else if (unformat (input, "translation-db-buckets %u", + &cm->translation_hash_buckets)) + ; + else if (unformat (input, "translation-db-memory %U", + unformat_memory_size, &cm->translation_hash_memory)) + ; + else if (unformat (input, "snat-db-buckets %u", &cm->snat_hash_buckets)) + ; + else if (unformat (input, "snat-db-memory %U", + unformat_memory_size, &cm->snat_hash_memory)) + ; + else if 
(unformat (input, "session-cleanup-timeout %f", + &cm->scanner_timeout)) + ; + else if (unformat (input, "session-max-age %u", &cm->session_max_age)) + ; + else if (unformat (input, "tcp-max-age %u", &cm->tcp_max_age)) + ; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat"); +VLIB_INIT_FUNCTION (cnat_types_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h new file mode 100644 index 00000000000..8659aa5e9fd --- /dev/null +++ b/src/plugins/cnat/cnat_types.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CNAT_TYPES_H__ +#define __CNAT_TYPES_H__ + +#include +#include +#include +#include + +/* only in the default table for v4 and v6 */ +#define CNAT_FIB_TABLE 0 + +/* default lifetime of NAT sessions (seconds) */ +#define CNAT_DEFAULT_SESSION_MAX_AGE 30 +/* lifetime of TCP conn NAT sessions after SYNACK (seconds) */ +#define CNAT_DEFAULT_TCP_MAX_AGE 3600 +/* lifetime of TCP conn NAT sessions after RST/FIN (seconds) */ +#define CNAT_DEFAULT_TCP_RST_TIMEOUT 5 +#define CNAT_DEFAULT_SCANNER_TIMEOUT (1.0) + +#define CNAT_DEFAULT_SESSION_BUCKETS 1024 +#define CNAT_DEFAULT_TRANSLATION_BUCKETS 1024 +#define CNAT_DEFAULT_SNAT_BUCKETS 1024 + +#define CNAT_DEFAULT_SESSION_MEMORY (1 << 20) +#define CNAT_DEFAULT_TRANSLATION_MEMORY (256 << 10) +#define CNAT_DEFAULT_SNAT_MEMORY (64 << 20) + +/* This should be strictly lower than FIB_SOURCE_INTERFACE + * from fib_source.h */ +#define CNAT_FIB_SOURCE_PRIORITY 0x02 + +/* Initial refcnt for timestamps (2 : session & rsession) */ +#define CNAT_TIMESTAMP_INIT_REFCNT 2 + +#define MIN_SRC_PORT ((u16) 0xC000) + +typedef struct cnat_endpoint_t_ +{ + ip_address_t ce_ip; + u16 ce_port; +} cnat_endpoint_t; + +typedef struct cnat_endpoint_tuple_t_ +{ + cnat_endpoint_t dst_ep; + cnat_endpoint_t src_ep; +} cnat_endpoint_tuple_t; + + + +typedef struct +{ + u32 dst_address_length_refcounts[129]; + u16 *prefix_lengths_in_search_order; + uword *non_empty_dst_address_length_bitmap; +} cnat_snat_pfx_table_meta_t; + +typedef struct +{ + /* Stores (ip family, prefix & mask) */ + clib_bihash_24_8_t ip_hash; + /* family dependant cache */ + cnat_snat_pfx_table_meta_t meta[2]; + /* Precomputed ip masks (ip4 & ip6) */ + ip6_address_t ip_masks[129]; +} cnat_snat_pfx_table_t; + +typedef struct cnat_main_ +{ + /* Memory size of the session bihash */ + uword session_hash_memory; + + /* Number of buckets of the session bihash */ + u32 session_hash_buckets; + + /* Memory size of the translation bihash */ + uword translation_hash_memory; + + /* 
Number of buckets of the translation bihash */ + u32 translation_hash_buckets; + + /* Memory size of the source NAT prefix bihash */ + uword snat_hash_memory; + + /* Number of buckets of the source NAT prefix bihash */ + u32 snat_hash_buckets; + + /* Timeout after which to clear sessions (in seconds) */ + u32 session_max_age; + + /* Timeout after which to clear an established TCP + * session (in seconds) */ + u32 tcp_max_age; + + /* delay in seconds between two scans of session/clients tables */ + f64 scanner_timeout; + + /* Lock for the timestamp pool */ + clib_rwlock_t ts_lock; + + /* Source ports bitmap for snat */ + clib_bitmap_t *src_ports; + + /* Lock for src_ports access */ + clib_spinlock_t src_ports_lock; + + /* Ip4 Address to use for source NATing */ + ip4_address_t snat_ip4; + + /* Ip6 Address to use for source NATing */ + ip6_address_t snat_ip6; + + /* Longest prefix Match table for source NATing */ + cnat_snat_pfx_table_t snat_pfx_table; +} cnat_main_t; +/* + * A reference counted last-seen time and lifetime, allocated from the + * cnat_timestamps pool + */ +typedef struct cnat_timestamp_t_ +{ + /* Last time said session was seen */ + f64 last_seen; + /* expire after N seconds. + * NOTE(review): this is a u16 while the session_max_age it is initialised + * from in cnat_timestamp_new () is a u32 - confirm that larger configured + * values cannot be truncated */ + u16 lifetime; + /* Users refcount, initialised to CNAT_TIMESTAMP_INIT_REFCNT by + * cnat_timestamp_new () */ + u16 refcnt; +} cnat_timestamp_t; + +typedef struct cnat_node_ctx_t_ +{ + f64 now; + u64 seed; + u32 thread_index; + ip_address_family_t af; + u8 do_trace; +} cnat_node_ctx_t; + +extern u8 *format_cnat_endpoint (u8 * s, va_list * args); +extern uword unformat_cnat_ep_tuple (unformat_input_t * input, + va_list * args); +extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args); +extern cnat_timestamp_t *cnat_timestamps; +extern fib_source_t cnat_fib_source; +extern cnat_main_t cnat_main; +extern throttle_t cnat_throttle; + +extern char *cnat_error_strings[]; + +typedef enum +{ +#define cnat_error(n,s) CNAT_ERROR_##n, +#include +#undef cnat_error + CNAT_N_ERROR, +} cnat_error_t; + +/* + Dataplane functions +*/ + +always_inline u32 +cnat_timestamp_new (f64 t) +{ + u32 index; +
cnat_timestamp_t *ts; + clib_rwlock_writer_lock (&cnat_main.ts_lock); + pool_get (cnat_timestamps, ts); + ts->last_seen = t; + ts->lifetime = cnat_main.session_max_age; + ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT; + index = ts - cnat_timestamps; + clib_rwlock_writer_unlock (&cnat_main.ts_lock); + return index; +} +/* + * Take one more reference on the timestamp at the given pool index. + * NOTE(review): refcnt is incremented while holding only the reader side + * of ts_lock, whereas cnat_timestamp_free () decrements it under the writer + * lock - confirm that concurrent increments cannot race. + */ +always_inline void +cnat_timestamp_inc_refcnt (u32 index) +{ + clib_rwlock_reader_lock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + ts->refcnt++; + clib_rwlock_reader_unlock (&cnat_main.ts_lock); +} +/* + * Written to record t as the last time the timestamp's session was seen. + * NOTE(review): the unconditional return on the first line makes the rest + * of the body unreachable, so last_seen is never refreshed after + * cnat_timestamp_new () - confirm that this is deliberate. + */ +always_inline void +cnat_timestamp_update (u32 index, f64 t) +{ + return; /* everything below is currently dead code */ + clib_rwlock_reader_lock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + ts->last_seen = t; + clib_rwlock_reader_unlock (&cnat_main.ts_lock); +} +/* + * Set the lifetime of the timestamp at the given pool index. + */ +always_inline void +cnat_timestamp_set_lifetime (u32 index, u16 lifetime) +{ + clib_rwlock_reader_lock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + ts->lifetime = lifetime; + clib_rwlock_reader_unlock (&cnat_main.ts_lock); +} +/* + * Return the expiry time of the timestamp (last_seen + lifetime), or -1 + * when index is INDEX_INVALID. + */ +always_inline f64 +cnat_timestamp_exp (u32 index) +{ + f64 t; + if (INDEX_INVALID == index) + return -1; + clib_rwlock_reader_lock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + t = ts->last_seen + (f64) ts->lifetime; + clib_rwlock_reader_unlock (&cnat_main.ts_lock); + return t; +} +/* + * Drop one reference on the timestamp and return it to the pool once the + * count reaches zero; does nothing for INDEX_INVALID. + */ +always_inline void +cnat_timestamp_free (u32 index) +{ + if (INDEX_INVALID == index) + return; + clib_rwlock_writer_lock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + ts->refcnt--; + if (0 == ts->refcnt) + pool_put (cnat_timestamps, ts); + clib_rwlock_writer_unlock (&cnat_main.ts_lock); +} +/* + * Clear the given port's bit in the src_ports bitmap, under src_ports_lock. + */ +always_inline void +cnat_free_port (u16 port) +{ + cnat_main_t *cm = &cnat_main; + clib_spinlock_lock (&cm->src_ports_lock); + clib_bitmap_set_no_check (cm->src_ports, port, 0); + clib_spinlock_unlock
(&cm->src_ports_lock); +} + +/* + * Allocate a source port from the src_ports bitmap. + * + * @param cm the cnat main structure holding the bitmap and its lock + * @param port in: the preferred port in network byte order, 0 meaning + * start looking at MIN_SRC_PORT; + * out: the allocated port in network byte order + * + * @return 0 on success, -1 when no clear bit was found, in which case + * *port does not hold a usable port + */ +always_inline int +cnat_allocate_port (cnat_main_t * cm, u16 * port) +{ + *port = clib_net_to_host_u16 (*port); + if (*port == 0) + *port = MIN_SRC_PORT; + clib_spinlock_lock (&cm->src_ports_lock); + if (clib_bitmap_get_no_check (cm->src_ports, *port)) + { + /* the preferred port is taken: try the next clear bit above it, then + * search again from MIN_SRC_PORT */ + *port = clib_bitmap_next_clear (cm->src_ports, *port); + if (PREDICT_FALSE (*port >= UINT16_MAX)) + *port = clib_bitmap_next_clear (cm->src_ports, MIN_SRC_PORT); + if (PREDICT_FALSE (*port >= UINT16_MAX)) + { + /* nothing free: the lock must not stay held on this exit */ + clib_spinlock_unlock (&cm->src_ports_lock); + return -1; + } + } + clib_bitmap_set_no_check (cm->src_ports, *port, 1); + *port = clib_host_to_net_u16 (*port); + clib_spinlock_unlock (&cm->src_ports_lock); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/cnat/test/test_cnat.py b/src/plugins/cnat/test/test_cnat.py new file mode 100644 index 00000000000..18e3baadbed --- /dev/null +++ b/src/plugins/cnat/test/test_cnat.py @@ -0,0 +1,596 @@ +#!/usr/bin/env python3 + +import unittest + +from framework import VppTestCase, VppTestRunner +from vpp_ip import DpoProto + +from scapy.packet import Raw +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP, TCP +from scapy.layers.inet6 import IPv6 + +from ipaddress import ip_address, ip_network, \ + IPv4Address, IPv6Address, IPv4Network, IPv6Network + +from vpp_object import VppObject +from vpp_papi import VppEnum + +N_PKTS = 15 + + +def find_cnat_translation(test, id): + ts = test.vapi.cnat_translation_dump() + for t in ts: + if id == t.translation.id: + return True + return False + + +class Ep(object): + """ CNat endpoint """ + + def __init__(self, ip, port, l4p=TCP): + self.ip = ip + self.port = port + self.l4p = l4p + + def encode(self): + return {'addr': self.ip, + 'port': self.port} + + def __str__(self): + return ("%s:%d" % (self.ip, self.port)) + + +class EpTuple(object): + """ CNat endpoint """ + + def __init__(self, src, dst): + self.src = src +
self.dst = dst + + def encode(self): + return {'src_ep': self.src.encode(), + 'dst_ep': self.dst.encode()} + + def __str__(self): + return ("%s->%s" % (self.src, self.dst)) + + +class VppCNatTranslation(VppObject): + + def __init__(self, test, iproto, vip, paths): + self._test = test + self.vip = vip + self.iproto = iproto + self.paths = paths + self.encoded_paths = [] + for path in self.paths: + self.encoded_paths.append(path.encode()) + + @property + def vl4_proto(self): + ip_proto = VppEnum.vl_api_ip_proto_t + return { + UDP: ip_proto.IP_API_PROTO_UDP, + TCP: ip_proto.IP_API_PROTO_TCP, + }[self.iproto] + + def delete(self): + r = self._test.vapi.cnat_translation_del(id=self.id) + + def add_vpp_config(self): + r = self._test.vapi.cnat_translation_update( + {'vip': self.vip.encode(), + 'ip_proto': self.vl4_proto, + 'n_paths': len(self.paths), + 'paths': self.encoded_paths}) + self._test.registry.register(self, self._test.logger) + self.id = r.id + + def modify_vpp_config(self, paths): + self.paths = paths + self.encoded_paths = [] + for path in self.paths: + self.encoded_paths.append(path.encode()) + + r = self._test.vapi.cnat_translation_update( + {'vip': self.vip.encode(), + 'ip_proto': self.vl4_proto, + 'n_paths': len(self.paths), + 'paths': self.encoded_paths}) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.cnat_translation_del(self.id) + + def query_vpp_config(self): + return find_cnat_translation(self._test, self.id) + + def object_id(self): + return ("cnat-translation-%s" % (self.vip)) + + def get_stats(self): + c = self._test.statistics.get_counter("/net/cnat-translation") + return c[0][self.id] + + +class VppCNATSourceNat(VppObject): + + def __init__(self, test, address, exclude_subnets=[]): + self._test = test + self.address = address + self.exclude_subnets = exclude_subnets + + def add_vpp_config(self): + a = ip_address(self.address) + if 4 == a.version: + 
self._test.vapi.cnat_set_snat_addresses(snat_ip4=self.address) + else: + self._test.vapi.cnat_set_snat_addresses(snat_ip6=self.address) + for subnet in self.exclude_subnets: + self.cnat_exclude_subnet(subnet, True) + + def cnat_exclude_subnet(self, exclude_subnet, isAdd=True): + add = 1 if isAdd else 0 + self._test.vapi.cnat_add_del_snat_prefix( + prefix=exclude_subnet, is_add=add) + + def query_vpp_config(self): + return False + + def remove_vpp_config(self): + return False + + +class TestCNatTranslation(VppTestCase): + """ CNat Translation """ + extra_vpp_punt_config = ["cnat", "{", + "session-max-age", "1", + "tcp-max-age", "1", "}"] + + @classmethod + def setUpClass(cls): + super(TestCNatTranslation, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestCNatTranslation, cls).tearDownClass() + + def setUp(self): + super(TestCNatTranslation, self).setUp() + + self.create_pg_interfaces(range(3)) + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip4() + i.resolve_arp() + i.config_ip6() + i.resolve_ndp() + + def tearDown(self): + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + super(TestCNatTranslation, self).tearDown() + + def cnat_create_translation(self, vip, nbr, isV6=False): + ip_v = "ip6" if isV6 else "ip4" + dep = Ep(getattr(self.pg1.remote_hosts[nbr], ip_v), 4000 + nbr) + sep = Ep("::", 0) if isV6 else Ep("0.0.0.0", 0) + t1 = VppCNatTranslation( + self, vip.l4p, vip, + [EpTuple(sep, dep), EpTuple(sep, dep)]) + t1.add_vpp_config() + return t1 + + def cnat_test_translation(self, t1, nbr, sports, isV6=False): + ip_v = "ip6" if isV6 else "ip4" + ip_class = IPv6 if isV6 else IP + vip = t1.vip + + # + # Flows + # + for src in self.pg0.remote_hosts: + for sport in sports: + # from client to vip + p1 = (Ether(dst=self.pg0.local_mac, + src=src.mac) / + ip_class(src=getattr(src, ip_v), dst=vip.ip) / + vip.l4p(sport=sport, dport=vip.port) / + Raw()) + + self.vapi.cli("trace add pg-input 1") + rxs = 
self.send_and_expect(self.pg0, + p1 * N_PKTS, + self.pg1) + + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + getattr(self.pg1.remote_hosts[nbr], ip_v)) + self.assertEqual(rx[vip.l4p].dport, 4000 + nbr) + self.assertEqual( + rx[ip_class].src, + getattr(src, ip_v)) + self.assertEqual(rx[vip.l4p].sport, sport) + + # from vip to client + p1 = (Ether(dst=self.pg1.local_mac, + src=self.pg1.remote_mac) / + ip_class(src=getattr( + self.pg1.remote_hosts[nbr], + ip_v), + dst=getattr(src, ip_v)) / + vip.l4p(sport=4000 + nbr, dport=sport) / + Raw()) + + rxs = self.send_and_expect(self.pg1, + p1 * N_PKTS, + self.pg0) + + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + getattr(src, ip_v)) + self.assertEqual(rx[vip.l4p].dport, sport) + self.assertEqual(rx[ip_class].src, vip.ip) + self.assertEqual(rx[vip.l4p].sport, vip.port) + + # + # packets to the VIP that do not match a + # translation are dropped + # + p1 = (Ether(dst=self.pg0.local_mac, + src=src.mac) / + ip_class(src=getattr(src, ip_v), dst=vip.ip) / + vip.l4p(sport=sport, dport=6666) / + Raw()) + + self.send_and_assert_no_replies(self.pg0, + p1 * N_PKTS, + self.pg1) + + # + # packets from the VIP that do not match a + # session are forwarded + # + p1 = (Ether(dst=self.pg1.local_mac, + src=self.pg1.remote_mac) / + ip_class(src=getattr( + self.pg1.remote_hosts[nbr], + ip_v), + dst=getattr(src, ip_v)) / + vip.l4p(sport=6666, dport=sport) / + Raw()) + + rxs = self.send_and_expect(self.pg1, + p1 * N_PKTS, + self.pg0) + + self.assertEqual(t1.get_stats()['packets'], + N_PKTS * + len(sports) * + len(self.pg0.remote_hosts)) + + def cnat_test_translation_update(self, t1, sports, isV6=False): + ip_v = "ip6" if isV6 else "ip4" + ip_class = IPv6 if isV6 else IP + vip = t1.vip + + # + # modify the translation to use a different backend + # + dep = Ep(getattr(self.pg2, 'remote_' + ip_v), 5000) + sep = Ep("::", 0) if isV6 else 
Ep("0.0.0.0", 0) + t1.modify_vpp_config([EpTuple(sep, dep)]) + + # + # existing flows follow the old path + # + for src in self.pg0.remote_hosts: + for sport in sports: + # from client to vip + p1 = (Ether(dst=self.pg0.local_mac, + src=src.mac) / + ip_class(src=getattr(src, ip_v), dst=vip.ip) / + vip.l4p(sport=sport, dport=vip.port) / + Raw()) + + rxs = self.send_and_expect(self.pg0, + p1 * N_PKTS, + self.pg1) + + # + # new flows go to the new backend + # + for src in self.pg0.remote_hosts: + p1 = (Ether(dst=self.pg0.local_mac, + src=src.mac) / + ip_class(src=getattr(src, ip_v), dst=vip.ip) / + vip.l4p(sport=9999, dport=vip.port) / + Raw()) + + rxs = self.send_and_expect(self.pg0, + p1 * N_PKTS, + self.pg2) + + def cnat_translation(self, vips, isV6=False): + """ CNat Translation """ + + ip_class = IPv6 if isV6 else IP + ip_v = "ip6" if isV6 else "ip4" + sports = [1234, 1233] + + # + # turn the scanner off whilst testing otherwise sessions + # will time out + # + self.vapi.cli("test cnat scanner off") + + sessions = self.vapi.cnat_session_dump() + + trs = [] + for nbr, vip in enumerate(vips): + trs.append(self.cnat_create_translation(vip, nbr, isV6=isV6)) + + self.logger.info(self.vapi.cli("sh cnat client")) + self.logger.info(self.vapi.cli("sh cnat translation")) + + # + # translations + # + for nbr, vip in enumerate(vips): + self.cnat_test_translation(trs[nbr], nbr, sports, isV6=isV6) + self.cnat_test_translation_update(trs[nbr], sports, isV6=isV6) + if isV6: + self.logger.info(self.vapi.cli( + "sh ip6 fib %s" % self.pg0.remote_ip6)) + else: + self.logger.info(self.vapi.cli( + "sh ip fib %s" % self.pg0.remote_ip4)) + self.logger.info(self.vapi.cli("sh cnat session verbose")) + + # + # turn the scanner back on and wait untill the sessions + # all disapper + # + self.vapi.cli("test cnat scanner on") + + n_tries = 0 + sessions = self.vapi.cnat_session_dump() + while (len(sessions) and n_tries < 100): + n_tries += 1 + sessions = self.vapi.cnat_session_dump() + 
self.sleep(2) + + self.assertTrue(n_tries < 100) + + # + # load some flows again and purge + # + for vip in vips: + for src in self.pg0.remote_hosts: + for sport in sports: + # from client to vip + p1 = (Ether(dst=self.pg0.local_mac, + src=src.mac) / + ip_class(src=getattr(src, ip_v), dst=vip.ip) / + vip.l4p(sport=sport, dport=vip.port) / + Raw()) + self.send_and_expect(self.pg0, + p1 * N_PKTS, + self.pg2) + + for tr in trs: + tr.delete() + + self.assertTrue(self.vapi.cnat_session_dump()) + self.vapi.cnat_session_purge() + self.assertFalse(self.vapi.cnat_session_dump()) + + def test_cnat6(self): + # """ CNat Translation ipv6 """ + vips = [ + Ep("30::1", 5555), + Ep("30::2", 5554), + Ep("30::2", 5553, UDP), + ] + + self.pg0.generate_remote_hosts(len(vips)) + self.pg0.configure_ipv6_neighbors() + self.pg1.generate_remote_hosts(len(vips)) + self.pg1.configure_ipv6_neighbors() + + self.cnat_translation(vips, isV6=True) + + def test_cnat4(self): + # """ CNat Translation ipv4 """ + + vips = [ + Ep("30.0.0.1", 5555), + Ep("30.0.0.2", 5554), + Ep("30.0.0.2", 5553, UDP), + ] + + self.pg0.generate_remote_hosts(len(vips)) + self.pg0.configure_ipv4_neighbors() + self.pg1.generate_remote_hosts(len(vips)) + self.pg1.configure_ipv4_neighbors() + + self.cnat_translation(vips) + + +class TestCNatSourceNAT(VppTestCase): + """ CNat Source NAT """ + extra_vpp_punt_config = ["cnat", "{", + "session-max-age", "1", + "tcp-max-age", "1", "}"] + + @classmethod + def setUpClass(cls): + super(TestCNatSourceNAT, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestCNatSourceNAT, cls).tearDownClass() + + def setUp(self): + super(TestCNatSourceNAT, self).setUp() + + self.create_pg_interfaces(range(3)) + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip4() + i.resolve_arp() + i.config_ip6() + i.resolve_ndp() + + def tearDown(self): + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + super(TestCNatSourceNAT, self).tearDown() + + 
def cnat_create_translation(self, srcNatAddr, interface, isV6=False): + t1 = VppCNATSourceNat(self, srcNatAddr) + t1.add_vpp_config() + cnat_arc_name = "ip6-unicast" if isV6 else "ip4-unicast" + cnat_feature_name = "ip6-cnat-snat" if isV6 else "ip4-cnat-snat" + self.vapi.feature_enable_disable( + enable=1, + arc_name=cnat_arc_name, + feature_name=cnat_feature_name, + sw_if_index=interface.sw_if_index) + + return t1 + + def cnat_test_sourcenat(self, srcNatAddr, l4p=TCP, isV6=False): + ip_v = "ip6" if isV6 else "ip4" + ip_class = IPv6 if isV6 else IP + sports = [1234, 1235, 1236] + dports = [6661, 6662, 6663] + + self.pg0.generate_remote_hosts(1) + self.pg0.configure_ipv4_neighbors() + self.pg0.configure_ipv6_neighbors() + self.pg1.generate_remote_hosts(len(sports)) + self.pg1.configure_ipv4_neighbors() + self.pg1.configure_ipv6_neighbors() + + self.vapi.cli("test cnat scanner on") + t1 = self.cnat_create_translation(srcNatAddr, self.pg0) + + for nbr, remote_host in enumerate(self.pg1.remote_hosts): + # from pods to outside network + p1 = ( + Ether( + dst=self.pg0.local_mac, + src=self.pg0.remote_hosts[0].mac) / + ip_class( + src=getattr(self.pg0.remote_hosts[0], ip_v), + dst=getattr(remote_host, ip_v)) / + l4p(sport=sports[nbr], dport=dports[nbr]) / + Raw()) + + rxs = self.send_and_expect( + self.pg0, + p1 * N_PKTS, + self.pg1) + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + getattr(remote_host, ip_v)) + self.assertEqual(rx[l4p].dport, dports[nbr]) + self.assertEqual( + rx[ip_class].src, + srcNatAddr) + sport = rx[l4p].sport + + # from outside to pods + p2 = ( + Ether( + dst=self.pg1.local_mac, + src=self.pg1.remote_hosts[nbr].mac) / + ip_class(src=getattr(remote_host, ip_v), dst=srcNatAddr) / + l4p(sport=dports[nbr], dport=sport) / + Raw()) + + rxs = self.send_and_expect( + self.pg1, + p2 * N_PKTS, + self.pg0) + + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + 
getattr(self.pg0.remote_hosts[0], ip_v)) + self.assertEqual(rx[l4p].dport, sports[nbr]) + self.assertEqual(rx[l4p].sport, dports[nbr]) + self.assertEqual( + rx[ip_class].src, + getattr(remote_host, ip_v)) + + # add remote host to exclude list + subnet_mask = 100 if isV6 else 16 + subnet = getattr(remote_host, ip_v) + "/" + str(subnet_mask) + exclude_subnet = ip_network(subnet, strict=False) + + t1.cnat_exclude_subnet(exclude_subnet) + self.vapi.cnat_session_purge() + + rxs = self.send_and_expect( + self.pg0, + p1 * N_PKTS, + self.pg1) + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + getattr(remote_host, ip_v)) + self.assertEqual(rx[l4p].dport, dports[nbr]) + self.assertEqual( + rx[ip_class].src, + getattr(self.pg0.remote_hosts[0], ip_v)) + + # remove remote host from exclude list + t1.cnat_exclude_subnet(exclude_subnet, isAdd=False) + self.vapi.cnat_session_purge() + + rxs = self.send_and_expect( + self.pg0, + p1 * N_PKTS, + self.pg1) + + for rx in rxs: + self.assert_packet_checksums_valid(rx) + self.assertEqual( + rx[ip_class].dst, + getattr(remote_host, ip_v)) + self.assertEqual(rx[l4p].dport, dports[nbr]) + self.assertEqual( + rx[ip_class].src, + srcNatAddr) + + # def test_cnat6_sourcenat(self): + # # """ CNat Source Nat ipv6 """ + # self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip6, TCP, True) + # self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip6, UDP, True) + + def test_cnat4_sourcenat(self): + # """ CNat Source Nat ipv4 """ + self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip4, TCP) + self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip4, UDP) + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c index 5041c129245..3d489e4e00d 100644 --- a/src/vnet/ip/ip_types.c +++ b/src/vnet/ip/ip_types.c @@ -261,6 +261,30 @@ ip_address_from_46 (const ip46_address_t * nh, ip_addr_version (ip) = ip_address_family_from_fib_proto (fproto); 
} +/** + * Convert an IP address to a host FIB prefix (/32 for IPv4, else /128 IPv6) + */ +void +ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix) +{ + if (addr->version == AF_IP4) + { + prefix->fp_len = 32; + prefix->fp_proto = FIB_PROTOCOL_IP4; + clib_memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad)); + memcpy (&prefix->fp_addr.ip4, &addr->ip.ip4, + sizeof (prefix->fp_addr.ip4)); + } + else + { + prefix->fp_len = 128; + prefix->fp_proto = FIB_PROTOCOL_IP6; + memcpy (&prefix->fp_addr.ip6, &addr->ip.ip6, + sizeof (prefix->fp_addr.ip6)); + } + prefix->___fp___pad = 0; +} + static void ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen) { @@ -364,6 +388,17 @@ ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2) return cmp; } +/** + * Convert an IP prefix to a FIB prefix, keeping the IP prefix's length + */ +void +ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix, + fib_prefix_t * fib_prefix) +{ + ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix); + fib_prefix->fp_len = ip_prefix->len; +} + static bool ip4_prefix_validate (const ip_prefix_t * ip) { diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index c1593662b6d..7e9d059f9ed 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -123,41 +123,6 @@ ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index, return 1; } -/** - * convert from a LISP address to a FIB prefix - */ -void -ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix) -{ - if (addr->version == AF_IP4) - { - prefix->fp_len = 32; - prefix->fp_proto = FIB_PROTOCOL_IP4; - clib_memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad)); - memcpy (&prefix->fp_addr.ip4, &addr->ip.ip4, - sizeof (prefix->fp_addr.ip4)); - } - else - { - prefix->fp_len = 128; - prefix->fp_proto = FIB_PROTOCOL_IP6; - memcpy (&prefix->fp_addr.ip6, &addr->ip.ip6, - sizeof (prefix->fp_addr.ip6)); - } - prefix->___fp___pad = 0; -} - -/** - * convert from a LISP to a FIB prefix - */ -void -ip_prefix_to_fib_prefix
(const ip_prefix_t * ip_prefix, - fib_prefix_t * fib_prefix) -{ - ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix); - fib_prefix->fp_len = ip_prefix->len; -} - /** * Find the sw_if_index of the interface that would be used to egress towards * dst.