pvti: Packet Vector Tunnel Interface

This plugin implements a PoC of a UDP-based tunnel substrate whose specific
aim is to provide a higher MTU to the upper layers by chunking
the payload PDUs into smaller packets with a full 5-tuple.

At the same time, if there are multiple small packets to
the same destination during the vector processing, they
are packed into "carrier" packets up to underlay MTU size.

It assumes a trustworthy underlying medium; thus, for
operation over the Internet, it requires the use of an encryption layer
underneath.

Type: feature
Change-Id: I323958fa8de62584f6ed15643ea689568a9a62bc
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
This commit is contained in:
Andrew Yourtchenko 2024-01-23 11:57:51 +00:00 committed by Damjan Marion
parent 6ccfc3991d
commit 0acb398d6d
20 changed files with 4353 additions and 0 deletions

View File

@ -847,6 +847,11 @@ I: tracenode
M: Maxime Peim <mpeim@cisco.com>
F: src/plugins/tracenode
Plugin - Packet Vector Tunnel Interface
I: pvti
M: Andrew Yourtchenko <ayourtch@gmail.com>
F: src/plugins/pvti
cJSON
I: cjson
M: Ole Troan <ot@cisco.com>

View File

@ -903,6 +903,9 @@ pthreads
pton
pushingapatch
putatively
pvti
PVTI
Pvti
pwait
py
pypi

View File

@ -0,0 +1,40 @@
# Copyright (c) 2024 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build definition for the pvti plugin.
add_vpp_plugin(pvti
# All sources and headers that make up the plugin
SOURCES
pvti_if.c
pvti.c
input.h
input.c
input-main.c
output.h
output.c
output-main.c
bypass.h
bypass.c
bypass-main.c
api.c
pvti.h
# Data-path sources listed a second time as multiarch sources
MULTIARCH_SOURCES
input.c
output.c
bypass.c
# Binary API definition
API_FILES
pvti.api
# API test sources are currently disabled
# API_TEST_SOURCES
# pvti_test.c
)

View File

@ -0,0 +1,8 @@
---
name: Packet Vector Tunnel
maintainer: Andrew Yourtchenko <ayourtch@gmail.com>
features:
- support inner MTU up to ~8K over standard 1280..1500 MTU substrate
description: "Large MTU Tunnels"
state: development
properties: [API, CLI]

137
src/plugins/pvti/api.c Normal file
View File

@ -0,0 +1,137 @@
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
#include <vnet/format_fns.h>
#include <vnet/ip/ip_types_api.h>
#include <vlibapi/api.h>
#include <pvti/pvti.api_enum.h>
#include <pvti/pvti.api_types.h>
#include <pvti/pvti.h>
#include <pvti/pvti_if.h>
#define REPLY_MSG_ID_BASE pvm->msg_id_base
#include <vlibapi/api_helper_macros.h>
/* Context handed to pvti_if_send_details() through the walk callback's
 * opaque data pointer. */
typedef struct
{
/* API client registration that the details messages are sent to */
vl_api_registration_t *reg;
/* request context value, copied into every details message */
u32 context;
} pvti_if_details_ctx_t;
/* Empty placeholder type; nothing in the visible code refers to it.
 * NOTE(review): an empty struct is a compiler extension in C - confirm
 * whether this type is still needed. */
typedef struct
{
} pvti_interface_dump_ctx_t;
/*
 * Walk callback: encode one PVTI interface as a pvti_interface_details
 * message and send it to the API client described by the context.
 *
 * @param pvtii  index of the interface in the PVTI interface pool
 * @param data   pointer to a pvti_if_details_ctx_t (client registration and
 *               request context)
 * @return always WALK_CONTINUE, so that a walk visits every interface
 */
static walk_rc_t
pvti_if_send_details (index_t pvtii, void *data)
{
  vl_api_pvti_interface_details_t *rmp;
  pvti_if_details_ctx_t *ctx = data;
  const pvti_if_t *pvi;

  pvi = pvti_if_get (pvtii);
  /* pvti_if_get() yields NULL for INDEX_INVALID: nothing to report then */
  if (pvi == NULL)
    return (WALK_CONTINUE);

  rmp = vl_msg_api_alloc_zero (sizeof (*rmp));
  rmp->_vl_msg_id =
    htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base);

  /* multi-byte fields are converted to network byte order */
  rmp->interface.sw_if_index = htonl (pvi->sw_if_index);
  rmp->interface.local_port = htons (pvi->local_port);
  rmp->interface.remote_port = htons (pvi->remote_port);
  rmp->interface.underlay_mtu = htons (pvi->underlay_mtu);
  /* report the remaining tunnel attributes as well, so that the details
   * message carries every field of the tunnel type */
  rmp->interface.underlay_fib_index = htonl (pvi->underlay_fib_index);
  rmp->interface.peer_address_from_payload = pvi->peer_address_from_payload;
  ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip);
  ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip);

  /* context is echoed back exactly as received */
  rmp->context = ctx->context;
  vl_api_send_msg (ctx->reg, (u8 *) rmp);

  return (WALK_CONTINUE);
}
/*
 * Handler for pvti_interface_dump: sends one details message per matching
 * interface. A sw_if_index of ~0 selects every PVTI interface; any other
 * value selects just the interface with that sw_if_index, if there is one.
 * Requests from unknown clients are ignored.
 */
static void
vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp)
{
  vl_api_registration_t *client =
    vl_api_client_index_to_registration (mp->client_index);

  if (!client)
    return;

  pvti_if_details_ctx_t walk_ctx = {
    .reg = client,
    .context = mp->context,
  };
  u32 wanted = ntohl (mp->sw_if_index);

  if (wanted != ~0)
    {
      /* single interface requested */
      index_t idx = pvti_if_find_by_sw_if_index (wanted);

      if (INDEX_INVALID != idx)
        pvti_if_send_details (idx, &walk_ctx);
      return;
    }

  /* wildcard: report every interface */
  pvti_if_walk (pvti_if_send_details, &walk_ctx);
}
/*
 * Handler for pvti_interface_create: decodes the tunnel description from
 * the request, creates the interface and replies with the result code and
 * the sw_if_index of the new interface (~0 if none was assigned).
 * An underlay MTU of 0 in the request is replaced by 1500.
 */
static void
vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp)
{
  vl_api_pvti_interface_create_reply_t *rmp;
  pvti_main_t *pvm = &pvti_main;
  ip_address_t local_addr;
  ip_address_t remote_addr;
  u32 new_sw_if_index = ~0;
  int rv;

  ip_address_decode2 (&mp->interface.local_ip, &local_addr);
  ip_address_decode2 (&mp->interface.remote_ip, &remote_addr);

  /* scalar request fields arrive in network byte order */
  u16 local_port = clib_net_to_host_u16 (mp->interface.local_port);
  u16 remote_port = clib_net_to_host_u16 (mp->interface.remote_port);
  u16 mtu = clib_net_to_host_u16 (mp->interface.underlay_mtu);
  u32 fib_index = clib_net_to_host_u32 (mp->interface.underlay_fib_index);

  pvti_peer_address_method_t method = PVTI_PEER_ADDRESS_FIXED;
  if (mp->interface.peer_address_from_payload)
    method = PVTI_PEER_ADDRESS_FROM_PAYLOAD;

  /* 0 means "not specified" */
  if (!mtu)
    mtu = 1500;

  rv = pvti_if_create (&local_addr, local_port, &remote_addr, remote_port,
                       method, mtu, fib_index, &new_sw_if_index);

  REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY,
                { rmp->sw_if_index = htonl (new_sw_if_index); });
}
/*
 * Handler for pvti_interface_delete: deletes the interface identified by
 * the request's sw_if_index and replies with the result code.
 */
static void
vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp)
{
  vl_api_pvti_interface_delete_reply_t *rmp;
  /* referenced by the reply macro through REPLY_MSG_ID_BASE */
  pvti_main_t *pvm = &pvti_main;
  int rv = pvti_if_delete (ntohl (mp->sw_if_index));

  REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY);
}
/* API definitions */
#include <pvti/pvti.api.c>
/*
 * Register the plugin's API messages and remember the message id base
 * that the reply helpers in this file rely on.
 */
void
pvti_api_init ()
{
  /* Add our API messages to the global name_crc hash table */
  pvti_main.msg_id_base = setup_message_id_table ();
}

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <pvti/bypass.h>
/*
 * Trace formatter for the bypass nodes.
 * Consumes (vlib_main_t *, vlib_node_t *, pvti_bypass_trace_t *) from the
 * argument list and appends a textual rendering of the trace record to s.
 */
static u8 *
format_pvti_bypass_trace (u8 *s, va_list *args)
{
  /* the first two arguments are not needed for formatting */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  pvti_bypass_trace_t *trace = va_arg (*args, pvti_bypass_trace_t *);

  s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n",
              trace->sw_if_index, trace->next_index);
  s = format (s, " src %U sport %d dport %d\n", format_ip_address,
              &trace->remote_ip, trace->remote_port, trace->local_port);
  return format (s, " seq: %d", trace->seq);
}
/* Declarations of the two bypass nodes registered further down in this file */
vlib_node_registration_t pvti4_bypass_node;
vlib_node_registration_t pvti6_bypass_node;
/* Counter descriptions, one per entry of foreach_pvti_bypass_error and in
 * the same order as the PVTI_BYPASS_ERROR_* enum */
static char *pvti_bypass_error_strings[] = {
#define _(sym, string) string,
foreach_pvti_bypass_error
#undef _
};
/* Registration of the IPv4 bypass node; accepted packets are handed to
 * "pvti4-input", rejected ones to "error-drop". */
VLIB_REGISTER_NODE (pvti4_bypass_node) =
{
  .name = "ip4-pvti-bypass",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_bypass_trace,
  .error_strings = pvti_bypass_error_strings,
  .n_errors = ARRAY_LEN (pvti_bypass_error_strings),
  .n_next_nodes = PVTI_BYPASS_N_NEXT,
  .next_nodes = {
    [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti4-input",
    [PVTI_BYPASS_NEXT_DROP] = "error-drop",
  },
};
/* Registration of the IPv6 bypass node; accepted packets are handed to
 * "pvti6-input", rejected ones to "error-drop". */
VLIB_REGISTER_NODE (pvti6_bypass_node) =
{
  .name = "ip6-pvti-bypass",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_bypass_trace,
  .error_strings = pvti_bypass_error_strings,
  .n_errors = ARRAY_LEN (pvti_bypass_error_strings),
  .n_next_nodes = PVTI_BYPASS_N_NEXT,
  .next_nodes = {
    [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti6-input",
    [PVTI_BYPASS_NEXT_DROP] = "error-drop",
  },
};

202
src/plugins/pvti/bypass.c Normal file
View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <pvti/pvti.h>
#include <pvti/pvti_if.h>
#include <pvti/bypass.h>
/*
 * Shared worker of the ip4-pvti-bypass / ip6-pvti-bypass nodes.
 *
 * For every buffer of the frame:
 *  - the default next node is the next feature on the arc;
 *  - non-UDP packets (IPv4 fragments included) and UDP packets whose
 *    source address/port do not match a PVTI interface keep that default;
 *  - matching packets get their UDP checksum and length verified and are
 *    then sent to the PVTI input node (buffer advanced past the IP and
 *    UDP headers) or dropped with the corresponding IP error.
 *
 * @param vm      vlib main of the calling thread
 * @param node    runtime of the bypass node being run
 * @param frame   frame with the buffer indices to process
 * @param is_ip6  true when called for the IPv6 node, false for IPv4
 * @return number of vectors in the frame
 */
always_inline u16
pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
                         vlib_frame_t *frame, bool is_ip6)
{
  u32 n_left_from, *from, *to_next;
  pvti_bypass_next_t next_index;
  /* The error codes used below are IP4_ERROR_* or IP6_ERROR_* depending on
   * the address family, so they have to be resolved against the error
   * table of the matching IP input node. */
  vlib_node_runtime_t *error_node = vlib_node_get_runtime (
    vm, is_ip6 ? ip6_input_node.index : ip4_input_node.index);
  u32 pkts_processed = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *b0;
          u32 sw_if_index0;
          ip4_header_t *ip40 = NULL;
          ip6_header_t *ip60 = NULL;
          /* stays NULL for packets that are not UDP */
          udp_header_t *udp0 = NULL;
          u32 bi0, ip_len0, udp_len0, flags0, next0;
          u32 pvti_index0 = INDEX_INVALID;
          u8 error0, good_udp0, proto0;
          i32 len_diff0;

          bi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          /* receive interface, recorded in the trace */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

          /* setup the packet for the next feature */
          vnet_feature_next (&next0, b0);

          if (is_ip6)
            {
              ip60 = vlib_buffer_get_current (b0);
              proto0 = ip60->protocol;
            }
          else
            {
              ip40 = vlib_buffer_get_current (b0);
              /* Treat IP frag packets as "experimental" protocol for now */
              proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
            }

          /* Process packet 0 */
          if (proto0 != IP_PROTOCOL_UDP)
            goto exit; /* not UDP packet */

          if (is_ip6)
            udp0 = ip6_next_header (ip60);
          else
            udp0 = ip4_next_header (ip40);

          /* look up the tunnel by the packet's source address and port */
          if (is_ip6)
            {
              pvti_index0 = pvti_if_find_by_remote_ip6_and_port (
                &ip60->src_address, clib_net_to_host_u16 (udp0->src_port));
            }
          else
            {
              pvti_index0 = pvti_if_find_by_remote_ip4_and_port (
                &ip40->src_address, clib_net_to_host_u16 (udp0->src_port));
            }
          if (pvti_index0 == INDEX_INVALID)
            goto exit;

          flags0 = b0->flags;
          good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;

          /* Don't verify UDP checksum for packets with explicit zero
           * checksum. */
          good_udp0 |= udp0->checksum == 0;

          /* Verify UDP length */
          if (is_ip6)
            ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
          else
            ip_len0 = clib_net_to_host_u16 (ip40->length);
          udp_len0 = clib_net_to_host_u16 (udp0->length);
          len_diff0 = ip_len0 - udp_len0;

          /* Verify UDP checksum */
          if (PREDICT_FALSE (!good_udp0))
            {
              if (is_ip6)
                flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
              else
                flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
              good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
            }

          /* a length error takes precedence over a checksum error */
          if (is_ip6)
            {
              error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
              error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
            }
          else
            {
              error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
              error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
            }

          next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT;
          b0->error = error0 ? error_node->errors[error0] : 0;

          /* pvtiX-input node expect current at PVTI header */
          if (is_ip6)
            vlib_buffer_advance (b0, sizeof (ip6_header_t) +
                                       sizeof (udp_header_t));
          else
            vlib_buffer_advance (b0, sizeof (ip4_header_t) +
                                       sizeof (udp_header_t));

        exit:
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
                             (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              pvti_bypass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));

              /* every field printed by the trace formatter is set here, so
               * that no uninitialised trace memory is ever displayed */
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
              /* the PVTI header is not parsed by this node */
              t->seq = 0;
              if (is_ip6)
                {
                  t->remote_ip.ip.ip6 = ip60->src_address;
                  t->remote_ip.version = AF_IP6;
                }
              else
                {
                  t->remote_ip.ip.ip4 = ip40->src_address;
                  t->remote_ip.version = AF_IP4;
                }
              /* ports are only known when the packet is UDP */
              t->remote_port =
                udp0 ? clib_net_to_host_u16 (udp0->src_port) : 0;
              t->local_port =
                udp0 ? clib_net_to_host_u16 (udp0->dst_port) : 0;
            }

          pkts_processed += 1;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, node->node_index,
                               PVTI_BYPASS_ERROR_PROCESSED, pkts_processed);
  return frame->n_vectors;
}
/* Node function of the IPv4 bypass node: runs the shared worker in IPv4 mode */
VLIB_NODE_FN (pvti4_bypass_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  const bool is_ip6 = 0;

  return pvti_bypass_node_common (vm, node, frame, is_ip6);
}
/* Node function of the IPv6 bypass node: runs the shared worker in IPv6 mode */
VLIB_NODE_FN (pvti6_bypass_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  const bool is_ip6 = 1;

  return pvti_bypass_node_common (vm, node, frame, is_ip6);
}

53
src/plugins/pvti/bypass.h Normal file
View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __included_pvti_bypass_h__
#define __included_pvti_bypass_h__
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <pvti/pvti.h>
#include <pvti/pvti_if.h>
/* Per-packet trace record of the bypass nodes, rendered by
 * format_pvti_bypass_trace(). */
typedef struct
{
/* next node index chosen for the packet */
u32 next_index;
/* interface index shown in the trace */
u32 sw_if_index;
/* source address of the traced packet */
ip_address_t remote_ip;
/* printed as "sport" by the trace formatter */
u16 remote_port;
/* printed as "dport" by the trace formatter */
u16 local_port;
/* printed as "seq" by the trace formatter */
u32 seq;
} pvti_bypass_trace_t;
/* X-macro list of the bypass node counters: _(symbol, description) */
#define foreach_pvti_bypass_error \
_ (PROCESSED, "PVTI bypass tunnel packets processed")
/* Counter indices PVTI_BYPASS_ERROR_<symbol>, generated from the list above;
 * PVTI_BYPASS_N_ERROR is the number of counters. */
typedef enum
{
#define _(sym, str) PVTI_BYPASS_ERROR_##sym,
foreach_pvti_bypass_error
#undef _
PVTI_BYPASS_N_ERROR,
} pvti_bypass_error_t;
/* Next-node slots of the bypass nodes; the values index the next_nodes
 * table of the node registrations. */
typedef enum
{
PVTI_BYPASS_NEXT_DROP,
PVTI_BYPASS_NEXT_PVTI_INPUT,
PVTI_BYPASS_N_NEXT,
} pvti_bypass_next_t;
#endif // pvti_bypass_h

View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <pvti/input.h>
/* Counter descriptions, one per entry of foreach_pvti_input_error and in
 * the same order as the PVTI_INPUT_ERROR_* enum */
static char *pvti_input_error_strings[] = {
#define _(sym, string) string,
foreach_pvti_input_error
#undef _
};
/* Printable names of the trace types, indexed by PVTI_INPUT_TRACE_* value */
#define _(f, s) s,
static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type };
#undef _
/*
 * Map a trace type value to its printable name.
 * Values outside the known PVTI_INPUT_TRACE_* range yield "unknown".
 */
static char *
get_pvti_trace_type_name (u8 ptype)
{
  return (ptype >= PVTI_INPUT_TRACE_N_TYPES) ?
           "unknown" :
           pvti_input_trace_type_names[ptype];
}
/*
 * Trace formatter for the PVTI input nodes.
 * Consumes (vlib_main_t *, vlib_node_t *, pvti_input_trace_t *) from the
 * argument list and appends the summary line, the peer address/ports, the
 * size of each recorded chunk (at most MAX_CHUNKS of them) and the captured
 * packet bytes to s.
 */
static u8 *
format_pvti_input_trace (u8 *s, va_list *args)
{
  /* the first two arguments are not needed for formatting */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  pvti_input_trace_t *trace = va_arg (*args, pvti_input_trace_t *);
  /* indentation in effect before anything is appended */
  u32 base_indent = format_get_indent (s);
  int chunk_no = 0;

  s = format (s,
              "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), "
              "chunkcnt: %d\n",
              trace->sw_if_index, trace->next_index,
              get_pvti_trace_type_name (trace->trace_type), trace->trace_type,
              trace->chunk_count);
  s = format (s, " src %U sport %d dport %d\n", format_ip_address,
              &trace->remote_ip, trace->remote_port, trace->local_port);
  s = format (s, " seq: %d, chunk_count: %d\n", trace->seq,
              trace->chunk_count);

  /* never read past the end of the chunks array */
  u16 n_shown = trace->chunk_count;
  if (n_shown > MAX_CHUNKS)
    n_shown = MAX_CHUNKS;

  while (chunk_no < n_shown)
    {
      s = format (s, " %02d: sz %d\n", chunk_no,
                  trace->chunks[chunk_no].total_chunk_length);
      chunk_no++;
    }

  return format (s, "\n%U%U", format_white_space, base_indent,
                 format_ip_adjacency_packet_data, trace->packet_data,
                 sizeof (trace->packet_data));
}
/* Declarations of the two PVTI input nodes registered in this file */
vlib_node_registration_t pvti4_input_node;
vlib_node_registration_t pvti6_input_node;

/* Registration of the "pvti4-input" node */
VLIB_REGISTER_NODE (pvti4_input_node) =
{
  .name = "pvti4-input",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_input_trace,
  .error_strings = pvti_input_error_strings,
  .n_errors = ARRAY_LEN (pvti_input_error_strings),
  .n_next_nodes = PVTI_INPUT_N_NEXT,
  .next_nodes = {
    [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
    [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
    [PVTI_INPUT_NEXT_PUNT] = "error-punt",
    [PVTI_INPUT_NEXT_DROP] = "error-drop",
  },
};
/* Registration of the "pvti6-input" node; same next nodes as "pvti4-input" */
VLIB_REGISTER_NODE (pvti6_input_node) =
{
  .name = "pvti6-input",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_input_trace,
  .error_strings = pvti_input_error_strings,
  .n_errors = ARRAY_LEN (pvti_input_error_strings),
  .n_next_nodes = PVTI_INPUT_N_NEXT,
  .next_nodes = {
    [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
    [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
    [PVTI_INPUT_NEXT_PUNT] = "error-punt",
    [PVTI_INPUT_NEXT_DROP] = "error-drop",
  },
};

496
src/plugins/pvti/input.c Normal file

File diff suppressed because it is too large Load Diff

87
src/plugins/pvti/input.h Normal file
View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __included_pvti_input_h__
#define __included_pvti_input_h__
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <pvti/pvti.h>
#include <pvti/pvti_if.h>
/* Per-chunk information kept in an input trace record */
typedef struct
{
/* printed as the chunk's "sz" by the trace formatter */
u16 total_chunk_length;
} pvti_input_chunk_t;
/* Capacity of the chunks array in pvti_input_trace_t */
#define MAX_CHUNKS 32
/* NOTE(review): not referenced in the visible code; presumably an upper
 * bound on the received length - confirm against input.c */
#define PVTI_RX_MAX_LENGTH 2048
/* Per-packet trace record of the PVTI input nodes, rendered by
 * format_pvti_input_trace(). */
typedef struct
{
u32 next_index;
u32 sw_if_index;
ip_address_t remote_ip;
/* printed as "sport" by the trace formatter */
u16 remote_port;
/* printed as "dport" by the trace formatter */
u16 local_port;
u32 seq;
/* per-chunk data; the formatter shows at most MAX_CHUNKS entries */
pvti_input_chunk_t chunks[MAX_CHUNKS];
u8 chunk_count;
/* one of PVTI_INPUT_TRACE_*; other values are printed as "unknown" */
u8 trace_type;
/* captured packet bytes, printed via format_ip_adjacency_packet_data */
u8 packet_data[64];
} pvti_input_trace_t;
/* X-macro list of the input trace types: _(symbol, printable name) */
#define foreach_pvti_input_trace_type \
_ (drop, "drop") \
_ (decap, "decapsulate") \
_ (free, "free") \
_ (enqueue, "enqueue")
/* Trace type values PVTI_INPUT_TRACE_<symbol>, generated from the list
 * above; PVTI_INPUT_TRACE_N_TYPES is the number of types. */
typedef enum
{
#define _(f, s) PVTI_INPUT_TRACE_##f,
foreach_pvti_input_trace_type
#undef _
PVTI_INPUT_TRACE_N_TYPES,
} pvti_input_trace_type_t;
/* X-macro list of the input node counters: _(symbol, description) */
#define foreach_pvti_input_error \
_ (PROCESSED, "PVTI tunneled packets processed") \
_ (DECAPSULATED, "PVTI inner packets decapsulated") \
_ (PEER, "Could not find a peer") \
_ (NOCHUNKS, "Packet has no chunks") \
_ (NO_BUFFERS, "No buffers available to decapsulate") \
_ (TOOMANYREASS, "Packet has more reassembly chunks than total") \
_ (PACKET_TOO_SHORT, "Packet too short")
/* Counter indices PVTI_INPUT_ERROR_<symbol>, generated from the list above;
 * PVTI_INPUT_N_ERROR is the number of counters. */
typedef enum
{
#define _(sym, str) PVTI_INPUT_ERROR_##sym,
foreach_pvti_input_error
#undef _
PVTI_INPUT_N_ERROR,
} pvti_input_error_t;
/* Next-node slots of the PVTI input nodes; the values index the next_nodes
 * table of the node registrations. */
typedef enum
{
PVTI_INPUT_NEXT_DROP,
PVTI_INPUT_NEXT_IP4_INPUT,
PVTI_INPUT_NEXT_IP6_INPUT,
PVTI_INPUT_NEXT_PUNT,
PVTI_INPUT_N_NEXT,
} pvti_input_next_t;
#endif // pvti_input_h

View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <pvti/output.h>
/*
 * Trace formatter for the PVTI output nodes.
 * Consumes (vlib_main_t *, vlib_node_t *, pvti_output_trace_t *) from the
 * argument list and appends the trace fields followed by the captured
 * packet bytes to s.
 */
static u8 *
format_pvti_output_trace (u8 *s, va_list *args)
{
  /* the first two arguments are not needed for formatting */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  pvti_output_trace_t *trace = va_arg (*args, pvti_output_trace_t *);
  /* indentation in effect before anything is appended */
  u32 base_indent = format_get_indent (s);

  s = format (s,
              "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,",
              trace->trace_type, trace->sw_if_index, trace->next_index,
              trace->underlay_mtu);
  s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d",
              format_white_space, base_indent, trace->stream_index,
              trace->bi0_max_current_length, trace->tx_seq);
  return format (s, "\n%U%U", format_white_space, base_indent,
                 format_ip_adjacency_packet_data, trace->packet_data,
                 sizeof (trace->packet_data));
}
/* NOTE(review): the nodes registered below are pvti4_output_node and
 * pvti6_output_node; no registration of pvti_output_node is visible here -
 * confirm whether this declaration is still needed. */
vlib_node_registration_t pvti_output_node;
/* Counter descriptions, one per entry of foreach_pvti_output_error and in
 * the same order as the PVTI_OUTPUT_ERROR_* enum */
static char *pvti_output_error_strings[] = {
#define _(sym, string) string,
foreach_pvti_output_error
#undef _
};
/* Registration of the "pvti4-output" node */
VLIB_REGISTER_NODE (pvti4_output_node) =
{
  .name = "pvti4-output",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_output_trace,
  .error_strings = pvti_output_error_strings,
  .n_errors = ARRAY_LEN (pvti_output_error_strings),
  .n_next_nodes = PVTI_OUTPUT_N_NEXT,
  .next_nodes = {
    [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
    [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
  },
};
/* Registration of the "pvti6-output" node; same next nodes as "pvti4-output" */
VLIB_REGISTER_NODE (pvti6_output_node) =
{
  .name = "pvti6-output",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_pvti_output_trace,
  .error_strings = pvti_output_error_strings,
  .n_errors = ARRAY_LEN (pvti_output_error_strings),
  .n_next_nodes = PVTI_OUTPUT_N_NEXT,
  .next_nodes = {
    [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
    [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
  },
};

543
src/plugins/pvti/output.c Normal file

File diff suppressed because it is too large Load Diff

75
src/plugins/pvti/output.h Normal file
View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __included_pvti_output_h__
#define __included_pvti_output_h__
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <pvti/pvti.h>
#include <pvti/pvti_if.h>
/* Per-packet trace record of the PVTI output nodes, rendered by
 * format_pvti_output_trace(). */
typedef struct
{
u32 next_index;
u32 sw_if_index;
u32 tx_seq;
u16 underlay_mtu;
u16 bi0_max_current_length;
u8 stream_index;
/* printed in parentheses after "PVTI-OUT" by the trace formatter */
u8 trace_type;
/* captured packet bytes, printed via format_ip_adjacency_packet_data */
u8 packet_data[96];
} pvti_output_trace_t;
/* X-macro list of the output node counters: _(symbol, description).
 * NOTE(review): the NO_PRE_SPACE description is misspelled ("enought");
 * it is a runtime string, so confirm nothing matches on it before fixing. */
#define foreach_pvti_output_error \
_ (NONE, "No error") \
_ (PROCESSED, "Packets processed") \
_ (ENCAPSULATED, "Packets encapsulated") \
_ (PEER, "No peer found") \
_ (MAKE_PEER, "Could not make peer") \
_ (RECHARGE0, "Could not recharge 0") \
_ (RECHARGE1, "Could not recharge 1") \
_ (NO_PRE_SPACE, "Not enought pre-data space") \
_ (CHOPPED, "Packets chopped") \
_ (OVERFLOW, "Packets overflowed") \
_ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess")
/* Counter indices PVTI_OUTPUT_ERROR_<symbol>, generated from the list above;
 * PVTI_OUTPUT_N_ERROR is the number of counters. */
typedef enum
{
#define _(sym, str) PVTI_OUTPUT_ERROR_##sym,
foreach_pvti_output_error
#undef _
PVTI_OUTPUT_N_ERROR,
} pvti_output_error_t;
/* Kind of a chunk.
 * NOTE(review): presumably distinguishes a self-contained chunk from one
 * that continues a packet being reassembled - confirm against output.c */
typedef enum
{
PVTI_INDEPENDENT_CHUNK = 0,
PVTI_REASS_CHUNK,
} pvti_chunk_type_t;
/* NOTE(review): looks like a sentinel for a not-yet-known maximum current
 * length - confirm against its users */
#define MAX_CURR_LEN_UNKNOWN 0xffff
/* Next-node slots of the PVTI output nodes; the values index the next_nodes
 * table of the node registrations. */
typedef enum
{
PVTI_OUTPUT_NEXT_DROP,
PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT,
PVTI_OUTPUT_NEXT_IP4_LOOKUP,
PVTI_OUTPUT_NEXT_IP6_LOOKUP,
PVTI_OUTPUT_N_NEXT,
} pvti_output_next_t;
#endif // pvti_output_h

111
src/plugins/pvti/pvti.api Normal file
View File

@ -0,0 +1,111 @@
/*
* pvti.api - binary API skeleton
*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file pvti.api
* @brief VPP control-plane API messages.
*
* This file defines VPP control-plane binary API messages which are generally
* called through a shared memory interface.
*/
/* Version and type recitations */
option version = "0.0.1";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
/** \brief A composite type uniquely defining a PVTI tunnel.
@param sw_if_index - ignored on create/delete, present in details.
@param local_ip - Local IP address
@param local_port - Local UDP port
@param remote_ip - Remote IP address
@param peer_address_from_payload - if true, the peer address method is
"from payload" rather than "fixed"
@param remote_port - Remote UDP port
@param underlay_mtu - Underlay MTU for packet splitting/coalescing;
0 on create selects 1500
@param underlay_fib_index - Underlay FIB index to be used after encap
*/
typedef pvti_tunnel
{
vl_api_interface_index_t sw_if_index;
vl_api_address_t local_ip;
u16 local_port;
vl_api_address_t remote_ip;
bool peer_address_from_payload;
u16 remote_port;
u16 underlay_mtu;
u32 underlay_fib_index;
};
/** @brief API to create a PVTI tunnel interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param interface - description of the tunnel to create
*/
define pvti_interface_create
{
option status="in_progress";
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
/* Arbitrary context, so client can match reply to request */
u32 context;
vl_api_pvti_tunnel_t interface;
};
/** @brief Reply to pvti_interface_create
@param context - sender context, to match reply w/ request
@param retval - return value of the create operation
@param sw_if_index - index of the newly created interface
*/
define pvti_interface_create_reply
{
option status="in_progress";
u32 context;
i32 retval;
/* Index for the newly created interface */
vl_api_interface_index_t sw_if_index;
};
/** @brief API to delete a PVTI tunnel interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to delete
*/
autoreply define pvti_interface_delete {
option status="in_progress";
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
/* Arbitrary context, so client can match reply to request */
u32 context;
vl_api_interface_index_t sw_if_index;
};
/** @brief API to list PVTI tunnel interfaces
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to report, or ~0 for all PVTI interfaces
*/
define pvti_interface_dump
{
option status="in_progress";
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
};
/** @brief Details of one PVTI tunnel interface, sent in response to
pvti_interface_dump
@param context - sender context, to match reply w/ request
@param interface - description of the tunnel
*/
define pvti_interface_details
{
option status="in_progress";
u32 context;
vl_api_pvti_tunnel_t interface;
};

481
src/plugins/pvti/pvti.c Normal file

File diff suppressed because it is too large Load Diff

257
src/plugins/pvti/pvti.h Normal file
View File

@ -0,0 +1,257 @@
/*
* pvti.h - skeleton vpp engine plug-in header file
*
* Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __included_pvti_h__
#define __included_pvti_h__
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
/* NOTE(review): not referenced in the visible code; presumably an upper
 * bound on the number of threads - confirm against its users */
#define VPP_MAX_THREADS (1 << 8)
/* Size of the rx_streams array of an RX peer */
#define MAX_RX_STREAMS 256
/* NOTE(review): purpose not evident from the visible code - confirm */
#define PVTI_ALIGN_BYTES 9
/* Packed PVTI packet header. */
typedef CLIB_PACKED (struct {
u32 seq;
u8 stream_index; // set to the cpu# on the sending side
u8 chunk_count;
u8 reass_chunk_count; // number of chunks in the front that are related to
// previously started buffer
// mandatory_flags_mask highlights which of the flags cause packet drop if
// not understood, and which of them can be just ignored.
u8 mandatory_flags_mask;
u8 flags_value;
// NOTE(review): presumably the length of the pad[] area that follows -
// confirm against the encap/decap code
u8 pad_bytes;
u8 pad[0];
}) pvti_packet_header_t;
/* Packed IPv4 + UDP header pair; the PVTI packet header is deliberately
 * not part of it. */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
udp_header_t udp;
// not part of encap header pvti_packet_header_t pv;
}) pvti_ip4_encap_header_t;
/* Packed IPv6 + UDP header pair; the PVTI packet header is deliberately
 * not part of it. */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
udp_header_t udp;
// not part of encap header pvti_packet_header_t pv;
}) pvti_ip6_encap_header_t;
/* Packed header placed in front of the data of each chunk.
 * NOTE(review): where the CHUNK_FLAGS_* bits are stored is not visible
 * here - confirm against the encap/decap code. */
typedef CLIB_PACKED (struct {
u16 total_chunk_length;
// More fragments: this chunk is not the last block fragment
#define CHUNK_FLAGS_MF (1 << 0)
// More blocks: this block has chained blocks that follow
#define CHUNK_FLAGS_MB (1 << 1)
u16 _pad0;
u32 _pad1;
u8 chunk_data[0];
}) pvti_chunk_header_t;
/* Transmit-side state of one stream. */
typedef struct
{
// a buffer being built from the smaller packets
u32 bi0;
// how big can this buffer grow
u32 bi0_max_current_length;
// how many chunks are already in the buffer
u8 chunk_count;
// leading reassembly chunk count
u8 reass_chunk_count;
u32 current_tx_seq;
} pvti_per_tx_stream_data_t;
/* Receive-side (reassembly) state of one stream. */
typedef struct
{
/* The seq# that we last processed */
u32 last_rx_seq;
// a current buffer that is being reassembled
u32 rx_bi0;
// The root buffer, most of the times == rx_bi0 except in the case of chained
// buffers.
u32 rx_bi0_first;
// Next index for dispatch when the reassembly is done
u16 rx_next0;
// expected total inner length for the packet
u16 rx_expected_inner_length;
u16 rx_received_inner_length;
} pvti_per_rx_stream_data_t;
/* Transmit-side peer; elements of the per-thread tx_peers pool. */
typedef struct
{
ip_address_t local_ip;
ip_address_t remote_ip;
u16 remote_port;
u16 local_port;
u16 underlay_mtu;
u32 underlay_fib_index;
u32 pvti_if_index;
bool deleted;
// NOTE(review): the bo0* fields presumably describe the outgoing buffer
// currently being filled for this peer - confirm against output.c
bool is_bo0_traced;
u32 bo0_max_current_length;
u8 chunk_count;
u8 reass_chunk_count;
u32 current_tx_seq;
vlib_buffer_t *bo0;
} pvti_tx_peer_t;
/* Receive-side peer; elements of the per-thread rx_peers pool. */
typedef struct
{
ip_address_t local_ip;
ip_address_t remote_ip;
u16 remote_port;
u16 local_port;
// reassembly state, one slot per possible stream index
pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS];
u32 pvti_if_index;
bool deleted;
} pvti_rx_peer_t;
/* Per-thread working state (see per_thread_data in pvti_main_t). */
typedef struct
{
/* pool of destination-based structures which are used to build the packets
*/
pvti_tx_peer_t *tx_peers;
/* vector of buffers to send */
u32 *pending_tx_buffers;
u16 *pending_tx_nexts;
/* pool of source-based structures for the remote peers' data tracking
*/
pvti_rx_peer_t *rx_peers;
/* vector of buffers being decapsulated */
u32 *pending_rx_buffers;
u16 *pending_rx_nexts;
} pvti_per_thread_data_t;
/* One PVTI tunnel interface; elements of pvti_main.if_pool. */
typedef struct
{
ip_address_t local_ip;
ip_address_t remote_ip;
u16 remote_port;
u16 local_port;
u16 underlay_mtu;
u32 underlay_fib_index;
bool peer_address_from_payload;
u64 created_at;
u32 sw_if_index;
u32 hw_if_index;
// per-stream data for TX
pvti_per_tx_stream_data_t tx_streams[256];
// per-stream data for RX
pvti_per_rx_stream_data_t rx_streams[256];
} pvti_if_t;
/* Global state of the pvti plugin (the single instance is pvti_main). */
typedef struct
{
/* API message ID base */
u16 msg_id_base;
/* have we initialized the data structures ? */
bool is_initialized;
/* interface pool */
pvti_if_t *if_pool;
/* if_index in the pool above by sw_if_index */
index_t *if_index_by_sw_if_index;
/* indices by port */
index_t **if_indices_by_port;
/* per-thread data, ip4[0] and ip6[1] */
pvti_per_thread_data_t *per_thread_data[2];
/* on/off switch for the periodic function */
u8 periodic_timer_enabled;
/* Node index, non-zero if the periodic process has been created */
u32 periodic_node_index;
/* graph node state */
uword *bm_ip4_bypass_enabled_by_sw_if;
uword *bm_ip6_bypass_enabled_by_sw_if;
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
ethernet_main_t *ethernet_main;
} pvti_main_t;
/* The plugin's global state */
extern pvti_main_t pvti_main;
/* Graph node registrations.
 * NOTE(review): definitions of pvti_node and pvti_periodic_node are not
 * visible in this view - confirm they exist. */
extern vlib_node_registration_t pvti_node;
extern vlib_node_registration_t pvti4_input_node;
extern vlib_node_registration_t pvti4_output_node;
extern vlib_node_registration_t pvti6_input_node;
extern vlib_node_registration_t pvti6_output_node;
extern vlib_node_registration_t pvti_periodic_node;
/*
 * Compute the stream index for the calling thread: the low 7 bits hold the
 * thread index (asserted to fit), the top bit is set for IPv6.
 *
 * @param is_ip6  non-zero for the IPv6 stream of this thread
 * @return stream index in the range 0..255
 */
always_inline u8
pvti_get_stream_index (int is_ip6)
{
  const u32 this_thread = vlib_get_thread_index ();

  /* only 7 bits are available for the thread index */
  ASSERT ((this_thread & 0xffffff80) == 0);

  u8 family_bit = is_ip6 ? 0x80 : 0;
  u8 result = (this_thread & 0x7f) | family_bit;

  return result;
}
/*
 * Try to allocate a single buffer and reset its current data offset and
 * length to zero.
 *
 * @param vm  vlib main of the calling thread
 * @return the buffer index, or INDEX_INVALID if the allocation failed
 */
always_inline u32
pvti_get_new_buffer (vlib_main_t *vm)
{
  u32 new_bi = INDEX_INVALID;

  if (vlib_buffer_alloc (vm, &new_bi, 1) == 1)
    {
      vlib_buffer_t *fresh = vlib_get_buffer (vm, new_bi);

      fresh->current_data = 0;
      fresh->current_length = 0;
      return new_bi;
    }

  return INDEX_INVALID;
}
/* Periodic function events */
#define PVTI_EVENT1 1
#define PVTI_EVENT2 2
#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3
/* NOTE(review): implementations are not visible in this view; judging by
 * the names, the first creates the periodic process node and the second
 * ensures the plugin data structures are initialized - confirm in pvti.c */
void pvti_create_periodic_process (pvti_main_t *);
void pvti_verify_initialized (pvti_main_t *pvm);
#endif /* __included_pvti_h__ */

376
src/plugins/pvti/pvti_if.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,47 @@
#ifndef PVTI_IF_H
#define PVTI_IF_H
#include <vnet/interface_funcs.h>
/* How the peer address of a tunnel is determined; the API create handler
 * selects FROM_PAYLOAD when peer_address_from_payload is set in the
 * request, FIXED otherwise. */
typedef enum
{
PVTI_PEER_ADDRESS_FIXED = 0,
PVTI_PEER_ADDRESS_FROM_PAYLOAD
} pvti_peer_address_method_t;
/*
 * Callback invoked by pvti_if_walk() for a PVTI interface.
 * pvtii is the interface's pool index, data the opaque pointer that was
 * passed to pvti_if_walk().
 */
typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t pvtii, void *data);

/* Call fn for the PVTI interfaces, passing data through unchanged. */
void pvti_if_walk (pvti_if_walk_cb_t fn, void *data);

/*
 * Create a PVTI interface.
 * Ports, MTU and FIB index are passed in host byte order.
 * The sw_if_index of the new interface is stored through sw_if_indexp.
 * Returns the result code that the API handler reports as retval.
 */
int pvti_if_create (ip_address_t *local_ip, u16 local_port,
                    ip_address_t *remote_ip, u16 remote_port,
                    pvti_peer_address_method_t peer_address_method,
                    u16 underlay_mtu, u32 underlay_fib_index,
                    u32 *sw_if_indexp);

/*
 * Lookup helpers returning a PVTI interface pool index; callers compare the
 * result against INDEX_INVALID to detect "not found". remote_port is in
 * host byte order.
 */
index_t pvti_if_find_by_sw_if_index (u32 sw_if_index);
index_t pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
                                             u16 remote_port);
index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
                                             u16 remote_port);
index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip,
                                            u16 remote_port);

/*
 * Delete the PVTI interface with the given sw_if_index.
 * Returns the result code that the API handler reports as retval.
 */
int pvti_if_delete (u32 sw_if_index);

/* format() helper for a PVTI interface.
 * NOTE(review): the expected va_list arguments are not visible here -
 * confirm in pvti_if.c */
u8 *format_pvti_if (u8 *s, va_list *args);
/*
 * Translate a PVTI interface pool index into a pointer to the interface.
 *
 * @param pvtii  pool index, or INDEX_INVALID
 * @return the pool element, or NULL when pvtii is INDEX_INVALID
 */
static_always_inline pvti_if_t *
pvti_if_get (index_t pvtii)
{
  if (pvtii != INDEX_INVALID)
    return (pool_elt_at_index (pvti_main.if_pool, pvtii));

  return (NULL);
}
/*
 * Translate a pointer to a PVTI interface into its pool index, computed as
 * the element's offset from the start of pvti_main.if_pool.
 */
static_always_inline index_t
pvti_if_get_index (pvti_if_t *pvti_if)
{
  pvti_if_t *pool_base = pvti_main.if_pool;

  return pvti_if - pool_base;
}
#endif

1153
test/test_pvti.py Normal file

File diff suppressed because it is too large Load Diff