wireguard: reduce memcopy and prefetch header

Originally wireguard implementation does memory copy of the whole
packet in encryption and decryption.

This patch removes unnecessary packet copy in wireguard. In addition,
it contains some performance improvement such as prefetching header
and deleting unnecessary lock and unlock for decryption.

Type: improvement

Signed-off-by: Gabriel Oginski <gabrielx.oginski@intel.com>
Change-Id: I1fe8e54d749e6922465341083b448c842e2b670f
This commit is contained in:
Gabriel Oginski
2021-10-08 09:09:45 +01:00
committed by Matthew Smith
parent 505fd37b31
commit 4739c8833e
6 changed files with 48 additions and 35 deletions

View File

@@ -366,15 +366,9 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto out;
}
u8 *decr_data = wmp->per_thread_data[thread_index].data;
enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
&peer->remote,
data->receiver_index,
data->counter,
data->encrypted_data,
encr_len,
decr_data);
enum noise_state_crypt state_cr = noise_remote_decrypt (
vm, &peer->remote, data->receiver_index, data->counter,
data->encrypted_data, encr_len, data->encrypted_data);
if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
{
@@ -392,7 +386,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto out;
}
clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
vlib_buffer_advance (b[0], sizeof (message_data_t));
b[0]->current_length = decr_len;
vnet_buffer_offload_flags_clear (b[0],
VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);

View File

@@ -549,7 +549,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
noise_keypair_t *kp;
enum noise_state_crypt ret = SC_FAILED;
clib_rwlock_reader_lock (&r->r_keypair_lock);
if ((kp = r->r_current) == NULL)
goto error;
@@ -589,7 +588,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
ret = SC_OK;
error:
clib_rwlock_reader_unlock (&r->r_keypair_lock);
return ret;
}
@@ -600,7 +598,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
{
noise_keypair_t *kp;
enum noise_state_crypt ret = SC_FAILED;
clib_rwlock_reader_lock (&r->r_keypair_lock);
if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
{
@@ -644,7 +641,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
* data packet can't confirm a session that we are an INITIATOR of. */
if (kp == r->r_next)
{
clib_rwlock_reader_unlock (&r->r_keypair_lock);
clib_rwlock_writer_lock (&r->r_keypair_lock);
if (kp == r->r_next && kp->kp_local_index == r_idx)
{
@@ -655,11 +651,9 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
ret = SC_CONN_RESET;
clib_rwlock_writer_unlock (&r->r_keypair_lock);
clib_rwlock_reader_lock (&r->r_keypair_lock);
goto error;
}
clib_rwlock_writer_unlock (&r->r_keypair_lock);
clib_rwlock_reader_lock (&r->r_keypair_lock);
}
/* Similar to when we encrypt, we want to notify the caller when we
@@ -676,7 +670,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
ret = SC_OK;
error:
clib_rwlock_reader_unlock (&r->r_keypair_lock);
return ret;
}

View File

@@ -100,8 +100,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from;
u32 *from;
ip4_udp_header_t *hdr4_out = NULL;
ip6_udp_header_t *hdr6_out = NULL;
ip4_udp_wg_header_t *hdr4_out = NULL;
ip6_udp_wg_header_t *hdr6_out = NULL;
message_data_t *message_data_wg = NULL;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
u32 thread_index = vm->thread_index;
@@ -113,7 +114,6 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_get_buffers (vm, from, bufs, n_left_from);
wg_main_t *wmp = &wg_main;
wg_peer_t *peer = NULL;
while (n_left_from > 0)
@@ -124,6 +124,14 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u8 *plain_data;
u16 plain_data_len;
if (n_left_from > 2)
{
u8 *p;
vlib_prefetch_buffer_header (b[2], LOAD);
p = vlib_buffer_get_current (b[1]);
CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
}
next[0] = WG_OUTPUT_NEXT_ERROR;
peeri =
wg_peer_get_by_adj_index (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
@@ -160,10 +168,12 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_ip4_out)
{
hdr4_out = vlib_buffer_get_current (b[0]);
message_data_wg = &hdr4_out->wg;
}
else
{
hdr6_out = vlib_buffer_get_current (b[0]);
message_data_wg = &hdr6_out->wg;
}
iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
@@ -184,14 +194,11 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto out;
}
message_data_t *encrypted_packet =
(message_data_t *) wmp->per_thread_data[thread_index].data;
enum noise_state_crypt state;
state = noise_remote_encrypt (
vm, &peer->remote, &encrypted_packet->receiver_index,
&encrypted_packet->counter, plain_data, plain_data_len,
encrypted_packet->encrypted_data);
vm, &peer->remote, &message_data_wg->receiver_index,
&message_data_wg->counter, plain_data, plain_data_len, plain_data);
if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
{
@@ -207,12 +214,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* Here we are sure that can send packet to next node */
next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
encrypted_packet->header.type = MESSAGE_DATA;
clib_memcpy (plain_data, (u8 *) encrypted_packet, encrypted_packet_len);
if (is_ip4_out)
{
hdr4_out->wg.header.type = MESSAGE_DATA;
hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
sizeof (udp_header_t));
b[0]->current_length =
@@ -222,6 +227,7 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else
{
hdr6_out->wg.header.type = MESSAGE_DATA;
hdr6_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
sizeof (udp_header_t));
b[0]->current_length =
@@ -244,9 +250,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
t->peer = peeri;
t->is_ip4 = is_ip4_out;
if (hdr4_out)
clib_memcpy (t->header, hdr4_out, sizeof (*hdr4_out));
clib_memcpy (t->header, hdr4_out, sizeof (ip4_udp_header_t));
else if (hdr6_out)
clib_memcpy (t->header, hdr6_out, sizeof (*hdr6_out));
clib_memcpy (t->header, hdr6_out, sizeof (ip6_udp_header_t));
}
next:

View File

@@ -103,7 +103,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
{
ip4_udp_header_t *hdr;
vec_validate (rewrite, sizeof (*hdr) - 1);
/* reserve space for ip4, udp and wireguard headers */
vec_validate (rewrite, sizeof (ip4_udp_wg_header_t) - 1);
hdr = (ip4_udp_header_t *) rewrite;
hdr->ip4.ip_version_and_header_length = 0x45;
@@ -121,7 +122,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
{
ip6_udp_header_t *hdr;
vec_validate (rewrite, sizeof (*hdr) - 1);
/* reserve space for ip6, udp and wireguard headers */
vec_validate (rewrite, sizeof (ip6_udp_wg_header_t) - 1);
hdr = (ip6_udp_header_t *) rewrite;
hdr->ip6.ip_version_traffic_class_and_flow_label = 0x60;

View File

@@ -33,12 +33,26 @@ typedef struct ip4_udp_header_t_
udp_header_t udp;
} __clib_packed ip4_udp_header_t;
typedef struct ip4_udp_wg_header_t_
{
ip4_header_t ip4;
udp_header_t udp;
message_data_t wg;
} __clib_packed ip4_udp_wg_header_t;
typedef struct ip6_udp_header_t_
{
ip6_header_t ip6;
udp_header_t udp;
} __clib_packed ip6_udp_header_t;
typedef struct ip6_udp_wg_header_t_
{
ip6_header_t ip6;
udp_header_t udp;
message_data_t wg;
} __clib_packed ip6_udp_wg_header_t;
u8 *format_ip4_udp_header (u8 * s, va_list * va);
u8 *format_ip6_udp_header (u8 *s, va_list *va);

View File

@@ -50,7 +50,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
vlib_buffer_advance (b0, -sizeof (*hdr4));
hdr4 = vlib_buffer_get_current (b0);
clib_memcpy (hdr4, peer->rewrite, vec_len (peer->rewrite));
/* copy only ip4 and udp header; wireguard header not needed */
clib_memcpy (hdr4, peer->rewrite, sizeof (ip4_udp_header_t));
hdr4->udp.length =
clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
@@ -64,7 +66,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
vlib_buffer_advance (b0, -sizeof (*hdr6));
hdr6 = vlib_buffer_get_current (b0);
clib_memcpy (hdr6, peer->rewrite, vec_len (peer->rewrite));
/* copy only ip6 and udp header; wireguard header not needed */
clib_memcpy (hdr6, peer->rewrite, sizeof (ip6_udp_header_t));
hdr6->udp.length =
clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));