wireguard: reduce memcopy and prefetch header
Originally, the wireguard implementation copied the whole packet in memory during both encryption and decryption. This patch removes that unnecessary packet copy by encrypting and decrypting the data in place. In addition, it contains some performance improvements, such as prefetching the buffer header and removing unnecessary locking and unlocking for decryption. Type: improvement Signed-off-by: Gabriel Oginski <gabrielx.oginski@intel.com> Change-Id: I1fe8e54d749e6922465341083b448c842e2b670f
This commit is contained in:
committed by
Matthew Smith
parent
505fd37b31
commit
4739c8833e
@@ -366,15 +366,9 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
goto out;
|
||||
}
|
||||
|
||||
u8 *decr_data = wmp->per_thread_data[thread_index].data;
|
||||
|
||||
enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
|
||||
&peer->remote,
|
||||
data->receiver_index,
|
||||
data->counter,
|
||||
data->encrypted_data,
|
||||
encr_len,
|
||||
decr_data);
|
||||
enum noise_state_crypt state_cr = noise_remote_decrypt (
|
||||
vm, &peer->remote, data->receiver_index, data->counter,
|
||||
data->encrypted_data, encr_len, data->encrypted_data);
|
||||
|
||||
if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
|
||||
{
|
||||
@@ -392,7 +386,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
goto out;
|
||||
}
|
||||
|
||||
clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
|
||||
vlib_buffer_advance (b[0], sizeof (message_data_t));
|
||||
b[0]->current_length = decr_len;
|
||||
vnet_buffer_offload_flags_clear (b[0],
|
||||
VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
|
||||
|
||||
@@ -549,7 +549,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
|
||||
noise_keypair_t *kp;
|
||||
enum noise_state_crypt ret = SC_FAILED;
|
||||
|
||||
clib_rwlock_reader_lock (&r->r_keypair_lock);
|
||||
if ((kp = r->r_current) == NULL)
|
||||
goto error;
|
||||
|
||||
@@ -589,7 +588,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
|
||||
|
||||
ret = SC_OK;
|
||||
error:
|
||||
clib_rwlock_reader_unlock (&r->r_keypair_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -600,7 +598,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
|
||||
{
|
||||
noise_keypair_t *kp;
|
||||
enum noise_state_crypt ret = SC_FAILED;
|
||||
clib_rwlock_reader_lock (&r->r_keypair_lock);
|
||||
|
||||
if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
|
||||
{
|
||||
@@ -644,7 +641,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
|
||||
* data packet can't confirm a session that we are an INITIATOR of. */
|
||||
if (kp == r->r_next)
|
||||
{
|
||||
clib_rwlock_reader_unlock (&r->r_keypair_lock);
|
||||
clib_rwlock_writer_lock (&r->r_keypair_lock);
|
||||
if (kp == r->r_next && kp->kp_local_index == r_idx)
|
||||
{
|
||||
@@ -655,11 +651,9 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
|
||||
|
||||
ret = SC_CONN_RESET;
|
||||
clib_rwlock_writer_unlock (&r->r_keypair_lock);
|
||||
clib_rwlock_reader_lock (&r->r_keypair_lock);
|
||||
goto error;
|
||||
}
|
||||
clib_rwlock_writer_unlock (&r->r_keypair_lock);
|
||||
clib_rwlock_reader_lock (&r->r_keypair_lock);
|
||||
}
|
||||
|
||||
/* Similar to when we encrypt, we want to notify the caller when we
|
||||
@@ -676,7 +670,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
|
||||
|
||||
ret = SC_OK;
|
||||
error:
|
||||
clib_rwlock_reader_unlock (&r->r_keypair_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -100,8 +100,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
{
|
||||
u32 n_left_from;
|
||||
u32 *from;
|
||||
ip4_udp_header_t *hdr4_out = NULL;
|
||||
ip6_udp_header_t *hdr6_out = NULL;
|
||||
ip4_udp_wg_header_t *hdr4_out = NULL;
|
||||
ip6_udp_wg_header_t *hdr6_out = NULL;
|
||||
message_data_t *message_data_wg = NULL;
|
||||
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
|
||||
u16 nexts[VLIB_FRAME_SIZE], *next;
|
||||
u32 thread_index = vm->thread_index;
|
||||
@@ -113,7 +114,6 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
|
||||
vlib_get_buffers (vm, from, bufs, n_left_from);
|
||||
|
||||
wg_main_t *wmp = &wg_main;
|
||||
wg_peer_t *peer = NULL;
|
||||
|
||||
while (n_left_from > 0)
|
||||
@@ -124,6 +124,14 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
u8 *plain_data;
|
||||
u16 plain_data_len;
|
||||
|
||||
if (n_left_from > 2)
|
||||
{
|
||||
u8 *p;
|
||||
vlib_prefetch_buffer_header (b[2], LOAD);
|
||||
p = vlib_buffer_get_current (b[1]);
|
||||
CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
|
||||
}
|
||||
|
||||
next[0] = WG_OUTPUT_NEXT_ERROR;
|
||||
peeri =
|
||||
wg_peer_get_by_adj_index (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
|
||||
@@ -160,10 +168,12 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
if (is_ip4_out)
|
||||
{
|
||||
hdr4_out = vlib_buffer_get_current (b[0]);
|
||||
message_data_wg = &hdr4_out->wg;
|
||||
}
|
||||
else
|
||||
{
|
||||
hdr6_out = vlib_buffer_get_current (b[0]);
|
||||
message_data_wg = &hdr6_out->wg;
|
||||
}
|
||||
|
||||
iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
|
||||
@@ -184,14 +194,11 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
goto out;
|
||||
}
|
||||
|
||||
message_data_t *encrypted_packet =
|
||||
(message_data_t *) wmp->per_thread_data[thread_index].data;
|
||||
|
||||
enum noise_state_crypt state;
|
||||
|
||||
state = noise_remote_encrypt (
|
||||
vm, &peer->remote, &encrypted_packet->receiver_index,
|
||||
&encrypted_packet->counter, plain_data, plain_data_len,
|
||||
encrypted_packet->encrypted_data);
|
||||
vm, &peer->remote, &message_data_wg->receiver_index,
|
||||
&message_data_wg->counter, plain_data, plain_data_len, plain_data);
|
||||
|
||||
if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
|
||||
{
|
||||
@@ -207,12 +214,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
|
||||
/* Here we are sure that can send packet to next node */
|
||||
next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
|
||||
encrypted_packet->header.type = MESSAGE_DATA;
|
||||
|
||||
clib_memcpy (plain_data, (u8 *) encrypted_packet, encrypted_packet_len);
|
||||
|
||||
if (is_ip4_out)
|
||||
{
|
||||
hdr4_out->wg.header.type = MESSAGE_DATA;
|
||||
hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
|
||||
sizeof (udp_header_t));
|
||||
b[0]->current_length =
|
||||
@@ -222,6 +227,7 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
}
|
||||
else
|
||||
{
|
||||
hdr6_out->wg.header.type = MESSAGE_DATA;
|
||||
hdr6_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
|
||||
sizeof (udp_header_t));
|
||||
b[0]->current_length =
|
||||
@@ -244,9 +250,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
|
||||
t->peer = peeri;
|
||||
t->is_ip4 = is_ip4_out;
|
||||
if (hdr4_out)
|
||||
clib_memcpy (t->header, hdr4_out, sizeof (*hdr4_out));
|
||||
clib_memcpy (t->header, hdr4_out, sizeof (ip4_udp_header_t));
|
||||
else if (hdr6_out)
|
||||
clib_memcpy (t->header, hdr6_out, sizeof (*hdr6_out));
|
||||
clib_memcpy (t->header, hdr6_out, sizeof (ip6_udp_header_t));
|
||||
}
|
||||
|
||||
next:
|
||||
|
||||
@@ -103,7 +103,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
|
||||
{
|
||||
ip4_udp_header_t *hdr;
|
||||
|
||||
vec_validate (rewrite, sizeof (*hdr) - 1);
|
||||
/* reserve space for ip4, udp and wireguard headers */
|
||||
vec_validate (rewrite, sizeof (ip4_udp_wg_header_t) - 1);
|
||||
hdr = (ip4_udp_header_t *) rewrite;
|
||||
|
||||
hdr->ip4.ip_version_and_header_length = 0x45;
|
||||
@@ -121,7 +122,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
|
||||
{
|
||||
ip6_udp_header_t *hdr;
|
||||
|
||||
vec_validate (rewrite, sizeof (*hdr) - 1);
|
||||
/* reserve space for ip6, udp and wireguard headers */
|
||||
vec_validate (rewrite, sizeof (ip6_udp_wg_header_t) - 1);
|
||||
hdr = (ip6_udp_header_t *) rewrite;
|
||||
|
||||
hdr->ip6.ip_version_traffic_class_and_flow_label = 0x60;
|
||||
|
||||
@@ -33,12 +33,26 @@ typedef struct ip4_udp_header_t_
|
||||
udp_header_t udp;
|
||||
} __clib_packed ip4_udp_header_t;
|
||||
|
||||
typedef struct ip4_udp_wg_header_t_
|
||||
{
|
||||
ip4_header_t ip4;
|
||||
udp_header_t udp;
|
||||
message_data_t wg;
|
||||
} __clib_packed ip4_udp_wg_header_t;
|
||||
|
||||
typedef struct ip6_udp_header_t_
|
||||
{
|
||||
ip6_header_t ip6;
|
||||
udp_header_t udp;
|
||||
} __clib_packed ip6_udp_header_t;
|
||||
|
||||
typedef struct ip6_udp_wg_header_t_
|
||||
{
|
||||
ip6_header_t ip6;
|
||||
udp_header_t udp;
|
||||
message_data_t wg;
|
||||
} __clib_packed ip6_udp_wg_header_t;
|
||||
|
||||
u8 *format_ip4_udp_header (u8 * s, va_list * va);
|
||||
u8 *format_ip6_udp_header (u8 *s, va_list *va);
|
||||
|
||||
|
||||
@@ -50,7 +50,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
|
||||
vlib_buffer_advance (b0, -sizeof (*hdr4));
|
||||
|
||||
hdr4 = vlib_buffer_get_current (b0);
|
||||
clib_memcpy (hdr4, peer->rewrite, vec_len (peer->rewrite));
|
||||
|
||||
/* copy only ip4 and udp header; wireguard header not needed */
|
||||
clib_memcpy (hdr4, peer->rewrite, sizeof (ip4_udp_header_t));
|
||||
|
||||
hdr4->udp.length =
|
||||
clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
|
||||
@@ -64,7 +66,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
|
||||
vlib_buffer_advance (b0, -sizeof (*hdr6));
|
||||
|
||||
hdr6 = vlib_buffer_get_current (b0);
|
||||
clib_memcpy (hdr6, peer->rewrite, vec_len (peer->rewrite));
|
||||
|
||||
/* copy only ip6 and udp header; wireguard header not needed */
|
||||
clib_memcpy (hdr6, peer->rewrite, sizeof (ip6_udp_header_t));
|
||||
|
||||
hdr6->udp.length =
|
||||
clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
|
||||
|
||||
Reference in New Issue
Block a user