quic: quicly crypto offloading

- Implement our own QUIC packet allocator, which allocates extra memory with
each packet to store crypto-offloading related data
- Offload encryption/decryption of 1-RTT packets to vnet crypto
- Add a CLI command to change the maximum number of packets per key

Type: feature

Change-Id: I7557fd457d7ba492329d5d8ed192509cbd727f9c
Signed-off-by: MathiasRaoul <mathias.raoul@gmail.com>
This commit is contained in:
MathiasRaoul
2020-01-09 14:50:53 +00:00
committed by Dave Wallace
parent 776644efe7
commit 92de6b65be
4 changed files with 579 additions and 57 deletions

View File

@ -26,10 +26,14 @@
#include <quic/quic.h> #include <quic/quic.h>
#include <quic/certs.h> #include <quic/certs.h>
#include <quic/error.h> #include <quic/error.h>
#include <quic/quic_crypto.h>
#include <quicly/constants.h> #include <quicly/constants.h>
#include <quicly/defaults.h> #include <quicly/defaults.h>
#include <picotls.h>
#include <quic/quic_crypto.h>
extern quicly_crypto_engine_t quic_crypto_engine;
static char *quic_error_strings[] = { static char *quic_error_strings[] = {
#define quic_error(n,s) s, #define quic_error(n,s) s,
@ -37,7 +41,9 @@ static char *quic_error_strings[] = {
#undef quic_error #undef quic_error
}; };
static quic_main_t quic_main; #define DEFAULT_MAX_PACKETS_PER_KEY 16777216
quic_main_t quic_main;
static void quic_update_timer (quic_ctx_t * ctx); static void quic_update_timer (quic_ctx_t * ctx);
static void quic_check_quic_session_connected (quic_ctx_t * ctx); static void quic_check_quic_session_connected (quic_ctx_t * ctx);
static int quic_reset_connection (u64 udp_session_handle, static int quic_reset_connection (u64 udp_session_handle,
@ -85,6 +91,33 @@ quic_crypto_context_free_if_needed (crypto_context_t * crctx, u8 thread_index)
pool_put (qm->wrk_ctx[thread_index].crypto_ctx_pool, crctx); pool_put (qm->wrk_ctx[thread_index].crypto_ctx_pool, crctx);
} }
/**
 * Packet allocator hook: one heap chunk holds, in order, the
 * quicly_datagram_t header, a zeroed quic_encrypt_cb_ctx, and the payload
 * area of payloadsize bytes that data.base is pointed at.
 *
 * @param self         allocator instance (not used here)
 * @param payloadsize  number of payload bytes to reserve
 * @return the new datagram, or NULL when clib_mem_alloc returns NULL
 */
static quicly_datagram_t *
quic_alloc_packet (quicly_packet_allocator_t * self, size_t payloadsize)
{
  const size_t hdr_len = sizeof (quicly_datagram_t);
  const size_t cb_ctx_len = sizeof (quic_encrypt_cb_ctx);
  quicly_datagram_t *dgram;
  uint8_t *chunk;

  chunk = clib_mem_alloc (hdr_len + payloadsize + cb_ctx_len);
  if (chunk == NULL)
    return NULL;

  dgram = (quicly_datagram_t *) chunk;

  /* The offload context sits right behind the header and starts out zeroed */
  clib_memset (chunk + hdr_len, 0, cb_ctx_len);

  /* The payload begins after both the header and the offload context */
  dgram->data.base = chunk + hdr_len + cb_ctx_len;

  return dgram;
}
/* Packet allocator release hook: passes the datagram to clib_mem_free.
 * The self argument is not used. */
static void
quic_free_packet (quicly_packet_allocator_t * self,
quicly_datagram_t * packet)
{
clib_mem_free (packet);
}
/* Allocator pair (allocation hook first, release hook second) that
 * quic_init_crypto_context installs as quicly_ctx->packet_allocator. */
quicly_packet_allocator_t quic_packet_allocator =
{ quic_alloc_packet, quic_free_packet };
static int static int
quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair) quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair)
{ {
@ -154,6 +187,32 @@ quic_list_crypto_context_command_fn (vlib_main_t * vm,
return 0; return 0;
} }
/**
 * CLI handler for "set quic max_packets_per_key".
 *
 * Parses the rest of the command line with unformat_memory_size and stores
 * each parsed value in quic_main.max_packets_per_key (the last one wins).
 * The line input obtained from unformat_user is released on every exit
 * path once it has been acquired.
 *
 * @param vm     vlib main (not used here)
 * @param input  CLI input stream
 * @param cmd    CLI command descriptor (not used here)
 * @return 0 on success or when no line input is available, otherwise an
 *         "unknown input" error describing the unparsed text
 */
static clib_error_t *
quic_set_max_packets_per_key_fn (vlib_main_t * vm,
                                 unformat_input_t * input,
                                 vlib_cli_command_t * cmd)
{
  quic_main_t *qm = &quic_main;
  unformat_input_t _line_input, *line_input = &_line_input;
  clib_error_t *error = 0;
  u64 tmp;

  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "%U", unformat_memory_size, &tmp))
        {
          qm->max_packets_per_key = tmp;
        }
      else
        {
          /* Build the error before freeing: it formats from line_input */
          error = clib_error_return (0, "unknown input '%U'",
                                     format_unformat_error, line_input);
          break;
        }
    }

  /* line_input owns a buffer allocated by unformat_line_input */
  unformat_free (line_input);
  return error;
}
static void static void
quic_release_crypto_context (u32 crypto_context_index, u8 thread_index) quic_release_crypto_context (u32 crypto_context_index, u8 thread_index)
{ {
@ -203,12 +262,15 @@ quic_init_crypto_context (crypto_context_t * crctx, quic_ctx_t * ctx)
clib_memcpy (quicly_ctx, &quicly_spec_context, sizeof (quicly_context_t)); clib_memcpy (quicly_ctx, &quicly_spec_context, sizeof (quicly_context_t));
quicly_ctx->max_packet_size = QUIC_MAX_PACKET_SIZE; quicly_ctx->max_packet_size = QUIC_MAX_PACKET_SIZE;
quicly_ctx->max_packets_per_key = qm->max_packets_per_key;
quicly_ctx->tls = ptls_ctx; quicly_ctx->tls = ptls_ctx;
quicly_ctx->stream_open = &on_stream_open; quicly_ctx->stream_open = &on_stream_open;
quicly_ctx->closed_by_peer = &on_closed_by_peer; quicly_ctx->closed_by_peer = &on_closed_by_peer;
quicly_ctx->now = &quicly_vpp_now_cb; quicly_ctx->now = &quicly_vpp_now_cb;
quicly_amend_ptls_context (quicly_ctx->tls); quicly_amend_ptls_context (quicly_ctx->tls);
quicly_ctx->packet_allocator = &quic_packet_allocator;
quicly_ctx->crypto_engine = &quic_crypto_engine;
quicly_ctx->transport_params.max_data = QUIC_INT_MAX; quicly_ctx->transport_params.max_data = QUIC_INT_MAX;
quicly_ctx->transport_params.max_streams_uni = (uint64_t) 1 << 60; quicly_ctx->transport_params.max_streams_uni = (uint64_t) 1 << 60;
quicly_ctx->transport_params.max_streams_bidi = (uint64_t) 1 << 60; quicly_ctx->transport_params.max_streams_bidi = (uint64_t) 1 << 60;
@ -350,7 +412,7 @@ quic_ctx_get_if_valid (u32 ctx_index, u32 thread_index)
return pool_elt_at_index (quic_main.ctx_pool[thread_index], ctx_index); return pool_elt_at_index (quic_main.ctx_pool[thread_index], ctx_index);
} }
static quic_ctx_t * quic_ctx_t *
quic_get_conn_ctx (quicly_conn_t * conn) quic_get_conn_ctx (quicly_conn_t * conn)
{ {
u64 conn_data; u64 conn_data;
@ -633,12 +695,14 @@ quic_send_datagram (session_t * udp_session, quicly_datagram_t * packet)
static int static int
quic_send_packets (quic_ctx_t * ctx) quic_send_packets (quic_ctx_t * ctx)
{ {
quic_main_t *qm = &quic_main;
quicly_datagram_t *packets[QUIC_SEND_PACKET_VEC_SIZE]; quicly_datagram_t *packets[QUIC_SEND_PACKET_VEC_SIZE];
session_t *udp_session; session_t *udp_session;
quicly_conn_t *conn; quicly_conn_t *conn;
size_t num_packets, i, max_packets; size_t num_packets, i, max_packets;
quicly_packet_allocator_t *pa; quicly_packet_allocator_t *pa;
int err = 0; int err = 0;
u32 thread_index = vlib_get_thread_index ();
/* We have sctx, get qctx */ /* We have sctx, get qctx */
if (quic_ctx_is_stream (ctx)) if (quic_ctx_is_stream (ctx))
@ -669,8 +733,12 @@ quic_send_packets (quic_ctx_t * ctx)
if ((err = quicly_send (conn, packets, &num_packets))) if ((err = quicly_send (conn, packets, &num_packets)))
goto quicly_error; goto quicly_error;
quic_crypto_batch_tx_packets (&qm->wrk_ctx
[thread_index].crypto_context_batch);
for (i = 0; i != num_packets; ++i) for (i = 0; i != num_packets; ++i)
{ {
quic_crypto_finalize_send_packet (packets[i]);
if ((err = quic_send_datagram (udp_session, packets[i]))) if ((err = quic_send_datagram (udp_session, packets[i])))
goto quicly_error; goto quicly_error;
@ -1122,7 +1190,6 @@ quic_expired_timers_dispatch (u32 * expired_timers)
} }
/* Transport proto functions */ /* Transport proto functions */
static int static int
quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep) quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep)
{ {
@ -1990,7 +2057,7 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
ctx = quic_ctx_get (pctx->ctx_index, pctx->thread_index); ctx = quic_ctx_get (pctx->ctx_index, pctx->thread_index);
if (ctx->c_s_index != QUIC_SESSION_INVALID) if (ctx->c_s_index != QUIC_SESSION_INVALID)
{ {
QUIC_DBG (2, "already accepted ctx 0x%x", ctx_index); QUIC_DBG (2, "already accepted ctx 0x%x", ctx->c_s_index);
return; return;
} }
@ -2128,6 +2195,8 @@ quic_process_one_rx_packet (u64 udp_session_handle, svm_fifo_t * f,
if (rv == QUIC_PACKET_TYPE_RECEIVE) if (rv == QUIC_PACKET_TYPE_RECEIVE)
{ {
pctx->ptype = QUIC_PACKET_TYPE_RECEIVE; pctx->ptype = QUIC_PACKET_TYPE_RECEIVE;
quic_ctx_t *qctx = quic_ctx_get (pctx->ctx_index, thread_index);
quic_crypto_decrypt_packet (qctx, pctx);
return 0; return 0;
} }
else if (rv == QUIC_PACKET_TYPE_MIGRATE) else if (rv == QUIC_PACKET_TYPE_MIGRATE)
@ -2153,6 +2222,7 @@ static int
quic_udp_session_rx_callback (session_t * udp_session) quic_udp_session_rx_callback (session_t * udp_session)
{ {
/* Read data from UDP rx_fifo and pass it to the quicly conn. */ /* Read data from UDP rx_fifo and pass it to the quicly conn. */
quic_main_t *qm = &quic_main;
quic_ctx_t *ctx = NULL, *prev_ctx = NULL; quic_ctx_t *ctx = NULL, *prev_ctx = NULL;
svm_fifo_t *f = udp_session->rx_fifo; svm_fifo_t *f = udp_session->rx_fifo;
u32 max_deq; u32 max_deq;
@ -2212,6 +2282,9 @@ rx_start:
} }
} }
quic_crypto_batch_rx_packets (&qm->
wrk_ctx[thread_index].crypto_context_batch);
for (i = 0; i < max_packets; i++) for (i = 0; i < max_packets; i++)
{ {
switch (packets_ctx[i].ptype) switch (packets_ctx[i].ptype)
@ -2412,6 +2485,7 @@ quic_init (vlib_main_t * vm)
vec_validate (qm->ctx_pool, num_threads - 1); vec_validate (qm->ctx_pool, num_threads - 1);
vec_validate (qm->wrk_ctx, num_threads - 1); vec_validate (qm->wrk_ctx, num_threads - 1);
for (i = 0; i < num_threads; i++) for (i = 0; i < num_threads; i++)
{ {
qm->wrk_ctx[i].next_cid.thread_id = i; qm->wrk_ctx[i].next_cid.thread_id = i;
@ -2421,6 +2495,9 @@ quic_init (vlib_main_t * vm)
tw->last_run_time = vlib_time_now (vlib_get_main ()); tw->last_run_time = vlib_time_now (vlib_get_main ());
clib_bihash_init_24_8 (&qm->wrk_ctx[i].crypto_context_hash, clib_bihash_init_24_8 (&qm->wrk_ctx[i].crypto_context_hash,
"quic crypto contexts", 64, 128 << 10); "quic crypto contexts", 64, 128 << 10);
qm->wrk_ctx[i].crypto_context_batch.nb_rx_packets = 0;
qm->wrk_ctx[i].crypto_context_batch.nb_tx_packets = 0;
} }
clib_bihash_init_16_8 (&qm->connection_hash, "quic connections", 1024, clib_bihash_init_16_8 (&qm->connection_hash, "quic connections", 1024,
@ -2441,7 +2518,9 @@ quic_init (vlib_main_t * vm)
quic_register_cipher_suite (CRYPTO_ENGINE_VPP, quic_crypto_cipher_suites); quic_register_cipher_suite (CRYPTO_ENGINE_VPP, quic_crypto_cipher_suites);
quic_register_cipher_suite (CRYPTO_ENGINE_PICOTLS, quic_register_cipher_suite (CRYPTO_ENGINE_PICOTLS,
ptls_openssl_cipher_suites); ptls_openssl_cipher_suites);
qm->default_crypto_engine = CRYPTO_ENGINE_PICOTLS; qm->default_crypto_engine = CRYPTO_ENGINE_VPP;
qm->max_packets_per_key = DEFAULT_MAX_PACKETS_PER_KEY;
clib_rwlock_init (&qm->crypto_keys_quic_rw_lock);
vec_free (a->name); vec_free (a->name);
return 0; return 0;
} }
@ -2760,6 +2839,12 @@ VLIB_CLI_COMMAND (quic_list_crypto_context_command, static) =
.short_help = "list quic crypto contextes", .short_help = "list quic crypto contextes",
.function = quic_list_crypto_context_command_fn, .function = quic_list_crypto_context_command_fn,
}; };
/* Registers the "set quic max_packets_per_key" CLI command, handled by
 * quic_set_max_packets_per_key_fn. The value in the help text matches
 * DEFAULT_MAX_PACKETS_PER_KEY. */
VLIB_CLI_COMMAND (quic_set_max_packets_per_key, static) =
{
.path = "set quic max_packets_per_key",
.short_help = "set quic max_packets_per_key 16777216",
.function = quic_set_max_packets_per_key_fn,
};
VLIB_PLUGIN_REGISTER () = VLIB_PLUGIN_REGISTER () =
{ {
.version = VPP_BUILD_VER, .version = VPP_BUILD_VER,

View File

@ -24,6 +24,9 @@
#include <quicly.h> #include <quicly.h>
#include <vnet/crypto/crypto.h>
#include <vppinfra/lock.h>
/* QUIC log levels /* QUIC log levels
* 1 - errors * 1 - errors
* 2 - connection/stream events * 2 - connection/stream events
@ -42,8 +45,11 @@
#define QUIC_SEND_PACKET_VEC_SIZE 16 #define QUIC_SEND_PACKET_VEC_SIZE 16
#define QUIC_IV_LEN 17 #define QUIC_IV_LEN 17
#define QUIC_MAX_COALESCED_PACKET 4
#define QUIC_SEND_MAX_BATCH_PACKETS 16 #define QUIC_SEND_MAX_BATCH_PACKETS 16
#define QUIC_RCV_MAX_BATCH_PACKETS 16 #define QUIC_RCV_MAX_BATCH_PACKETS 16
#define QUIC_DEFAULT_CONN_TIMEOUT (30 * 1000) /* 30 seconds */ #define QUIC_DEFAULT_CONN_TIMEOUT (30 * 1000) /* 30 seconds */
/* Taken from quicly.c */ /* Taken from quicly.c */
@ -62,6 +68,10 @@
#define QUIC_APP_ACCEPT_NOTIFY_ERROR QUICLY_ERROR_FROM_APPLICATION_ERROR_CODE(0x2) #define QUIC_APP_ACCEPT_NOTIFY_ERROR QUICLY_ERROR_FROM_APPLICATION_ERROR_CODE(0x2)
#define QUIC_APP_CONNECT_NOTIFY_ERROR QUICLY_ERROR_FROM_APPLICATION_ERROR_CODE(0x3) #define QUIC_APP_CONNECT_NOTIFY_ERROR QUICLY_ERROR_FROM_APPLICATION_ERROR_CODE(0x3)
#define QUIC_DECRYPT_PACKET_OK 0
#define QUIC_DECRYPT_PACKET_NOTOFFLOADED 1
#define QUIC_DECRYPT_PACKET_ERROR 2
#if QUIC_DEBUG #if QUIC_DEBUG
#define QUIC_DBG(_lvl, _fmt, _args...) \ #define QUIC_DBG(_lvl, _fmt, _args...) \
if (_lvl <= QUIC_DEBUG) \ if (_lvl <= QUIC_DEBUG) \
@ -156,6 +166,14 @@ typedef struct quic_ctx_
u32 crypto_engine; u32 crypto_engine;
u32 crypto_context_index; u32 crypto_context_index;
u8 flags; u8 flags;
struct
{
ptls_cipher_context_t *hp_ctx;
ptls_aead_context_t *aead_ctx;
} ingress_keys;
int key_phase_ingress;
} quic_ctx_t; } quic_ctx_t;
/* Make sure our custom fields don't overlap with the fields we use in /* Make sure our custom fields don't overlap with the fields we use in
@ -191,6 +209,25 @@ typedef struct quic_crypto_context_data_
ptls_context_t ptls_ctx; ptls_context_t ptls_ctx;
} quic_crypto_context_data_t; } quic_crypto_context_data_t;
/* Per-datagram crypto offload context. quic_alloc_packet reserves room for
 * one of these directly after the quicly_datagram_t header and zeroes it. */
typedef struct quic_encrypt_cb_ctx_
{
quicly_datagram_t *packet;	/* NOTE(review): presumably the datagram this context belongs to - confirm */
/* Up to QUIC_MAX_COALESCED_PACKET saved parameter sets; the field names
 * mirror the first_byte_at / payload_from / hp arguments of
 * quic_crypto_finalize_send_packet_cb.
 * NOTE(review): presumably one entry per coalesced QUIC packet - confirm */
struct quic_finalize_send_packet_cb_ctx_
{
size_t payload_from;
size_t first_byte_at;
ptls_cipher_context_t *hp;
} snd_ctx[QUIC_MAX_COALESCED_PACKET];
size_t snd_ctx_count;	/* NOTE(review): presumably the number of snd_ctx entries in use - confirm */
} quic_encrypt_cb_ctx;
/* Batch of vnet crypto operations; each quic_worker_ctx_t embeds one as
 * crypto_context_batch. */
typedef struct quic_crypto_batch_ctx_
{
/* Operation slots: QUIC_SEND_MAX_BATCH_PACKETS for tx,
 * QUIC_RCV_MAX_BATCH_PACKETS for rx */
vnet_crypto_op_t aead_crypto_tx_packets_ops[QUIC_SEND_MAX_BATCH_PACKETS],
aead_crypto_rx_packets_ops[QUIC_RCV_MAX_BATCH_PACKETS];
/* Both counters are set to 0 for every worker in quic_init.
 * NOTE(review): presumably the number of queued ops per direction - confirm */
size_t nb_tx_packets, nb_rx_packets;
} quic_crypto_batch_ctx_t;
typedef struct quic_worker_ctx_ typedef struct quic_worker_ctx_
{ {
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@ -199,6 +236,7 @@ typedef struct quic_worker_ctx_
quicly_cid_plaintext_t next_cid; quicly_cid_plaintext_t next_cid;
crypto_context_t *crypto_ctx_pool; /**< per thread pool of crypto contexes */ crypto_context_t *crypto_ctx_pool; /**< per thread pool of crypto contexes */
clib_bihash_24_8_t crypto_context_hash; /**< per thread [params:crypto_ctx_index] hash */ clib_bihash_24_8_t crypto_context_hash; /**< per thread [params:crypto_ctx_index] hash */
quic_crypto_batch_ctx_t crypto_context_batch;
} quic_worker_ctx_t; } quic_worker_ctx_t;
typedef struct quic_rx_packet_ctx_ typedef struct quic_rx_packet_ctx_
@ -228,6 +266,7 @@ typedef struct quic_main_
ptls_cipher_suite_t ***quic_ciphers; /**< available ciphers by crypto engine */ ptls_cipher_suite_t ***quic_ciphers; /**< available ciphers by crypto engine */
uword *available_crypto_engines; /**< Bitmap for registered engines */ uword *available_crypto_engines; /**< Bitmap for registered engines */
u8 default_crypto_engine; /**< Used if you do connect with CRYPTO_ENGINE_NONE (0) */ u8 default_crypto_engine; /**< Used if you do connect with CRYPTO_ENGINE_NONE (0) */
u64 max_packets_per_key; /**< number of packets that can be sent without a key update */
ptls_handshake_properties_t hs_properties; ptls_handshake_properties_t hs_properties;
quic_session_cache_t session_cache; quic_session_cache_t session_cache;
@ -235,6 +274,8 @@ typedef struct quic_main_
u32 udp_fifo_size; u32 udp_fifo_size;
u32 udp_fifo_prealloc; u32 udp_fifo_prealloc;
u32 connection_timeout; u32 connection_timeout;
clib_rwlock_t crypto_keys_quic_rw_lock;
} quic_main_t; } quic_main_t;
#endif /* __included_quic_h__ */ #endif /* __included_quic_h__ */

File diff suppressed because it is too large Load Diff

View File

@ -18,11 +18,29 @@
#include <quicly.h> #include <quicly.h>
/* NOTE(review): these declare struct tags spelled like the typedef names.
 * In quic.h the typedefs are built on the tags quic_ctx_,
 * quic_rx_packet_ctx_ and quic_crypto_batch_ctx_, so these tags look
 * unrelated to the types used in the prototypes below - confirm whether
 * they are needed. */
struct quic_ctx_t;
struct quic_rx_packet_ctx_t;
struct quic_crypto_batch_ctx_t;
extern ptls_cipher_suite_t *quic_crypto_cipher_suites[]; extern ptls_cipher_suite_t *quic_crypto_cipher_suites[];
int quic_encrypt_ticket_cb (ptls_encrypt_ticket_t * _self, ptls_t * tls, int quic_encrypt_ticket_cb (ptls_encrypt_ticket_t * _self, ptls_t * tls,
int is_encrypt, ptls_buffer_t * dst, int is_encrypt, ptls_buffer_t * dst,
ptls_iovec_t src); ptls_iovec_t src);
/* Called from quic_process_one_rx_packet for packets classified as
 * QUIC_PACKET_TYPE_RECEIVE, with the connection context and the rx packet
 * context. */
void quic_crypto_decrypt_packet (quic_ctx_t * qctx,
quic_rx_packet_ctx_t * pctx);
/* Called from quic_send_packets on the worker's batch after quicly_send
 * succeeds and before the per-packet send loop. */
void quic_crypto_batch_tx_packets (quic_crypto_batch_ctx_t * batch_ctx);
/* Called from quic_udp_session_rx_callback on the worker's batch before the
 * per-packet dispatch loop. */
void quic_crypto_batch_rx_packets (quic_crypto_batch_ctx_t * batch_ctx);
/* Called from quic_send_packets on each datagram just before
 * quic_send_datagram. */
void quic_crypto_finalize_send_packet (quicly_datagram_t * packet);
/* NOTE(review): presumably the finalize-send hook of quic_crypto_engine -
 * confirm in quic_crypto.c. */
void
quic_crypto_finalize_send_packet_cb (struct st_quicly_crypto_engine_t *engine,
quicly_conn_t * conn,
ptls_cipher_context_t * hp,
ptls_aead_context_t * aead,
quicly_datagram_t * packet,
size_t first_byte_at,
size_t payload_from, int coalesced);
#endif /* __included_vpp_quic_crypto_h__ */ #endif /* __included_vpp_quic_crypto_h__ */