crypto: SW scheduler async crypto engine

Type: feature

This patch adds a new sw_scheduler async crypto engine.
The engine transforms async frames into sync crypto ops and
delegates them to active sync engines. With the patch it
is possible to increase the single-worker crypto throughput
by offloading the crypto workload to multiple workers.

By default all workers in the system take part in the crypto
workload processing. However, a worker's available cycles
are limited. To free up a worker's cycles for other workloads
(e.g. the worker core that handles the RX/TX and IPSec stack
processing), a cli command is added that lets a worker remove
itself from (or later add itself back to) the heavy crypto
workload, leaving the crypto processing to the other workers.
The command is:

 - set sw_scheduler worker <idx> crypto <on|off>

It also adds a new interrupt mode to the async crypto dispatch node.
This mode signals the node when new frames are enqueued,
as opposed to polling mode, which continuously calls the dispatch node.

New cli commands:
 - set crypto async dispatch [polling|interrupt]
 - show crypto async status (displays mode and nodes' states)

Signed-off-by: PiotrX Kleski <piotrx.kleski@intel.com>
Signed-off-by: DariuszX Kazimierski <dariuszx.kazimierski@intel.com>
Reviewed-by: Fan Zhang <roy.fan.zhang@intel.com>
Change-Id: I332655f347bb9e3bc9c64166e86e393e911bdb39
This commit is contained in:
PiotrX Kleski
2020-07-08 14:36:34 +02:00
committed by Damjan Marion
parent 56230097e2
commit 2284817eae
8 changed files with 1028 additions and 72 deletions
@@ -0,0 +1,17 @@
# Copyright (c) 2020 Intel and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build the software-scheduler async crypto engine as a VPP plugin.
add_vpp_plugin(crypto_sw_scheduler
SOURCES
main.c
)
@@ -0,0 +1,61 @@
/*
 * Copyright (c) 2020 Intel and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __crypto_sw_scheduler_h__
#define __crypto_sw_scheduler_h__

/* Include moved inside the guard so the guard actually protects the
 * whole translation of this header. */
#include <vnet/crypto/crypto.h>

/* Queue size must be a power of two so the mask below gives cheap
 * head/tail index wrap-around. */
#define CRYPTO_SW_SCHEDULER_QUEUE_SIZE 64
#define CRYPTO_SW_SCHEDULER_QUEUE_MASK (CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1)

/** Ring of pending async crypto frames for one async op id. */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  u32 head;			/**< index of next frame to dequeue */
  u32 tail;			/**< index of next free enqueue slot */
  vnet_crypto_async_frame_t *jobs[0];	/**< trailing array of queued frames */
} crypto_sw_scheduler_queue_t;

/** Per-thread state: one queue per async op id plus scratch op vectors
 * that are reused across dispatch runs. */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  crypto_sw_scheduler_queue_t *queues[VNET_CRYPTO_ASYNC_OP_N_IDS];
  vnet_crypto_op_t *crypto_ops;
  vnet_crypto_op_t *integ_ops;
  vnet_crypto_op_t *chained_crypto_ops;
  vnet_crypto_op_t *chained_integ_ops;
  vnet_crypto_op_chunk_t *chunks;
  /* Non-zero when this worker participates in crypto processing;
   * toggled by "set sw_scheduler worker <idx> crypto <on|off>". */
  u8 self_crypto_enabled;
} crypto_sw_scheduler_per_thread_data_t;

/** Engine-global state. */
typedef struct
{
  u32 crypto_engine_index;	/**< index returned by engine registration */
  crypto_sw_scheduler_per_thread_data_t *per_thread_data;
  vnet_crypto_key_t *keys;	/**< engine-local key vector */
} crypto_sw_scheduler_main_t;

extern crypto_sw_scheduler_main_t crypto_sw_scheduler_main;

#endif /* __crypto_sw_scheduler_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
File diff suppressed because it is too large Load Diff
+4 -2
View File
@@ -706,7 +706,8 @@ cryptodev_get_ring_head (struct rte_ring * ring)
}
static_always_inline vnet_crypto_async_frame_t *
cryptodev_frame_dequeue (vlib_main_t * vm)
cryptodev_frame_dequeue (vlib_main_t * vm, u32 * nb_elts_processed,
u32 * enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_numa_data_t *numa = cmt->per_numa_data + vm->numa_node;
@@ -768,7 +769,8 @@ cryptodev_frame_dequeue (vlib_main_t * vm)
VNET_CRYPTO_FRAME_STATE_SUCCESS : VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
rte_mempool_put_bulk (numa->cop_pool, (void **) cet->cops, frame->n_elts);
*nb_elts_processed = frame->n_elts;
*enqueue_thread_idx = frame->enqueue_thread_index;
return frame;
}
+73
View File
@@ -310,6 +310,48 @@ VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) =
/* *INDENT-ON* */
/*
 * CLI: "show crypto async status"
 * Prints the configured async dispatch mode (POLLING/INTERRUPT) and the
 * crypto-dispatch node state on every thread (main thread skipped when
 * workers exist).
 */
static clib_error_t *
show_crypto_async_status_command_fn (vlib_main_t * vm,
				     unformat_input_t * input,
				     vlib_cli_command_t * cmd)
{
  vnet_crypto_main_t *cm = &crypto_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  unformat_input_t _line_input, *line_input = &_line_input;
  u32 first_thread = vlib_num_workers () > 0;
  int ti;

  /* No arguments expected; consume and discard any trailing line input. */
  if (unformat_user (input, unformat_line_input, line_input))
    unformat_free (line_input);

  vlib_cli_output (vm, "Crypto async dispatch mode: %s",
		   cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING
		   ? "POLLING" : "INTERRUPT");

  for (ti = first_thread; ti < tm->n_vlib_mains; ti++)
    {
      switch (vlib_node_get_state (vlib_mains[ti], cm->crypto_node_index))
	{
	case VLIB_NODE_STATE_POLLING:
	  vlib_cli_output (vm, "threadId: %-6d POLLING", ti);
	  break;
	case VLIB_NODE_STATE_INTERRUPT:
	  vlib_cli_output (vm, "threadId: %-6d INTERRUPT", ti);
	  break;
	case VLIB_NODE_STATE_DISABLED:
	  vlib_cli_output (vm, "threadId: %-6d DISABLED", ti);
	  break;
	default:
	  break;
	}
    }
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_status_command, static) =
{
  .path = "show crypto async status",
  .short_help = "show crypto async status",
  .function = show_crypto_async_status_command_fn,
};
/* *INDENT-ON* */
/* *INDENT-ON* */
static clib_error_t *
set_crypto_async_handler_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -393,6 +435,37 @@ VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) =
};
/* *INDENT-ON* */
/*
 * CLI handlers for "set crypto async dispatch polling|interrupt".
 * Each handler just selects the global dispatch mode; the per-thread node
 * state updates happen inside vnet_crypto_set_async_dispatch_mode().
 */
static clib_error_t *
set_crypto_async_dispatch_polling_command_fn (vlib_main_t * vm,
					      unformat_input_t * input,
					      vlib_cli_command_t * cmd)
{
  vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_POLLING);
  return 0;
}

static clib_error_t *
set_crypto_async_dispatch_interrupt_command_fn (vlib_main_t * vm,
						unformat_input_t * input,
						vlib_cli_command_t * cmd)
{
  vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT);
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_async_dispatch_polling_command, static) =
{
  .path = "set crypto async dispatch polling",
  .short_help = "set crypto async dispatch polling|interrupt",
  .function = set_crypto_async_dispatch_polling_command_fn,
};

VLIB_CLI_COMMAND (set_crypto_async_dispatch_interrupt_command, static) =
{
  .path = "set crypto async dispatch interrupt",
  .short_help = "set crypto async dispatch polling|interrupt",
  .function = set_crypto_async_dispatch_interrupt_command_fn,
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
+65 -17
View File
@@ -446,18 +446,20 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
clib_error_t *
crypto_dispatch_enable_disable (int is_enable)
{
vlib_main_t *vm = vlib_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "crypto-dispatch");
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
u32 skip_master = vlib_num_workers () > 0, i;
u32 state_change = 0;
vlib_node_state_t state;
vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
u8 state_change = 0;
CLIB_MEMORY_STORE_BARRIER ();
if (is_enable && cm->async_refcnt > 0)
{
state_change = 1;
state = VLIB_NODE_STATE_POLLING;
state =
cm->dispatch_mode ==
VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? VLIB_NODE_STATE_POLLING :
VLIB_NODE_STATE_INTERRUPT;
}
if (!is_enable && cm->async_refcnt == 0)
@@ -468,8 +470,11 @@ crypto_dispatch_enable_disable (int is_enable)
if (state_change)
for (i = skip_master; i < tm->n_vlib_mains; i++)
vlib_node_set_state (vlib_mains[i], node->index, state);
{
if (state !=
vlib_node_get_state (vlib_mains[i], cm->crypto_node_index))
vlib_node_set_state (vlib_mains[i], cm->crypto_node_index, state);
}
return 0;
}
@@ -553,20 +558,20 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
void
vnet_crypto_request_async_mode (int is_enable)
{
vlib_main_t *vm = vlib_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "crypto-dispatch");
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
u32 skip_master = vlib_num_workers () > 0, i;
u32 state_change = 0;
vlib_node_state_t state;
vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
u8 state_change = 0;
CLIB_MEMORY_STORE_BARRIER ();
if (is_enable && cm->async_refcnt == 0)
{
state_change = 1;
state = VLIB_NODE_STATE_POLLING;
state =
cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING ?
VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_INTERRUPT;
}
if (!is_enable && cm->async_refcnt == 1)
{
state_change = 1;
@@ -575,7 +580,11 @@ vnet_crypto_request_async_mode (int is_enable)
if (state_change)
for (i = skip_master; i < tm->n_vlib_mains; i++)
vlib_node_set_state (vlib_mains[i], node->index, state);
{
if (state !=
vlib_node_get_state (vlib_mains[i], cm->crypto_node_index))
vlib_node_set_state (vlib_mains[i], cm->crypto_node_index, state);
}
if (is_enable)
cm->async_refcnt += 1;
@@ -583,6 +592,40 @@ vnet_crypto_request_async_mode (int is_enable)
cm->async_refcnt -= 1;
}
/**
 * Switch the async crypto dispatch node between polling and interrupt mode.
 *
 * @param mode  VNET_CRYPTO_ASYNC_DISPATCH_POLLING or
 *              VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT.
 *
 * Records the mode in crypto_main and retargets the crypto-dispatch node
 * state on every thread (main thread skipped when workers exist).  When no
 * user holds an async reference (async_refcnt == 0) the node stays
 * DISABLED regardless of the requested mode.
 */
void
vnet_crypto_set_async_dispatch_mode (u8 mode)
{
  vnet_crypto_main_t *cm = &crypto_main;
  u32 skip_master = vlib_num_workers () > 0, i;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;

  CLIB_MEMORY_STORE_BARRIER ();
  cm->dispatch_mode = mode;
  if (mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
    {
      state = cm->async_refcnt == 0 ?
	VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_INTERRUPT;
    }
  else if (mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING)
    {
      state = cm->async_refcnt == 0 ?
	VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING;
    }

  for (i = skip_master; i < tm->n_vlib_mains; i++)
    {
      /* Skip threads already in the target state to avoid needless churn. */
      if (state != vlib_node_get_state (vlib_mains[i], cm->crypto_node_index))
	vlib_node_set_state (vlib_mains[i], cm->crypto_node_index, state);
    }

  /* Switching while frames are in flight is best-effort; tell the operator
   * how to verify the result.  (Fixed typo: "is some" -> "in some".) */
  clib_warning ("Switching dispatch mode might not work in some situations.");
  clib_warning
    ("Use 'show crypto async status' to verify that the nodes' states were set");
  clib_warning ("and if not, set 'crypto async dispatch' mode again.");
}
int
vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t op)
{
@@ -663,6 +706,8 @@ vnet_crypto_init (vlib_main_t * vm)
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_crypto_thread_t *ct = 0;
cm->dispatch_mode = VNET_CRYPTO_ASYNC_DISPATCH_POLLING;
cm->engine_index_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
cm->alg_index_by_name = hash_create_string (0, sizeof (uword));
@@ -705,7 +750,10 @@ vnet_crypto_init (vlib_main_t * vm)
s);
foreach_crypto_link_async_alg
#undef _
return 0;
cm->crypto_node_index =
vlib_get_node_by_name (vm, (u8 *) "crypto-dispatch")->index;
return 0;
}
VLIB_INIT_FUNCTION (vnet_crypto_init);
+34 -10
View File
@@ -18,7 +18,7 @@
#include <vlib/vlib.h>
#define VNET_CRYPTO_FRAME_SIZE 32
#define VNET_CRYPTO_FRAME_SIZE 64
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */
#define foreach_crypto_cipher_alg \
@@ -322,15 +322,17 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
#define VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED 0
#define VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS 1
#define VNET_CRYPTO_FRAME_STATE_SUCCESS 2
#define VNET_CRYPTO_FRAME_STATE_ELT_ERROR 3
#define VNET_CRYPTO_FRAME_STATE_PENDING 1 /* frame waiting to be processed */
#define VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS 2
#define VNET_CRYPTO_FRAME_STATE_SUCCESS 3
#define VNET_CRYPTO_FRAME_STATE_ELT_ERROR 4
u8 state;
vnet_crypto_async_op_id_t op:8;
u16 n_elts;
vnet_crypto_async_frame_elt_t elts[VNET_CRYPTO_FRAME_SIZE];
u32 buffer_indices[VNET_CRYPTO_FRAME_SIZE];
u16 next_node_index[VNET_CRYPTO_FRAME_SIZE];
u32 enqueue_thread_index;
} vnet_crypto_async_frame_t;
typedef struct
@@ -357,13 +359,16 @@ typedef void (vnet_crypto_key_handler_t) (vlib_main_t * vm,
vnet_crypto_key_index_t idx);
/** async crypto function handlers **/
typedef int (vnet_crypto_frame_enqueue_t) (vlib_main_t * vm,
vnet_crypto_async_frame_t * frame);
typedef int
(vnet_crypto_frame_enqueue_t) (vlib_main_t * vm,
vnet_crypto_async_frame_t * frame);
typedef vnet_crypto_async_frame_t *
(vnet_crypto_frame_dequeue_t) (vlib_main_t * vm);
(vnet_crypto_frame_dequeue_t) (vlib_main_t * vm, u32 * nb_elts_processed,
u32 * enqueue_thread_idx);
u32 vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio,
char *desc);
u32
vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio,
char *desc);
void vnet_crypto_register_ops_handler (vlib_main_t * vm, u32 engine_index,
vnet_crypto_op_id_t opt,
@@ -431,6 +436,10 @@ typedef struct
vnet_crypto_async_alg_data_t *async_algs;
u32 async_refcnt;
vnet_crypto_async_next_node_t *next_nodes;
u32 crypto_node_index;
#define VNET_CRYPTO_ASYNC_DISPATCH_POLLING 0
#define VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT 1
u8 dispatch_mode;
} vnet_crypto_main_t;
extern vnet_crypto_main_t crypto_main;
@@ -466,6 +475,8 @@ int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt);
void vnet_crypto_request_async_mode (int is_enable);
void vnet_crypto_set_async_dispatch_mode (u8 mode);
vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg);
@@ -551,14 +562,18 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
vnet_crypto_async_frame_t * frame)
{
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
vnet_crypto_async_op_id_t opt = frame->op;
u32 i = vlib_num_workers () > 0;
int ret = (cm->enqueue_handlers[frame->op]) (vm, frame);
frame->enqueue_thread_index = vm->thread_index;
clib_bitmap_set_no_check (cm->async_active_ids, opt, 1);
if (PREDICT_TRUE (ret == 0))
{
vnet_crypto_async_frame_t *nf = 0;
frame->state = VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS;
frame->state = VNET_CRYPTO_FRAME_STATE_PENDING;
pool_get_aligned (ct->frame_pool, nf, CLIB_CACHE_LINE_BYTES);
if (CLIB_DEBUG > 0)
clib_memset (nf, 0xfe, sizeof (*nf));
@@ -567,6 +582,15 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
nf->n_elts = 0;
ct->frames[opt] = nf;
}
if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
{
for (; i < tm->n_vlib_mains; i++)
{
vlib_node_set_interrupt_pending (vlib_mains[i],
cm->crypto_node_index);
}
}
return ret;
}
+61 -43
View File
@@ -74,60 +74,78 @@ vnet_crypto_async_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
static_always_inline u32
crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_thread_t * ct,
vnet_crypto_frame_dequeue_t * hdl,
u32 n_cache, u32 * n_total)
vnet_crypto_frame_dequeue_t * hdl, u32 n_cache,
u32 * n_total)
{
vnet_crypto_async_frame_t *cf = (hdl) (vm);
vnet_crypto_main_t *cm = &crypto_main;
u32 n_elts = 0;
u32 enqueue_thread_idx = ~0;
vnet_crypto_async_frame_t *cf = (hdl) (vm, &n_elts, &enqueue_thread_idx);
*n_total += n_elts;
while (cf)
while (cf || n_elts)
{
vec_validate (ct->buffer_indice, n_cache + cf->n_elts);
vec_validate (ct->nexts, n_cache + cf->n_elts);
clib_memcpy_fast (ct->buffer_indice + n_cache, cf->buffer_indices,
sizeof (u32) * cf->n_elts);
if (cf->state == VNET_CRYPTO_FRAME_STATE_SUCCESS)
if (cf)
{
clib_memcpy_fast (ct->nexts + n_cache, cf->next_node_index,
sizeof (u16) * cf->n_elts);
}
else
{
u32 i;
for (i = 0; i < cf->n_elts; i++)
vec_validate (ct->buffer_indice, n_cache + cf->n_elts);
vec_validate (ct->nexts, n_cache + cf->n_elts);
clib_memcpy_fast (ct->buffer_indice + n_cache, cf->buffer_indices,
sizeof (u32) * cf->n_elts);
if (cf->state == VNET_CRYPTO_FRAME_STATE_SUCCESS)
{
if (cf->elts[i].status != VNET_CRYPTO_OP_STATUS_COMPLETED)
clib_memcpy_fast (ct->nexts + n_cache, cf->next_node_index,
sizeof (u16) * cf->n_elts);
}
else
{
u32 i;
for (i = 0; i < cf->n_elts; i++)
{
ct->nexts[i + n_cache] = CRYPTO_DISPATCH_NEXT_ERR_DROP;
vlib_node_increment_counter (vm, node->node_index,
cf->elts[i].status, 1);
if (cf->elts[i].status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
ct->nexts[i + n_cache] = CRYPTO_DISPATCH_NEXT_ERR_DROP;
vlib_node_increment_counter (vm, node->node_index,
cf->elts[i].status, 1);
}
else
ct->nexts[i + n_cache] = cf->next_node_index[i];
}
else
ct->nexts[i + n_cache] = cf->next_node_index[i];
}
}
n_cache += cf->n_elts;
*n_total += cf->n_elts;
if (n_cache >= VLIB_FRAME_SIZE)
{
vlib_buffer_enqueue_to_next (vm, node, ct->buffer_indice, ct->nexts,
n_cache);
n_cache = 0;
}
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
{
u32 i;
for (i = 0; i < cf->n_elts; i++)
n_cache += cf->n_elts;
if (n_cache >= VLIB_FRAME_SIZE)
{
vlib_buffer_t *b = vlib_get_buffer (vm, cf->buffer_indices[i]);
if (b->flags & VLIB_BUFFER_IS_TRACED)
vnet_crypto_async_add_trace (vm, node, b, cf->op,
cf->elts[i].status);
vlib_buffer_enqueue_to_next (vm, node, ct->buffer_indice,
ct->nexts, n_cache);
n_cache = 0;
}
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
{
u32 i;
for (i = 0; i < cf->n_elts; i++)
{
vlib_buffer_t *b = vlib_get_buffer (vm,
cf->buffer_indices[i]);
if (b->flags & VLIB_BUFFER_IS_TRACED)
vnet_crypto_async_add_trace (vm, node, b, cf->op,
cf->elts[i].status);
}
}
vnet_crypto_async_free_frame (vm, cf);
}
vnet_crypto_async_free_frame (vm, cf);
cf = (hdl) (vm);
/* signal enqueue-thread to dequeue the processed frame (n_elts>0) */
if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT
&& n_elts > 0)
{
vlib_node_set_interrupt_pending (vlib_mains[enqueue_thread_idx],
cm->crypto_node_index);
}
n_elts = 0;
enqueue_thread_idx = 0;
cf = (hdl) (vm, &n_elts, &enqueue_thread_idx);
*n_total += n_elts;
}
return n_cache;