From 2d725c61286ccb8625ffad5c678cee337f88bceb Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 13 Nov 2023 12:18:24 +0000 Subject: [PATCH] ena: Amazon Elastic Network Adapter (ENA) native driver Type: feature Change-Id: Icd9de05f2cbac0e5a6dfb1f1414f21dc4b893104 Signed-off-by: Damjan Marion --- MAINTAINERS | 5 + docs/spelling_wordlist.txt | 1 + src/plugins/dev_ena/CMakeLists.txt | 21 + src/plugins/dev_ena/aenq.c | 186 ++++++++ src/plugins/dev_ena/aq.c | 359 ++++++++++++++ src/plugins/dev_ena/ena.c | 265 +++++++++++ src/plugins/dev_ena/ena.h | 234 +++++++++ src/plugins/dev_ena/ena_admin_defs.h | 685 +++++++++++++++++++++++++++ src/plugins/dev_ena/ena_aenq_defs.h | 107 +++++ src/plugins/dev_ena/ena_defs.h | 25 + src/plugins/dev_ena/ena_inlines.h | 40 ++ src/plugins/dev_ena/ena_io_defs.h | 179 +++++++ src/plugins/dev_ena/ena_reg_defs.h | 150 ++++++ src/plugins/dev_ena/format.c | 146 ++++++ src/plugins/dev_ena/format_aq.c | 412 ++++++++++++++++ src/plugins/dev_ena/port.c | 96 ++++ src/plugins/dev_ena/queue.c | 384 +++++++++++++++ src/plugins/dev_ena/reg.c | 172 +++++++ src/plugins/dev_ena/rx_node.c | 457 ++++++++++++++++++ src/plugins/dev_ena/tx_node.c | 514 ++++++++++++++++++++ src/vlib/buffer.h | 1 + src/vppinfra/vector_avx2.h | 2 +- 22 files changed, 4440 insertions(+), 1 deletion(-) create mode 100644 src/plugins/dev_ena/CMakeLists.txt create mode 100644 src/plugins/dev_ena/aenq.c create mode 100644 src/plugins/dev_ena/aq.c create mode 100644 src/plugins/dev_ena/ena.c create mode 100644 src/plugins/dev_ena/ena.h create mode 100644 src/plugins/dev_ena/ena_admin_defs.h create mode 100644 src/plugins/dev_ena/ena_aenq_defs.h create mode 100644 src/plugins/dev_ena/ena_defs.h create mode 100644 src/plugins/dev_ena/ena_inlines.h create mode 100644 src/plugins/dev_ena/ena_io_defs.h create mode 100644 src/plugins/dev_ena/ena_reg_defs.h create mode 100644 src/plugins/dev_ena/format.c create mode 100644 src/plugins/dev_ena/format_aq.c create mode 100644 src/plugins/dev_ena/port.c create mode 100644 src/plugins/dev_ena/queue.c create mode 100644 src/plugins/dev_ena/reg.c create mode 100644 src/plugins/dev_ena/rx_node.c create mode 100644 src/plugins/dev_ena/tx_node.c diff --git a/MAINTAINERS b/MAINTAINERS index 2abc3d74604..1ed8378c358 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -415,6 +415,11 @@ I: iavf M: Damjan Marion F: src/plugins/dev_iavf/ +Plugin - Amazon Elastic Network Adapter (ENA) device driver +I: ena +M: Damjan Marion +F: src/plugins/dev_ena/ + Plugin - Dispatch Trace PCAP I: dispatch-trace M: Dave Barach diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 7fec295bf63..1aa5249cbd5 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -314,6 +314,7 @@ elts emacs emerg emphasise +ena enablement encap encap diff --git a/src/plugins/dev_ena/CMakeLists.txt b/src/plugins/dev_ena/CMakeLists.txt new file mode 100644 index 00000000000..d9224d6fd9b --- /dev/null +++ b/src/plugins/dev_ena/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2022 Cisco Systems, Inc. + +add_vpp_plugin(dev_ena + SOURCES + aq.c + aenq.c + ena.c + format.c + format_aq.c + port.c + queue.c + rx_node.c + tx_node.c + reg.c + + MULTIARCH_SOURCES + rx_node.c + tx_node.c +) + diff --git a/src/plugins/dev_ena/aenq.c b/src/plugins/dev_ena/aenq.c new file mode 100644 index 00000000000..64be3c4af3a --- /dev/null +++ b/src/plugins/dev_ena/aenq.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include +#include + +#include +#include + +#define ENA_AENQ_POLL_INTERVAL 0.2 + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "aenq", +}; + +void +ena_aenq_free (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + + log_debug (dev, ""); + + ASSERT (ed->aenq_started == 0); + + vnet_dev_dma_mem_free (vm, dev, ed->aenq.entries); + ed->aenq.entries = 0; + ed->aenq.depth = 0; +} + +vnet_dev_rv_t +ena_aenq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth; + vnet_dev_rv_t rv; + + log_debug (dev, ""); + + ASSERT (ed->aenq.entries == 0); + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, alloc_sz, 0, + (void **) &ed->aenq.entries))) + goto err; + + ed->aenq.depth = depth; + + return VNET_DEV_OK; +err: + ena_aenq_free (vm, dev); + return rv; +} + +static ena_aenq_entry_t * +ena_get_next_aenq_entry (vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u16 index = ed->aenq.head & pow2_mask (ENA_ASYNC_QUEUE_LOG2_DEPTH); + u16 phase = 1 & (ed->aenq.head >> ENA_ASYNC_QUEUE_LOG2_DEPTH); + ena_aenq_entry_t *e = ed->aenq.entries + index; + + if (e->phase != phase) + return 0; + + ed->aenq.head++; + + return e; +} + +static void +ena_aenq_poll (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_aenq_entry_t *ae; + + while ((ae = ena_get_next_aenq_entry (dev))) + { + ena_device_t *ed = vnet_dev_get_data (dev); + vnet_dev_port_state_changes_t changes = {}; + + log_debug (dev, "aenq: group %u syndrome %u phase %u timestamp %lu", + ae->group, ae->syndrome, ae->phase, ae->timestamp); + + switch (ae->group) + { + case ENA_AENQ_GROUP_LINK_CHANGE: + log_debug (dev, "link_change: status %u", + ae->link_change.link_status); + changes.link_state = 1; + changes.change.link_state = 1; + foreach_vnet_dev_port (p, dev) + vnet_dev_port_state_change (vm, p, changes); + break; + + case ENA_AENQ_GROUP_NOTIFICATION: + log_warn (dev, "unhandled AENQ notification received [syndrome %u]", + ae->syndrome); + break; + + case ENA_AENQ_GROUP_KEEP_ALIVE: + if (ae->keep_alive.rx_drops || ae->keep_alive.tx_drops) + log_debug (dev, "keep_alive: rx_drops %lu tx_drops %lu", + ae->keep_alive.rx_drops, ae->keep_alive.tx_drops); + ed->aenq.rx_drops = ae->keep_alive.rx_drops - ed->aenq.rx_drops0; + ed->aenq.tx_drops = ae->keep_alive.tx_drops - ed->aenq.tx_drops0; + ed->aenq.last_keepalive = vlib_time_now (vm); + break; + + default: + log_debug (dev, "unknown aenq entry (group %u) %U", ae->group, + format_hexdump, ae, sizeof (*ae)); + }; + } +} + +vnet_dev_rv_t +ena_aenq_start (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u16 depth = ed->aenq.depth; + u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth; + + ASSERT (ed->aenq_started == 0); + ASSERT (ed->aq_started == 1); + + ena_reg_aenq_caps_t aenq_caps = { + .depth = depth, + .entry_size = sizeof (ena_aenq_entry_t), + }; + + if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG)) + { + ena_aq_feat_aenq_config_t aenq; + vnet_dev_rv_t rv; + + if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG, + &aenq))) + { + log_err (dev, "aenq_start: get_Feature(AENQ_CONFIG) failed"); + return rv; + } + + aenq.enabled_groups.link_change = 1; + aenq.enabled_groups.fatal_error = 1; + aenq.enabled_groups.warning = 1; + aenq.enabled_groups.notification = 1; + aenq.enabled_groups.keep_alive = 1; + aenq.enabled_groups.as_u32 &= aenq.supported_groups.as_u32; + aenq.supported_groups.as_u32 = 0; + + if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG, + &aenq))) + { + log_err (dev, "aenq_start: set_Feature(AENQ_CONFIG) failed"); + return rv; + } + } + + clib_memset (ed->aenq.entries, 0, alloc_sz); + ed->aenq.head = depth; + + ena_reg_set_dma_addr (vm, dev, ENA_REG_AENQ_BASE_LO, ENA_REG_AENQ_BASE_HI, + ed->aenq.entries); + + ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps); + ena_reg_write (dev, ENA_REG_AENQ_HEAD_DB, &(u32){ depth }); + + ed->aenq_started = 1; + + vnet_dev_poll_dev_add (vm, dev, ENA_AENQ_POLL_INTERVAL, ena_aenq_poll); + + return VNET_DEV_OK; +} + +void +ena_aenq_stop (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + if (ed->aenq_started == 1) + { + ena_reg_aenq_caps_t aenq_caps = {}; + vnet_dev_poll_dev_remove (vm, dev, ena_aenq_poll); + ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps); + ed->aenq_started = 0; + } +} diff --git a/src/plugins/dev_ena/aq.c b/src/plugins/dev_ena/aq.c new file mode 100644 index 00000000000..290d5bd52c6 --- /dev/null +++ b/src/plugins/dev_ena/aq.c @@ -0,0 +1,359 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include +#include + +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "admin", +}; + +VLIB_REGISTER_LOG_CLASS (ena_stats_log, static) = { + .class_name = "ena", + .subclass_name = "admin-stats", +}; + +ena_aq_feat_info_t feat_info[] = { +#define _(v, ver, gt, st, n, s) \ + [v] = { .name = #n, \ + .version = (ver), \ + .data_sz = sizeof (s), \ + .get = (gt), \ + .set = (st) }, + foreach_ena_aq_feature_id +#undef _ +}; + +ena_aq_feat_info_t * +ena_aq_get_feat_info (ena_aq_feature_id_t id) +{ + if (id >= ARRAY_LEN (feat_info) || feat_info[id].data_sz == 0) + return 0; + + return feat_info + id; +} + +void +ena_aq_free (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries); + vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries); + ed->aq.depth = 0; +} + +vnet_dev_rv_t +ena_aq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries); + vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries); + u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth; + u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth; + vnet_dev_rv_t rv; + + ASSERT (ed->aq.sq_entries == 0); + ASSERT (ed->aq.cq_entries == 0); + + rv = vnet_dev_dma_mem_alloc (vm, dev, sq_alloc_sz, 0, + (void **) &ed->aq.sq_entries); + if (rv != VNET_DEV_OK) + goto err; + + rv = vnet_dev_dma_mem_alloc (vm, dev, cq_alloc_sz, 0, + (void **) &ed->aq.cq_entries); + if (rv != VNET_DEV_OK) + goto err; + + ed->aq.depth = depth; + + return VNET_DEV_OK; +err: + ena_aq_free (vm, dev); + return rv; +} + +vnet_dev_rv_t +ena_aq_start (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u16 depth = ed->aq.depth; + u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth; + u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth; + + ASSERT (ed->aq_started == 0); + + ena_reg_aq_caps_t aq_caps = { + .depth = depth, + .entry_size = sizeof (ena_aq_sq_entry_t), + }; + + ena_reg_acq_caps_t acq_caps = { + .depth = depth, + .entry_size = sizeof (ena_aq_cq_entry_t), + }; + + clib_memset (ed->aq.sq_entries, 0, sq_alloc_sz); + clib_memset (ed->aq.cq_entries, 0, cq_alloc_sz); + + ed->aq.sq_next = 0; + ed->aq.cq_head = 0; + + ena_reg_set_dma_addr (vm, dev, ENA_REG_AQ_BASE_LO, ENA_REG_AQ_BASE_HI, + ed->aq.sq_entries); + ena_reg_set_dma_addr (vm, dev, ENA_REG_ACQ_BASE_LO, ENA_REG_ACQ_BASE_HI, + ed->aq.cq_entries); + + ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps); + ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps); + + ed->aq_started = 1; + + return VNET_DEV_OK; +} + +void +ena_aq_stop (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + ena_reg_aq_caps_t aq_caps = {}; + ena_reg_acq_caps_t acq_caps = {}; + + if (ed->aq_started) + { + ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps); + ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps); + ed->aq_started = 0; + } +} +vnet_dev_rv_t +ena_aq_req (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_opcode_t opcode, + void *sqe_data, u8 sqe_data_sz, void *cqe_data, u8 cqe_data_sz) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u32 next = ed->aq.sq_next++; + u32 index = next & pow2_mask (ENA_ADMIN_QUEUE_LOG2_DEPTH); + u8 phase = 1 & (~(next >> ENA_ADMIN_QUEUE_LOG2_DEPTH)); + ena_aq_sq_entry_t *sqe = ed->aq.sq_entries + index; + ena_aq_cq_entry_t *cqe = ed->aq.cq_entries + index; + f64 suspend_time = 1e-6; + + clib_memcpy_fast (&sqe->data, sqe_data, sqe_data_sz); + sqe->opcode = opcode; + sqe->command_id = index; + sqe->phase = phase; + + ena_reg_write (dev, ENA_REG_AQ_DB, &ed->aq.sq_next); + + while (cqe->phase != phase) + { + vlib_process_suspend (vm, suspend_time); + suspend_time *= 2; + if (suspend_time > 1e-3) + { + log_err (dev, "admin queue timeout (opcode %U)", + format_ena_aq_opcode, opcode); + return VNET_DEV_ERR_TIMEOUT; + } + } + + if (cqe->status != ENA_ADMIN_COMPL_STATUS_SUCCESS) + { + log_err (dev, + "cqe[%u]: opcode %U status %U ext_status %u sq_head_idx %u", + cqe - ed->aq.cq_entries, format_ena_aq_opcode, opcode, + format_ena_aq_status, cqe->status, cqe->extended_status, + cqe->sq_head_indx); + return VNET_DEV_ERR_DEVICE_NO_REPLY; + } + + log_debug (dev, "cqe: status %u ext_status %u sq_head_idx %u", cqe->status, + cqe->extended_status, cqe->sq_head_indx); + + if (cqe_data && cqe_data_sz) + clib_memcpy_fast (cqe_data, &cqe->data, cqe_data_sz); + return VNET_DEV_OK; +} + +vnet_dev_rv_t +ena_aq_set_feature (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_feature_id_t feat_id, void *data) +{ + vnet_dev_rv_t rv; + + struct + { + ena_aq_aq_ctrl_buff_info_t control_buffer; + ena_aq_get_set_feature_common_desc_t feat_common; + u32 data[11]; + } fd = { + .feat_common.feature_id = feat_id, + .feat_common.feature_version = feat_info[feat_id].version, + }; + + log_debug (dev, "set_feature(%s):\n %U", feat_info[feat_id].name, + format_ena_aq_feat_desc, feat_id, data); + + ASSERT (feat_info[feat_id].data_sz > 1); + clib_memcpy (&fd.data, data, feat_info[feat_id].data_sz); + + rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_SET_FEATURE, &fd, sizeof (fd), 0, 0); + + if (rv != VNET_DEV_OK) + log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name, feat_id); + + return rv; +} + +vnet_dev_rv_t +ena_aq_get_feature (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_feature_id_t feat_id, void *data) +{ + vnet_dev_rv_t rv; + + struct + { + ena_aq_aq_ctrl_buff_info_t control_buffer; + ena_aq_get_set_feature_common_desc_t feat_common; + u32 data[11]; + } fd = { + .feat_common.feature_id = feat_id, + .feat_common.feature_version = feat_info[feat_id].version, + }; + + rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_FEATURE, &fd, sizeof (fd), data, + feat_info[feat_id].data_sz); + + if (rv != VNET_DEV_OK) + { + log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name, + feat_id); + return rv; + } + + ASSERT (feat_info[feat_id].data_sz > 1); + + log_debug (dev, "get_feature(%s):\n %U", feat_info[feat_id].name, + format_ena_aq_feat_desc, feat_id, data); + + return 0; +} + +vnet_dev_rv_t +ena_aq_create_sq (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_create_sq_cmd_t *cmd, ena_aq_create_sq_resp_t *resp) +{ + vnet_dev_rv_t rv; + + log_debug (dev, "create_sq_cmd_req:\n %U", format_ena_aq_create_sq_cmd, + cmd); + + rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_SQ, cmd, sizeof (*cmd), resp, + sizeof (*resp)); + + if (rv != VNET_DEV_OK) + log_debug (dev, "create_sq_cmd_resp:\n %U", format_ena_aq_create_sq_resp, + resp); + return rv; +} + +vnet_dev_rv_t +ena_aq_create_cq (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_create_cq_cmd_t *cmd, ena_aq_create_cq_resp_t *resp) +{ + vnet_dev_rv_t rv; + + log_debug (dev, "create_cq_cmd_req:\n %U", format_ena_aq_create_cq_cmd, + cmd); + + rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_CQ, cmd, sizeof (*cmd), resp, + sizeof (*resp)); + + if (rv != VNET_DEV_OK) + log_debug (dev, "create_cq_cmd_resp:\n %U", format_ena_aq_create_cq_resp, + resp); + + return rv; +} + +vnet_dev_rv_t +ena_aq_destroy_sq (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_destroy_sq_cmd_t *cmd) +{ + log_debug (dev, "destroy_sq_cmd_req:\n %U", format_ena_aq_destroy_sq_cmd, + cmd); + + return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_SQ, cmd, sizeof (*cmd), 0, + 0); +} + +vnet_dev_rv_t +ena_aq_destroy_cq (vlib_main_t *vm, vnet_dev_t *dev, + ena_aq_destroy_cq_cmd_t *cmd) +{ + log_debug (dev, "destroy_cq_cmd_req:\n %U", format_ena_aq_destroy_cq_cmd, + cmd); + + return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_CQ, cmd, sizeof (*cmd), 0, + 0); +} + +vnet_dev_rv_t +ena_aq_get_stats (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_stats_type_t type, + ena_aq_stats_scope_t scope, u16 queue_idx, void *data) +{ + vnet_dev_rv_t rv; + format_function_t *ff = 0; + u8 data_sz[] = { + [ENA_ADMIN_STATS_TYPE_BASIC] = sizeof (ena_aq_basic_stats_t), + [ENA_ADMIN_STATS_TYPE_EXTENDED] = 0, + [ENA_ADMIN_STATS_TYPE_ENI] = sizeof (ena_aq_eni_stats_t), + }; + + char *type_str[] = { +#define _(n, s) [n] = #s, + foreach_ena_aq_stats_type +#undef _ + }; + + char *scope_str[] = { +#define _(n, s) [n] = #s, + foreach_ena_aq_stats_scope +#undef _ + }; + + ena_aq_get_stats_cmd_t cmd = { + .type = type, + .scope = scope, + .queue_idx = scope == ENA_ADMIN_STATS_SCOPE_SPECIFIC_QUEUE ? queue_idx : 0, + .device_id = 0xffff, + }; + + if ((rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_STATS, &cmd, sizeof (cmd), + data, data_sz[type]))) + { + ena_stats_log_err (dev, "get_stats(%s, %s) failed", type_str[type], + scope_str[scope]); + return rv; + } + + if (type == ENA_ADMIN_STATS_TYPE_BASIC) + ff = format_ena_aq_basic_stats; + else if (type == ENA_ADMIN_STATS_TYPE_ENI) + ff = format_ena_aq_eni_stats; + + if (ff) + ena_stats_log_debug (dev, "get_stats(%s, %s, %u):\n %U", type_str[type], + scope_str[scope], queue_idx, ff, data); + else + ena_stats_log_debug (dev, "get_stats(%s, %s, %u): unknown data", + type_str[type], scope_str[scope], queue_idx); + + return VNET_DEV_OK; +} diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c new file mode 100644 index 00000000000..ead090839c7 --- /dev/null +++ b/src/plugins/dev_ena/ena.c @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static ena_aq_host_info_t host_info = { + .os_type = 3, /* DPDK */ + .kernel_ver_str = VPP_BUILD_VER, + .os_dist_str = VPP_BUILD_VER, + .driver_version = { + .major = 16, + .minor = 0, + .sub_minor = 0, + }, + .ena_spec_version = { + .major = 2, + .minor = 0, + }, + .driver_supported_features = { + .rx_offset = 1, + .rss_configurable_function_key = 1, + } +}; + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "init", +}; + +#define _(f, n, s, d) \ + { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s }, + +static vlib_error_desc_t ena_rx_node_counters[] = { + foreach_ena_rx_node_counter +}; +static vlib_error_desc_t ena_tx_node_counters[] = { + foreach_ena_tx_node_counter +}; +#undef _ + +vnet_dev_node_t ena_rx_node = { + .error_counters = ena_rx_node_counters, + .n_error_counters = ARRAY_LEN (ena_rx_node_counters), + .format_trace = format_ena_rx_trace, +}; + +vnet_dev_node_t ena_tx_node = { + .error_counters = ena_tx_node_counters, + .n_error_counters = ARRAY_LEN (ena_tx_node_counters), +}; + +static void +ena_deinit (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_aenq_stop (vm, dev); + ena_aq_stop (vm, dev); +} + +static vnet_dev_rv_t +ena_alloc (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + vnet_dev_rv_t rv; + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, 4096, 4096, + (void **) &ed->host_info))) + return rv; + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ena_mmio_resp_t), 0, + (void **) &ed->mmio_resp))) + return rv; + + if ((rv = ena_aq_olloc (vm, dev, ENA_ADMIN_QUEUE_DEPTH))) + return rv; + + if ((rv = ena_aenq_olloc (vm, dev, ENA_ASYNC_QUEUE_DEPTH))) + return rv; + + return VNET_DEV_OK; +} + +static void +ena_free (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + + ena_aenq_free (vm, dev); + ena_aq_free (vm, dev); + + vnet_dev_dma_mem_free (vm, dev, ed->host_info); + vnet_dev_dma_mem_free (vm, dev, ed->mmio_resp); +} + +static vnet_dev_rv_t +ena_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + ena_aq_feat_host_attr_config_t host_attr = {}; + vlib_pci_config_hdr_t pci_cfg_hdr; + vnet_dev_rv_t rv = VNET_DEV_OK; + + vnet_dev_port_add_args_t port = { + .port = { + .attr = { + .type = VNET_DEV_PORT_TYPE_ETHERNET, + }, + .ops = { + .init = ena_port_init, + .start = ena_port_start, + .stop = ena_port_stop, + .config_change = ena_port_cfg_change, + .config_change_validate = ena_port_cfg_change_validate, + }, + .data_size = sizeof (ena_port_t), + }, + .rx_node = &ena_rx_node, + .tx_node = &ena_tx_node, + .rx_queue = { + .config = { + .data_size = sizeof (ena_rxq_t), + .default_size = 512, + .min_size = 32, + .size_is_power_of_two = 1, + }, + .ops = { + .alloc = ena_rx_queue_alloc, + .start = ena_rx_queue_start, + .stop = ena_rx_queue_stop, + .free = ena_rx_queue_free, + }, + }, + .tx_queue = { + .config = { + .data_size = sizeof (ena_txq_t), + .default_size = 512, + .min_size = 32, + .size_is_power_of_two = 1, + }, + .ops = { + .alloc = ena_tx_queue_alloc, + .start = ena_tx_queue_start, + .stop = ena_tx_queue_stop, + .free = ena_tx_queue_free, + }, + }, + }; + + if ((rv = vnet_dev_pci_read_config_header (vm, dev, &pci_cfg_hdr))) + goto err; + + log_debug (dev, "revision_id 0x%x", pci_cfg_hdr.revision_id); + + ed->readless = (pci_cfg_hdr.revision_id & 1) == 0; + + if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ed->reg_bar))) + goto err; + + if ((rv = ena_reg_reset (vm, dev, ENA_RESET_REASON_NORMAL))) + goto err; + + if ((rv = ena_aq_start (vm, dev))) + goto err; + + *ed->host_info = host_info; + ed->host_info->num_cpus = vlib_get_n_threads (); + ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info); + + if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG, + &host_attr))) + return rv; + + if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES, + &ed->dev_attr))) + return rv; + + if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT)) + { + ena_aq_feat_max_queue_ext_t max_q_ext; + if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT, + &max_q_ext))) + goto err; + port.port.attr.max_rx_queues = + clib_min (max_q_ext.max_rx_cq_num, max_q_ext.max_rx_sq_num); + port.port.attr.max_tx_queues = + clib_min (max_q_ext.max_tx_cq_num, max_q_ext.max_tx_sq_num); + port.rx_queue.config.max_size = + clib_min (max_q_ext.max_rx_cq_depth, max_q_ext.max_rx_sq_depth); + port.tx_queue.config.max_size = + clib_min (max_q_ext.max_tx_cq_depth, max_q_ext.max_tx_sq_depth); + } + else + { + log_err (dev, "device doesn't support MAX_QUEUES_EXT"); + return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER; + } + + if ((rv = ena_aenq_start (vm, dev))) + goto err; + + port.port.attr.max_supported_rx_frame_size = ed->dev_attr.max_mtu; + + if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU)) + port.port.attr.caps.change_max_rx_frame_size = 1; + + vnet_dev_set_hw_addr_eth_mac (&port.port.attr.hw_addr, + ed->dev_attr.mac_addr); + + return vnet_dev_port_add (vm, dev, 0, &port); + +err: + ena_free (vm, dev); + return rv; +} + +static u8 * +ena_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info) +{ + vnet_dev_bus_pci_device_info_t *di = dev_info; + const struct + { + u16 device_id; + char *description; + } ena_dev_types[] = { + { .device_id = 0x0ec2, .description = "Elastic Network Adapter (ENA) PF" }, + { .device_id = 0xec20, .description = "Elastic Network Adapter (ENA) VF" }, + }; + + if (di->vendor_id != 0x1d0f) /* AMAZON */ + return 0; + + FOREACH_ARRAY_ELT (dt, ena_dev_types) + { + if (dt->device_id == di->device_id) + return format (0, "%s", dt->description); + } + + return 0; +} + +VNET_DEV_REGISTER_DRIVER (ena) = { + .name = "ena", + .bus = "pci", + .device_data_sz = sizeof (ena_device_t), + .ops = { + .alloc = ena_alloc, + .init = ena_init, + .deinit = ena_deinit, + .free = ena_free, + .format_info = format_ena_dev_info, + .probe = ena_probe, + }, +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "dev_ena", +}; diff --git a/src/plugins/dev_ena/ena.h b/src/plugins/dev_ena/ena.h new file mode 100644 index 00000000000..4acb8d9625a --- /dev/null +++ b/src/plugins/dev_ena/ena.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_H_ +#define _ENA_H_ + +#include +#include +#include +#include +#include +#include + +#define ENA_ADMIN_QUEUE_LOG2_DEPTH 2 +#define ENA_ASYNC_QUEUE_LOG2_DEPTH 5 +#define ENA_ADMIN_QUEUE_DEPTH (1 << ENA_ADMIN_QUEUE_LOG2_DEPTH) +#define ENA_ASYNC_QUEUE_DEPTH (1 << ENA_ASYNC_QUEUE_LOG2_DEPTH) + +typedef struct +{ + u8 readless : 1; + u8 aq_started : 1; + u8 aenq_started : 1; + u8 llq : 1; + + void *reg_bar; + + /* mmio */ + ena_mmio_resp_t *mmio_resp; + + /* admin queue */ + struct + { + ena_aq_sq_entry_t *sq_entries; + ena_aq_cq_entry_t *cq_entries; + u16 sq_next; + u16 cq_head; + u16 depth; + } aq; + + /* host info */ + ena_aq_host_info_t *host_info; + + /* device info */ + ena_aq_feat_device_attr_t dev_attr; + + /* async event notification */ + struct + { + ena_aenq_entry_t *entries; + u16 head; + u16 depth; + f64 last_keepalive; + u64 tx_drops, tx_drops0; + u64 rx_drops, rx_drops0; + } aenq; + +} ena_device_t; + +typedef struct +{ +} ena_port_t; + +typedef struct +{ + u32 *buffer_indices; + u16 *compl_sqe_indices; + ena_rx_desc_t *sqes; + ena_rx_cdesc_t *cqes; + u32 *sq_db; + u32 sq_next; + u32 cq_next; + u16 cq_idx; + u16 sq_idx; + u16 n_compl_sqes; + u8 cq_created : 1; + u8 sq_created : 1; +} ena_rxq_t; + +typedef struct +{ + u32 *buffer_indices; + ena_tx_desc_t *sqes; + ena_tx_llq_desc128_t *llq_descs; + ena_tx_cdesc_t *cqes; + u64 *sqe_templates; + u32 *sq_db; + u32 sq_tail; + u32 sq_head; + u32 cq_next; + u16 cq_idx; + u16 sq_idx; + u8 cq_created : 1; + u8 sq_created : 1; + u8 llq : 1; +} ena_txq_t; + +typedef struct +{ + u16 qid; + u16 next_index; + u32 hw_if_index; + ena_rx_cdesc_status_t status; + u16 length; + u16 n_desc; + u16 req_id; +} ena_rx_trace_t; + +/* admin.c */ +typedef struct +{ + char *name; + u8 version; + u8 data_sz; + u8 get; + u8 set; +} ena_aq_feat_info_t; + +ena_aq_feat_info_t *ena_aq_get_feat_info (ena_aq_feature_id_t); +vnet_dev_rv_t ena_aq_olloc (vlib_main_t *, vnet_dev_t *, u16); +vnet_dev_rv_t ena_aq_start (vlib_main_t *, vnet_dev_t *); +void ena_aq_stop (vlib_main_t *, vnet_dev_t *); +void ena_aq_free (vlib_main_t *, vnet_dev_t *); +vnet_dev_rv_t ena_aq_create_sq (vlib_main_t *, vnet_dev_t *, + ena_aq_create_sq_cmd_t *, + ena_aq_create_sq_resp_t *); +vnet_dev_rv_t ena_aq_create_cq (vlib_main_t *, vnet_dev_t *, + ena_aq_create_cq_cmd_t *, + ena_aq_create_cq_resp_t *); +vnet_dev_rv_t ena_aq_destroy_sq (vlib_main_t *, vnet_dev_t *, + ena_aq_destroy_sq_cmd_t *); +vnet_dev_rv_t ena_aq_destroy_cq (vlib_main_t *, vnet_dev_t *, + ena_aq_destroy_cq_cmd_t *); +vnet_dev_rv_t ena_aq_set_feature (vlib_main_t *, vnet_dev_t *, + ena_aq_feature_id_t, void *); +vnet_dev_rv_t ena_aq_get_feature (vlib_main_t *, vnet_dev_t *, + ena_aq_feature_id_t, void *); +vnet_dev_rv_t ena_aq_get_stats (vlib_main_t *, vnet_dev_t *, + ena_aq_stats_type_t, ena_aq_stats_scope_t, u16, + void *); + +/* aenq.c */ +vnet_dev_rv_t ena_aenq_olloc (vlib_main_t *, vnet_dev_t *, u16); +vnet_dev_rv_t ena_aenq_start (vlib_main_t *, vnet_dev_t *); +void ena_aenq_stop (vlib_main_t *, vnet_dev_t *); +void ena_aenq_free (vlib_main_t *, vnet_dev_t *); + +/* reg.c */ +void ena_reg_write (vnet_dev_t *, ena_reg_t, void *); +void ena_reg_read (vnet_dev_t *, ena_reg_t, const void *); +void ena_reg_set_dma_addr (vlib_main_t *, vnet_dev_t *, u32, u32, void *); +vnet_dev_rv_t ena_reg_reset (vlib_main_t *, vnet_dev_t *, ena_reset_reason_t); + +/* port.c */ +vnet_dev_rv_t ena_port_init (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t ena_port_start (vlib_main_t *, vnet_dev_port_t *); +void ena_port_stop (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t ena_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t ena_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); + +/* queue.c */ +vnet_dev_rv_t ena_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rv_t ena_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *); +void ena_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); +void ena_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t ena_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rv_t ena_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *); +void ena_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *); +void ena_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *); + +/* format.c */ +format_function_t format_ena_dev_info; +format_function_t format_ena_mem_addr; +format_function_t format_ena_tx_desc; +format_function_t format_ena_rx_trace; + +/* format_admin.c */ +format_function_t format_ena_aq_feat_desc; +format_function_t format_ena_aq_feat_name; +format_function_t format_ena_aq_opcode; +format_function_t format_ena_aq_status; +format_function_t format_ena_aq_feat_id_bitmap; +format_function_t format_ena_aq_create_sq_cmd; +format_function_t format_ena_aq_create_cq_cmd; +format_function_t format_ena_aq_create_sq_resp; +format_function_t format_ena_aq_create_cq_resp; +format_function_t format_ena_aq_destroy_sq_cmd; +format_function_t format_ena_aq_destroy_cq_cmd; +format_function_t format_ena_aq_basic_stats; +format_function_t format_ena_aq_eni_stats; + +#define foreach_ena_rx_node_counter \ + _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error") + +typedef enum +{ +#define _(f, lf, t, s) ENA_RX_NODE_CTR_##f, + foreach_ena_rx_node_counter +#undef _ + ENA_RX_NODE_N_CTRS, +} ena_rx_node_ctr_t; + +#define foreach_ena_tx_node_counter \ + _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "buffer chain too long") \ + _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") + +typedef enum +{ +#define _(f, lf, t, s) ENA_TX_NODE_CTR_##f, + foreach_ena_tx_node_counter +#undef _ + ENA_TX_NODE_N_CTRS, +} ena_tx_node_ctr_t; + +#define log_debug(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_log.class, "%U" f, format_vnet_dev_log, \ + (dev), clib_string_skip_prefix (__func__, "ena_"), ##__VA_ARGS__) +#define log_info(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_INFO, ena_log.class, "%U: " f, \ + format_vnet_dev_addr, (dev), ##__VA_ARGS__) +#define log_notice(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_NOTICE, ena_log.class, "%U: " f, \ + format_vnet_dev_addr, (dev), ##__VA_ARGS__) +#define log_warn(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_WARNING, ena_log.class, "%U: " f, \ + format_vnet_dev_addr, (dev), ##__VA_ARGS__) +#define log_err(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, ena_log.class, "%U: " f, \ + format_vnet_dev_addr, (dev), ##__VA_ARGS__) + +#endif /* _ENA_H_ */ diff --git a/src/plugins/dev_ena/ena_admin_defs.h b/src/plugins/dev_ena/ena_admin_defs.h new file mode 100644 index 00000000000..6433a1563b8 --- /dev/null +++ b/src/plugins/dev_ena/ena_admin_defs.h @@ -0,0 +1,685 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_ADMIN_DEFS_H_ +#define _ENA_ADMIN_DEFS_H_ + +#include +#include + +#define foreach_ena_aq_opcode \ + _ (1, CREATE_SQ) \ + _ (2, DESTROY_SQ) \ + _ (3, CREATE_CQ) \ + _ (4, DESTROY_CQ) \ + _ (8, GET_FEATURE) \ + _ (9, SET_FEATURE) \ + _ (11, GET_STATS) + +typedef enum +{ +#define _(v, n) ENA_AQ_OPCODE_##n = (v), + foreach_ena_aq_opcode +#undef _ +} __clib_packed ena_aq_opcode_t; + +#define foreach_ena_aq_compl_status \ + _ (0, SUCCESS) \ + _ (1, RESOURCE_ALLOCATION_FAILURE) \ + _ (2, BAD_OPCODE) \ + _ (3, UNSUPPORTED_OPCODE) \ + _ (4, MALFORMED_REQUEST) \ + _ (5, ILLEGAL_PARAMETER) \ + _ (6, UNKNOWN_ERROR) \ + _ (7, RESOURCE_BUSY) + +typedef enum +{ +#define _(v, n) ENA_ADMIN_COMPL_STATUS_##n = (v), + foreach_ena_aq_compl_status +#undef _ +} __clib_packed ena_aq_compl_status_t; + +/* id, versiom, get, set, name, struct */ +#define foreach_ena_aq_feature_id \ + _ (1, 0, 1, 0, DEVICE_ATTRIBUTES, ena_aq_feat_device_attr_t) \ + _ (2, 0, 1, 0, MAX_QUEUES_NUM, ena_aq_feat_max_queue_num_t) \ + _ (3, 0, 1, 0, HW_HINTS, ena_aq_feat_hw_hints_t) \ + _ (4, 0, 1, 1, LLQ, ena_aq_feat_llq_t) \ + _ (5, 0, 1, 0, EXTRA_PROPERTIES_STRINGS, \ + ena_aq_feat_extra_properties_strings_t) \ + _ (6, 0, 1, 0, EXTRA_PROPERTIES_FLAGS, \ + ena_aq_feat_extra_properties_flags_t) \ + _ (7, 1, 1, 0, MAX_QUEUES_EXT, ena_aq_feat_max_queue_ext_t) \ + _ (10, 0, 1, 1, RSS_HASH_FUNCTION, ena_aq_feat_rss_hash_function_t) \ + _ (11, 0, 1, 0, STATELESS_OFFLOAD_CONFIG, \ + ena_aq_feat_stateless_offload_config_t) \ + _ (12, 0, 1, 1, RSS_INDIRECTION_TABLE_CONFIG, \ + ena_aq_feat_rss_ind_table_config_t) \ + _ (14, 0, 0, 1, MTU, ena_aq_feat_mtu_t) \ + _ (18, 0, 1, 1, RSS_HASH_INPUT, ena_aq_feat_rss_hash_input_t) \ + _ (20, 0, 1, 0, INTERRUPT_MODERATION, ena_aq_feat_intr_moder_t) \ + _ (26, 0, 1, 1, AENQ_CONFIG, ena_aq_feat_aenq_config_t) \ + _ (27, 0, 1, 0, LINK_CONFIG, ena_aq_feat_link_config_t) \ + _ (28, 0, 0, 1, HOST_ATTR_CONFIG, ena_aq_feat_host_attr_config_t) \ + _ (29, 0, 1, 1, PHC_CONFIG, ena_aq_feat_phc_config_t) + +typedef enum +{ +#define _(v, ver, r, w, n, s) ENA_ADMIN_FEAT_ID_##n = (v), + foreach_ena_aq_feature_id +#undef _ +} __clib_packed ena_aq_feature_id_t; + +#define foreach_ena_aq_stats_type \ + _ (0, BASIC) \ + _ (1, EXTENDED) \ + _ (2, ENI) + +#define foreach_ena_aq_stats_scope \ + _ (0, SPECIFIC_QUEUE) \ + _ (1, ETH_TRAFFIC) + +typedef enum +{ +#define _(v, n) ENA_ADMIN_STATS_TYPE_##n = (v), + foreach_ena_aq_stats_type +#undef _ +} __clib_packed ena_aq_stats_type_t; + +typedef enum +{ +#define _(v, n) ENA_ADMIN_STATS_SCOPE_##n = (v), + foreach_ena_aq_stats_scope +#undef _ +} __clib_packed ena_aq_stats_scope_t; + +typedef struct +{ + u32 addr_lo; + u16 addr_hi; + u16 _reserved_16; +} ena_mem_addr_t; + +#define foreach_ena_aq_aenq_groups \ + _ (link_change) \ + _ (fatal_error) \ + _ (warning) \ + _ (notification) \ + _ (keep_alive) \ + _ (refresh_capabilities) \ + _ (conf_notifications) + +typedef union +{ + struct + { +#define _(g) u32 g : 1; + foreach_ena_aq_aenq_groups +#undef _ + }; + u32 as_u32; +} ena_aq_aenq_groups_t; + +STATIC_ASSERT_SIZEOF (ena_aq_aenq_groups_t, 4); + +typedef struct +{ + u32 length; + ena_mem_addr_t addr; +} ena_aq_aq_ctrl_buff_info_t; + +typedef struct +{ + u32 impl_id; + u32 device_version; + u32 supported_features; + u32 _reserved3; + u32 phys_addr_width; + u32 virt_addr_width; + u8 mac_addr[6]; + u8 _reserved7[2]; + u32 max_mtu; +} ena_aq_feat_device_attr_t; + +typedef struct +{ + union + { + struct + { + u16 l3_sort : 1; + u16 l4_sort : 1; + }; + u16 supported_input_sort; + }; + union + { + struct + { + u16 enable_l3_sort : 1; + u16 enable_l4_sort : 1; + }; + u16 enabled_input_sort; + }; +} ena_aq_feat_rss_hash_input_t; + +STATIC_ASSERT_SIZEOF (ena_aq_feat_rss_hash_input_t, 4); + +typedef struct +{ + u16 intr_delay_resolution; + u16 reserved; +} ena_aq_feat_intr_moder_t; + +typedef struct +{ + ena_aq_aenq_groups_t supported_groups; + ena_aq_aenq_groups_t enabled_groups; +} ena_aq_feat_aenq_config_t; + +#define foreach_ena_aq_link_types \ + _ (0, 1000, 1G) \ + _ (1, 2500, 2_5G) \ + _ (2, 5000, 5G) \ + _ (3, 10000, 10G) \ + _ (4, 25000, 25G) \ + _ (5, 40000, 40G) \ + _ (6, 50000, 50G) \ + _ (7, 100000, 100G) \ + _ (8, 200000, 200G) \ + _ (9, 400000, 400G) + +typedef enum +{ +#define _(b, v, n) ENA_ADMIN_LINK_TYPE_##n = (1U << b), + foreach_ena_aq_link_types +#undef _ +} ena_aq_link_types_t; + +typedef struct +{ + u32 speed; + ena_aq_link_types_t supported; + u32 autoneg : 1; + u32 duplex : 1; +} ena_aq_feat_link_config_t; + +STATIC_ASSERT_SIZEOF (ena_aq_feat_link_config_t, 12); + +typedef struct +{ + u32 tx; + u32 rx_supported; + u32 rx_enabled; +} ena_aq_feat_stateless_offload_config_t; + +typedef struct +{ + u16 cq_idx; + u16 reserved; +} ena_aq_feat_rss_ind_table_entry_t; + +typedef struct +{ + u16 min_size; + u16 max_size; + u16 size; + u8 one_entry_update : 1; + u8 reserved; + u32 inline_index; + ena_aq_feat_rss_ind_table_entry_t inline_entry; +} ena_aq_feat_rss_ind_table_config_t; + +typedef struct +{ + u32 mtu; +} ena_aq_feat_mtu_t; + +typedef struct +{ + u32 count; +} ena_aq_feat_extra_properties_strings_t; + +typedef struct +{ + u32 flags; +} ena_aq_feat_extra_properties_flags_t; + +typedef struct +{ + u32 max_sq_num; + u32 max_sq_depth; + u32 max_cq_num; + u32 max_cq_depth; + u32 max_legacy_llq_num; + u32 max_legacy_llq_depth; + u32 max_header_size; + u16 max_packet_tx_descs; + u16 max_packet_rx_descs; +} ena_aq_feat_max_queue_num_t; + +typedef struct +{ + u16 mmio_read_timeout; + u16 driver_watchdog_timeout; + u16 missing_tx_completion_timeout; + u16 missed_tx_completion_count_threshold_to_reset; + u16 admin_completion_tx_timeout; + u16 netdev_wd_timeout; + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; + u16 reserved[8]; +} ena_aq_feat_hw_hints_t; + +typedef struct +{ + u8 version; + u8 _reserved1[3]; + u32 max_tx_sq_num; + u32 max_tx_cq_num; + u32 max_rx_sq_num; + u32 max_rx_cq_num; + u32 max_tx_sq_depth; + u32 max_tx_cq_depth; + u32 max_rx_sq_depth; + u32 max_rx_cq_depth; + u32 max_tx_header_size; + u16 max_per_packet_tx_descs; + u16 max_per_packet_rx_descs; +} ena_aq_feat_max_queue_ext_t; + +typedef struct +{ + u32 supported_func; + u32 selected_func; + u32 init_val; +} ena_aq_feat_rss_hash_function_t; + +typedef struct +{ + ena_mem_addr_t os_info_ba; + ena_mem_addr_t debug_ba; + u32 debug_area_size; +} ena_aq_feat_host_attr_config_t; + +typedef struct +{ + u8 type; + u8 reserved1[3]; + u32 doorbell_offset; + u32 expire_timeout_usec; + u32 block_timeout_usec; + ena_mem_addr_t output_address; + u32 output_length; +} ena_aq_feat_phc_config_t; + +typedef struct +{ + u32 max_llq_num; + u32 max_llq_depth; + u16 header_location_ctrl_supported; + u16 header_location_ctrl_enabled; + u16 entry_size_ctrl_supported; + u16 entry_size_ctrl_enabled; + u16 desc_num_before_header_supported; + u16 desc_num_before_header_enabled; + u16 descriptors_stride_ctrl_supported; + u16 descriptors_stride_ctrl_enabled; + union + { + struct + { + u16 supported_flags; + u16 max_tx_burst_size; + } get; + struct + { + u16 enabled_flags; + } set; + } accel_mode; +} ena_aq_feat_llq_t; + +typedef struct +{ + /* feat common */ + u8 flags; + ena_aq_feature_id_t feature_id; + u8 feature_version; + u8 _reserved; +} ena_aq_get_set_feature_common_desc_t; + +STATIC_ASSERT_SIZEOF (ena_aq_get_set_feature_common_desc_t, 4); + +typedef struct +{ + ena_aq_aq_ctrl_buff_info_t control_buffer; + ena_aq_stats_type_t type; + ena_aq_stats_scope_t scope; + u16 _reserved3; + u16 queue_idx; + u16 device_id; +} ena_aq_get_stats_cmd_t; +STATIC_ASSERT_SIZEOF (ena_aq_get_stats_cmd_t, 20); + +typedef enum +{ + ENA_ADMIN_SQ_DIRECTION_TX = 1, + ENA_ADMIN_SQ_DIRECTION_RX = 2, +} ena_aq_sq_direction_t; + +typedef enum +{ + ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST = 1, + ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE = 3, +} ena_aq_sq_placement_policy_t; + +typedef enum +{ + ENA_ADMIN_SQ_COMPLETION_POLICY_DESC = 0, + ENA_ADMIN_SQ_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD = 3, +} ena_aq_completion_policy_t; + +typedef struct +{ + union + { + struct + { + u8 _reserved0_0 : 5; + u8 sq_direction : 3; /* ena_aq_sq_direction_t */ + }; + u8 sq_identity; + }; + + u8 _reserved1; + + union + { + struct + { + u8 placement_policy : 4; /* ena_aq_sq_placement_policy_t */ + u8 completion_policy : 3; /* ena_aq_completion_policy_t */ + u8 _reserved2_7 : 1; + }; + u8 sq_caps_2; + }; + + union + { + struct + { + u8 is_physically_contiguous : 1; + u8 _reserved3_1 : 7; + }; + u8 sq_caps_3; + }; + + u16 cq_idx; + u16 sq_depth; + ena_mem_addr_t sq_ba; + ena_mem_addr_t sq_head_writeback; /* used if completion_policy is 2 or 3 */ + u32 _reserved0_w7; + u32 _reserved0_w8; +} ena_aq_create_sq_cmd_t; + +typedef struct +{ + u16 sq_idx; + u16 _reserved; + u32 sq_doorbell_offset; /* REG BAR offset of queue dorbell */ + u32 llq_descriptors_offset; /* LLQ MEM BAR offset of descriptors */ + u32 llq_headers_offset; /* LLQ MEM BAR offset of headers mem */ +} ena_aq_create_sq_resp_t; + +typedef struct +{ + union + { + struct + { + u8 _reserved0_0 : 5; + u8 interrupt_mode_enabled : 1; + u8 _reserved0_6 : 2; + }; + u8 cq_caps_1; + }; + + union + { + struct + { + u8 cq_entry_size_words : 4; + u8 _reserved1_4 : 4; + }; + u8 cq_caps_2; + }; + + u16 cq_depth; + u32 msix_vector; + ena_mem_addr_t cq_ba; +} ena_aq_create_cq_cmd_t; + +typedef struct +{ + u16 cq_idx; + u16 cq_actual_depth; + u32 numa_node_register_offset; + u32 cq_head_db_register_offset; + u32 cq_interrupt_unmask_register_offset; +} ena_aq_create_cq_resp_t; + +typedef struct +{ + u16 sq_idx; + union + { + struct + { + u8 _reserved : 5; + u8 sq_direction : 3; /* ena_aq_sq_direction_t */ + }; + u8 sq_identity; + }; + u8 _reserved1; +} ena_aq_destroy_sq_cmd_t; + +typedef struct +{ + u16 cq_idx; + u16 _reserved1; +} ena_aq_destroy_cq_cmd_t; + +STATIC_ASSERT_SIZEOF (ena_aq_create_sq_cmd_t, 32); +STATIC_ASSERT_SIZEOF (ena_aq_create_sq_resp_t, 16); +STATIC_ASSERT_SIZEOF (ena_aq_create_cq_cmd_t, 16); +STATIC_ASSERT_SIZEOF (ena_aq_create_cq_resp_t, 16); +STATIC_ASSERT_SIZEOF (ena_aq_destroy_sq_cmd_t, 4); +STATIC_ASSERT_SIZEOF (ena_aq_destroy_cq_cmd_t, 4); + +typedef struct +{ + /* common desc */ + u16 command_id; + ena_aq_opcode_t opcode; + + union + { + struct + { + u8 phase : 1; + u8 ctrl_data : 1; + u8 ctrl_data_indirect : 1; + u8 _reserved_3_3 : 5; + }; + u8 flags; + }; + + u32 data[15]; +} ena_aq_sq_entry_t; + +STATIC_ASSERT_SIZEOF (ena_aq_sq_entry_t, 64); + +typedef struct +{ + u32 os_type; + u8 os_dist_str[128]; + u32 os_dist; + u8 kernel_ver_str[32]; + u32 kernel_ver; + + struct + { + u8 major; + u8 minor; + u8 sub_minor; + u8 module_type; + } driver_version; + + u32 supported_network_features[2]; + + struct + { + u16 minor : 8; + u16 major : 8; + } ena_spec_version; + + struct + { + u16 function : 3; + u16 device : 5; + u16 bus : 8; + } bdf; + + u16 num_cpus; + u16 _reserved; + + union + { + struct + { + u32 _reserved0 : 1; + u32 rx_offset : 1; + u32 interrupt_moderation : 1; + u32 rx_buf_mirroring : 1; + u32 rss_configurable_function_key : 1; + u32 _reserved5 : 1; + u32 rx_page_reuse : 1; + u32 tx_ipv6_csum_offload : 1; + u32 _reserved8 : 24; + }; + u32 as_u32; + } driver_supported_features; + +} ena_aq_host_info_t; + +STATIC_ASSERT_SIZEOF (ena_aq_host_info_t, 196); + +typedef struct +{ + union + { + u64 tx_bytes; + struct + { + u32 tx_bytes_low; + u32 tx_bytes_high; + }; + }; + union + { + u64 tx_pkts; + struct + { + u32 tx_pkts_low; + u32 tx_pkts_high; + }; + }; + union + { + u64 rx_bytes; + struct + { + u32 rx_bytes_low; + u32 rx_bytes_high; + }; + }; + union + { + u64 rx_pkts; + struct + { + u32 rx_pkts_low; + u32 rx_pkts_high; + }; + }; + union + { + u64 rx_drops; + struct + { + u32 rx_drops_low; + u32 rx_drops_high; + }; + }; + union + { + u64 tx_drops; + struct + { + u32 tx_drops_low; + u32 tx_drops_high; + }; + }; +} ena_aq_basic_stats_t; + +#define foreach_ena_aq_basic_counter \ + _ (rx_pkts, "RX Packets") \ + _ (tx_pkts, "TX Packets") \ + _ (rx_bytes, "RX Bytes") \ + _ (tx_bytes, "TX Bytes") \ + _ (rx_drops, "RX Packet Drops") \ + _ (tx_drops, "TX Packet Drops") + +typedef struct +{ + u64 bw_in_allowance_exceeded; + u64 bw_out_allowance_exceeded; + u64 pps_allowance_exceeded; + u64 conntrack_allowance_exceeded; + u64 linklocal_allowance_exceeded; +} ena_aq_eni_stats_t; + +#define foreach_ena_aq_eni_counter \ + _ (bw_in_allowance_exceeded, "Input BW Allowance Exceeded") \ + _ (bw_out_allowance_exceeded, "Output BW Allowance Exceeded") \ + _ (pps_allowance_exceeded, "PPS Allowance Exceeded") \ + _ (conntrack_allowance_exceeded, "ConnTrack Allowance Exceeded") \ + _ (linklocal_allowance_exceeded, "LinkLocal Allowance Exceeded") + +typedef struct +{ + /* common desc */ + u16 command; + ena_aq_compl_status_t status; + union + { + struct + { + u8 phase : 1; + u8 _reserved3_1 : 7; + }; + u8 flags; + }; + u16 extended_status; + u16 sq_head_indx; + + u32 data[14]; +} ena_aq_cq_entry_t; + +STATIC_ASSERT_SIZEOF (ena_aq_cq_entry_t, 64); + +#endif /* _ENA_ADMIN_DEFS_H_ */ diff --git a/src/plugins/dev_ena/ena_aenq_defs.h b/src/plugins/dev_ena/ena_aenq_defs.h new file mode 100644 index 00000000000..4530f5e7a42 --- /dev/null +++ b/src/plugins/dev_ena/ena_aenq_defs.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_AENQ_DEFS_H_ +#define _ENA_AENQ_DEFS_H_ + +#include +#include + +#define foreach_aenq_group \ + _ (0, LINK_CHANGE) \ + _ (1, FATAL_ERROR) \ + _ (2, WARNING) \ + _ (3, NOTIFICATION) \ + _ (4, KEEP_ALIVE) \ + _ (5, REFRESH_CAPABILITIES) \ + _ (6, CONF_NOTIFICATIONS) + +#define foreach_aenq_syndrome \ + _ (0, SUSPEND) \ + _ (1, RESUME) \ + _ (2, UPDATE_HINTS) + +typedef enum +{ +#define _(v, n) ENA_AENQ_GROUP_##n = (v), + foreach_aenq_group +#undef _ +} ena_aenq_group_t; + +typedef enum +{ +#define _(v, n) ENA_AENQ_SYNDROME_##n = (v), + foreach_aenq_syndrome +#undef _ +} ena_aenq_syndrome_t; + +typedef struct +{ + ena_aenq_group_t group : 16; + ena_aenq_syndrome_t syndrome : 16; + + union + { + struct + { + u8 phase : 1; + }; + u8 flags; + }; + u8 reserved1[3]; + + union + { + u64 timestamp; + struct + { + u32 timestamp_low; + u32 timestamp_high; + }; + }; + + union + { + u32 data[12]; + + struct + { + union + { + struct + { + u32 link_status : 1; + }; + u32 flags; + }; + } link_change; + + struct + { + union + { + u64 rx_drops; + struct + { + u32 rx_drops_low; + u32 rx_drops_high; + }; + }; + + union + { + u64 tx_drops; + struct + { + u32 tx_drops_low; + u32 tx_drops_high; + }; + }; + } keep_alive; + }; +} __clib_packed ena_aenq_entry_t; + +STATIC_ASSERT_SIZEOF (ena_aenq_entry_t, 64); + +#endif /* _ENA_AENQ_DEFS_H_ */ diff --git a/src/plugins/dev_ena/ena_defs.h b/src/plugins/dev_ena/ena_defs.h new file mode 100644 index 00000000000..1e52ed4e05b --- /dev/null +++ b/src/plugins/dev_ena/ena_defs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_DEFS_H_ +#define _ENA_DEFS_H_ + +#include +#include +#include +#include +#include +#include + +/* + * MMIO Response + */ +typedef struct +{ + u16 req_id; + u16 reg_off; + u32 reg_val; +} ena_mmio_resp_t; + +#endif /* _ENA_DEFS_H_ */ diff --git a/src/plugins/dev_ena/ena_inlines.h b/src/plugins/dev_ena/ena_inlines.h new file mode 100644 index 00000000000..106bd5eaa21 --- /dev/null +++ b/src/plugins/dev_ena/ena_inlines.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_INLINES_H_ +#define _ENA_INLINES_H_ + +#include +#include +#include + +#define ena_log_is_debug() \ + vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_log.class) + +#define ena_stats_log_err(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, ena_stats_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) + +#define ena_stats_log_debug(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) + +#define ena_stats_log_is_debug() \ + vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class) + +static_always_inline void +ena_set_mem_addr (vlib_main_t *vm, vnet_dev_t *dev, ena_mem_addr_t *m, void *p) +{ + u64 pa = vnet_dev_get_dma_addr (vm, dev, p); + *m = (ena_mem_addr_t){ .addr_lo = (u32) pa, .addr_hi = (u16) (pa >> 32) }; +} + +static_always_inline int +ena_aq_feature_is_supported (vnet_dev_t *dev, ena_aq_feature_id_t feat_id) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + return (ed->dev_attr.supported_features & (1U << feat_id)) != 0; +} + +#endif /* ENA_INLINES_H */ diff --git a/src/plugins/dev_ena/ena_io_defs.h b/src/plugins/dev_ena/ena_io_defs.h new file mode 100644 index 00000000000..89ca2ac6498 --- /dev/null +++ b/src/plugins/dev_ena/ena_io_defs.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_IO_DEFS_H_ +#define _ENA_IO_DEFS_H_ + +#include +#include +#include + +typedef struct +{ + u16 length; /* 0 = 64K */ + u8 reserved2; + union + { + struct + { + u8 phase : 1; + u8 reserved1 : 1; + u8 first : 1; /* first descriptor in transaction */ + u8 last : 1; /* last descriptor in transaction */ + u8 comp_req : 1; /* should completion be posted? */ + u8 reserved5 : 1; + u8 reserved67 : 2; + }; + u8 ctrl; + }; + u16 req_id; + u16 reserved6; +} ena_rx_desc_lo_t; + +STATIC_ASSERT_SIZEOF (ena_rx_desc_lo_t, 8); + +typedef struct +{ + union + { + struct + { + ena_rx_desc_lo_t lo; + u32 buff_addr_lo; + u16 buff_addr_hi; + u16 reserved16_w3; + }; + u64x2 as_u64x2; + }; +} ena_rx_desc_t; + +STATIC_ASSERT_SIZEOF (ena_rx_desc_t, 16); + +#define foreach_ena_rx_cdesc_status \ + _ (5, l3_proto_idx) \ + _ (2, src_vlan_cnt) \ + _ (1, _reserved7) \ + _ (5, l4_proto_idx) \ + _ (1, l3_csum_err) \ + _ (1, l4_csum_err) \ + _ (1, ipv4_frag) \ + _ (1, l4_csum_checked) \ + _ (7, _reserved17) \ + _ (1, phase) \ + _ (1, l3_csum2) \ + _ (1, first) \ + _ (1, last) \ + _ (2, _reserved28) \ + _ (1, buffer) \ + _ (1, _reserved31) + +typedef struct +{ + union + { + struct + { +#define _(b, n) u32 n : (b); + foreach_ena_rx_cdesc_status +#undef _ + }; + u32 as_u32; + }; +} ena_rx_cdesc_status_t; + +typedef struct +{ + ena_rx_cdesc_status_t status; + u16 length; + u16 req_id; + u32 hash; + u16 sub_qid; + u8 offset; + u8 reserved; +} ena_rx_cdesc_t; + +STATIC_ASSERT_SIZEOF (ena_rx_cdesc_t, 16); + +#define foreach_ena_tx_desc \ + /* len_ctrl */ \ + _ (16, length) \ + _ (6, req_id_hi) \ + _ (1, _reserved0_22) \ + _ (1, meta_desc) \ + _ (1, phase) \ + _ (1, _reserved0_25) \ + _ (1, first) \ + _ (1, last) \ + _ (1, comp_req) \ + _ (2, _reserved0_29) \ + _ (1, _reserved0_31) \ + /* meta_ctrl */ \ + _ (4, l3_proto_idx) \ + _ (1, df) \ + _ (2, _reserved1_5) \ + _ (1, tso_en) \ + _ (5, l4_proto_idx) \ + _ (1, l3_csum_en) \ + _ (1, l4_csum_en) \ + _ (1, ethernet_fcs_dis) \ + _ (1, _reserved1_16) \ + _ (1, l4_csum_partial) \ + _ (3, _reserved_1_18) \ + _ (1, _reserved_1_21) \ + _ (10, req_id_lo) + +typedef struct +{ + union + { + struct + { +#define _(b, n) u32 n : (b); + foreach_ena_tx_desc +#undef _ + u32 buff_addr_lo; + u16 buff_addr_hi; + u8 _reserved3_16; + u8 header_length; + }; + + u16x8 as_u16x8; + u32x4 as_u32x4; + u64x2 as_u64x2; + }; +} ena_tx_desc_t; + +STATIC_ASSERT_SIZEOF (ena_tx_desc_t, 16); + +typedef struct +{ + ena_tx_desc_t desc[2]; + u8 data[96]; +} __clib_aligned (128) +ena_tx_llq_desc128_t; +STATIC_ASSERT_SIZEOF (ena_tx_llq_desc128_t, 128); + +typedef union +{ + struct + { + u16 req_id; + u8 status; + union + { + struct + { + u8 phase : 1; + }; + u8 flags; + }; + u16 sub_qid; + u16 sq_head_idx; + }; + u64 as_u64; +} ena_tx_cdesc_t; + +STATIC_ASSERT_SIZEOF (ena_tx_cdesc_t, 8); + +#endif /* _ENA_IO_DEFS_H_ */ diff --git a/src/plugins/dev_ena/ena_reg_defs.h b/src/plugins/dev_ena/ena_reg_defs.h new file mode 100644 index 00000000000..11d458e21ac --- /dev/null +++ b/src/plugins/dev_ena/ena_reg_defs.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef _ENA_REG_DEFS_H_ +#define _ENA_REG_DEFS_H_ + +#include +#include + +#define ena_reg_version_t_fields \ + __ (8, minor) \ + __ (8, major) + +#define ena_reg_controller_version_t_fields \ + __ (8, subminor) \ + __ (8, minor) \ + __ (8, major) \ + __ (8, impl_id) + +#define ena_reg_caps_t_fields \ + __ (1, contiguous_queue_required) \ + __ (5, reset_timeout) \ + __ (2, _unused) \ + __ (8, dma_addr_width) \ + __ (4, admin_cmd_to) + +#define ena_reg_aq_caps_t_fields \ + __ (16, depth) \ + __ (16, entry_size) + +#define ena_reg_acq_caps_t_fields \ + __ (16, depth) \ + __ (16, entry_size) + +#define ena_reg_aenq_caps_t_fields \ + __ (16, depth) \ + __ (16, entry_size) + +#define ena_reg_dev_ctl_t_fields \ + __ (1, dev_reset) \ + __ (1, aq_restart) \ + __ (1, quiescent) \ + __ (1, io_resume) \ + __ (24, _unused) \ + __ (4, reset_reason) + +#define ena_reg_dev_sts_t_fields \ + __ (1, ready) \ + __ (1, aq_restart_in_progress) \ + __ (1, aq_restart_finished) \ + __ (1, reset_in_progress) \ + __ (1, reset_finished) \ + __ (1, fatal_error) \ + __ (1, quiescent_state_in_progress) \ + __ (1, quiescent_state_achieved) + +#define ena_reg_mmio_reg_read_t_fields \ + __ (16, req_id) \ + __ (16, reg_off) + +#define ena_reg_rss_ind_entry_update_t_fields \ + __ (16, index) \ + __ (16, cx_idx) + +#define __(l, f) u32 f : l; +#define _(n) \ + typedef union \ + { \ + struct \ + { \ + n##_fields; \ + }; \ + u32 as_u32; \ + } n; + +_ (ena_reg_version_t) +_ (ena_reg_controller_version_t) +_ (ena_reg_caps_t) +_ (ena_reg_aq_caps_t) +_ (ena_reg_acq_caps_t) +_ (ena_reg_aenq_caps_t) +_ (ena_reg_dev_ctl_t) +_ (ena_reg_dev_sts_t) +_ (ena_reg_mmio_reg_read_t) +_ (ena_reg_rss_ind_entry_update_t) +#undef _ +#undef __ + +#define foreach_ena_reg \ + _ (0x00, 1, VERSION, ena_reg_version_t_fields) \ + _ (0x04, 1, CONTROLLER_VERSION, ena_reg_controller_version_t_fields) \ + _ (0x08, 1, CAPS, ena_reg_caps_t_fields) \ + _ (0x0c, 1, EXT_CAPS, ) \ + _ (0x10, 1, AQ_BASE_LO, ) \ + _ (0x14, 1, AQ_BASE_HI, ) \ + _ (0x18, 1, AQ_CAPS, ena_reg_aq_caps_t_fields) \ + _ (0x20, 1, ACQ_BASE_LO, ) \ + _ (0x24, 1, ACQ_BASE_HI, ) \ + _ (0x28, 1, ACQ_CAPS, ena_reg_acq_caps_t_fields) \ + _ (0x2c, 0, AQ_DB, ) \ + _ (0x30, 0, ACQ_TAIL, ) \ + _ (0x34, 1, AENQ_CAPS, ena_reg_aenq_caps_t_fields) \ + _ (0x38, 0, AENQ_BASE_LO, ) \ + _ (0x3c, 0, AENQ_BASE_HI, ) \ + _ (0x40, 0, AENQ_HEAD_DB, ) \ + _ (0x44, 0, AENQ_TAIL, ) \ + _ (0x4c, 1, INTR_MASK, ) \ + _ (0x54, 0, DEV_CTL, ena_reg_dev_ctl_t_fields) \ + _ (0x58, 1, DEV_STS, ena_reg_dev_sts_t_fields) \ + _ (0x5c, 0, MMIO_REG_READ, ena_reg_mmio_reg_read_t_fields) \ + _ (0x60, 0, MMIO_RESP_LO, ) \ + _ (0x64, 0, MMIO_RESP_HI, ) \ + _ (0x68, 0, RSS_IND_ENTRY_UPDATE, ena_reg_rss_ind_entry_update_t_fields) + +typedef enum +{ +#define _(o, r, n, f) ENA_REG_##n = o, + foreach_ena_reg +#undef _ +} ena_reg_t; + +#define foreach_ena_reset_reason \ + _ (0, NORMAL) \ + _ (1, KEEP_ALIVE_TO) \ + _ (2, ADMIN_TO) \ + _ (3, MISS_TX_CMPL) \ + _ (4, INV_RX_REQ_ID) \ + _ (5, INV_TX_REQ_ID) \ + _ (6, TOO_MANY_RX_DESCS) \ + _ (7, INIT_ERR) \ + _ (8, DRIVER_INVALID_STATE) \ + _ (9, OS_TRIGGER) \ + _ (10, OS_NETDEV_WD) \ + _ (11, SHUTDOWN) \ + _ (12, USER_TRIGGER) \ + _ (13, GENERIC) \ + _ (14, MISS_INTERRUPT) \ + _ (15, SUSPECTED_POLL_STARVATION) \ + _ (16, RX_DESCRIPTOR_MALFORMED) \ + _ (17, TX_DESCRIPTOR_MALFORMED) + +typedef enum +{ +#define _(o, n) ENA_RESET_REASON_##n = o, + foreach_ena_reset_reason +#undef _ +} ena_reset_reason_t; + +#endif /* _ENA_REG_DEFS_H_ */ diff --git a/src/plugins/dev_ena/format.c b/src/plugins/dev_ena/format.c new file mode 100644 index 00000000000..2db52b50f66 --- /dev/null +++ b/src/plugins/dev_ena/format.c @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include "vlib/pci/pci.h" +#include "vnet/error.h" +#include "vppinfra/error.h" +#include +#include +#include +#include + +u8 * +format_ena_dev_info (u8 *s, va_list *args) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + ena_device_t *ed = vnet_dev_get_data (dev); + u32 indent = format_get_indent (s) + 2; + + format (s, "Elastic Network Adapter:"); + format (s, "\n%UDevice version is %u, implementation id is %u", + format_white_space, indent, ed->dev_attr.device_version, + ed->dev_attr.impl_id); + format (s, "\n%Urx drops %lu, tx drops %lu", format_white_space, indent, + ed->aenq.rx_drops, ed->aenq.tx_drops); + format (s, "\n%ULast keepalive arrived ", format_white_space, indent); + if (ed->aenq.last_keepalive != 0.0) + format (s, "%.2f seconds ago", + vlib_time_now (vm) - ed->aenq.last_keepalive); + else + format (s, "never"); + return s; +} + +u8 * +format_ena_mem_addr (u8 *s, va_list *args) +{ + ena_mem_addr_t *ema = va_arg (*args, ena_mem_addr_t *); + return format (s, "0x%lx", (u64) ema->addr_hi << 32 | ema->addr_lo); +} + +u8 * +format_ena_tx_desc (u8 *s, va_list *args) +{ + ena_tx_desc_t *d = va_arg (*args, ena_tx_desc_t *); + s = + format (s, "addr 0x%012lx", (u64) d->buff_addr_hi << 32 | d->buff_addr_lo); + s = format (s, " len %u", d->length); + s = format (s, " req_id 0x%x", d->req_id_lo | d->req_id_hi << 10); + if (d->header_length) + s = format (s, " hdr_len %u", d->header_length); +#define _(v, n) \ + if ((v) < 6 && #n[0] != '_' && d->n) \ + s = format (s, " " #n " %u", d->n); + foreach_ena_tx_desc +#undef _ + return s; +} + +u8 * +format_ena_rx_desc_status (u8 *s, va_list *args) +{ + ena_rx_cdesc_status_t st = va_arg (*args, ena_rx_cdesc_status_t); + s = format (s, "0x%x", st.as_u32); + if (st.as_u32 != 0) + { + int not_first_line = 0; + s = format (s, " -> "); +#define _(b, n) \ + if (st.n) \ + s = format (s, "%s%s %u", not_first_line++ ? ", " : "", #n, st.n); + foreach_ena_rx_cdesc_status +#undef _ + } + return s; +} + +u8 * +format_ena_rx_trace (u8 *s, va_list *args) +{ + vlib_main_t *vm = va_arg (*args, vlib_main_t *); + vlib_node_t *node = va_arg (*args, vlib_node_t *); + ena_rx_trace_t *t = va_arg (*args, ena_rx_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); + u32 indent = format_get_indent (s); + + s = format ( + s, "ena: %v (%d) qid %u next-node %U length %u req-id 0x%x n-desc %u", + hi->name, t->hw_if_index, t->qid, format_vlib_next_node_name, vm, + node->index, t->next_index, t->length, t->req_id, t->n_desc); + s = format (s, "\n%Ustatus: %U", format_white_space, indent + 2, + format_ena_rx_desc_status, t->status); + return s; +} + +u8 * +format_ena_regs (u8 *s, va_list *args) +{ + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + int offset = va_arg (*args, int); + u32 indent = format_get_indent (s); + u32 rv = 0, f, v; + u8 *s2 = 0; + +#define _(o, r, rn, m) \ + if ((offset == -1 || offset == o) && r == 1) \ + { \ + s = format (s, "\n%U", format_white_space, indent); \ + vec_reset_length (s2); \ + s2 = format (s2, "[0x%02x] %s:", o, #rn); \ + ena_reg_read (dev, o, &rv); \ + s = format (s, "%-34v = 0x%08x", s2, rv); \ + f = 0; \ + m \ + } + +#define __(l, fn) \ + if (#fn[0] != '_') \ + { \ + vec_reset_length (s2); \ + s2 = format (s2, "\n%U", format_white_space, indent); \ + s2 = format (s2, " [%2u:%2u] %s", f + l - 1, f, #fn); \ + s = format (s, " %-35v = ", s2); \ + v = (rv >> f) & pow2_mask (l); \ + if (l < 3) \ + s = format (s, "%u", v); \ + else if (l <= 8) \ + s = format (s, "0x%02x (%u)", v, v); \ + else if (l <= 16) \ + s = format (s, "0x%04x", v); \ + else \ + s = format (s, "0x%08x", v); \ + } \ + f += l; + + foreach_ena_reg; +#undef _ + + vec_free (s2); + + return s; +} diff --git a/src/plugins/dev_ena/format_aq.c b/src/plugins/dev_ena/format_aq.c new file mode 100644 index 00000000000..18bad1e050b --- /dev/null +++ b/src/plugins/dev_ena/format_aq.c @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include +#include + +#include +#include +#include + +static char *opcode_names[] = { +#define _(v, s) [v] = #s, + foreach_ena_aq_opcode +#undef _ +}; + +static char *status_names[] = { +#define _(v, s) [v] = #s, + foreach_ena_aq_compl_status +#undef _ +}; + +#define __maxval(s, f) (u64) (((typeof ((s)[0])){ .f = -1LL }).f) + +#define __name(s, n) \ + { \ + s = format (s, "%s%U%-32s: ", line ? "\n" : "", format_white_space, \ + line ? indent : 0, #n); \ + line++; \ + } + +#define _format_number(s, d, n, ...) \ + { \ + __name (s, n); \ + if (d->n < 10) \ + s = format (s, "%u", d->n); \ + else if (__maxval (d, n) <= 255) \ + s = format (s, "0x%02x (%u)", d->n, d->n); \ + else if (__maxval (d, n) <= 65535) \ + s = format (s, "0x%04x (%u)", d->n, d->n); \ + else \ + s = format (s, "0x%08x (%u)", d->n, d->n); \ + } + +#define _format_with_fn_and_ptr(s, c, n, f) \ + { \ + __name (s, n); \ + s = format (s, "%U", f, &((c)->n)); \ + } + +#define _format_with_fn_and_val(s, c, n, f) \ + { \ + __name (s, n); \ + s = format (s, "%U", f, (c)->n); \ + } +#define _format_ena_memory(s, c, n) \ + _format_with_fn_and_ptr (s, c, n, format_ena_mem_addr) + +u8 * +format_ena_aq_opcode (u8 *s, va_list *args) +{ + u32 opcode = va_arg (*args, u32); + + if (opcode >= ARRAY_LEN (opcode_names) || opcode_names[opcode] == 0) + return format (s, "UNKNOWN(%u)", opcode); + return format (s, "%s", opcode_names[opcode]); +} + +u8 * +format_ena_aq_status (u8 *s, va_list *args) +{ + u32 status = va_arg (*args, u32); + + if (status >= ARRAY_LEN (status_names) || status_names[status] == 0) + return format (s, "UNKNOWN(%u)", status); + return format (s, "%s", status_names[status]); +} + +u8 * +format_ena_aq_aenq_groups (u8 *s, va_list *args) +{ + ena_aq_aenq_groups_t g = va_arg (*args, ena_aq_aenq_groups_t); + u32 i, not_first = 0; + u32 indent = format_get_indent (s); + +#define _(x) \ + if (g.x) \ + { \ + if (format_get_indent (s) > 80) \ + s = format (s, "\n%U", format_white_space, indent); \ + s = format (s, "%s%s", not_first++ ? " " : "", #x); \ + g.x = 0; \ + } + foreach_ena_aq_aenq_groups; +#undef _ + + foreach_set_bit_index (i, g.as_u32) + s = format (s, "%sunknown-%u", not_first++ ? " " : "", i); + + return s; +} + +u8 * +format_ena_aq_feat_id_bitmap (u8 *s, va_list *args) +{ + u32 bmp = va_arg (*args, u32); + int i, line = 0; + u32 indent = format_get_indent (s); + + foreach_set_bit_index (i, bmp) + { + ena_aq_feat_info_t *info = ena_aq_get_feat_info (i); + if (line++) + s = format (s, ", "); + if (format_get_indent (s) > 80) + s = format (s, "\n%U", format_white_space, indent); + if (info) + s = format (s, "%s", info->name); + else + s = format (s, "unknown-%u", i); + } + + return s; +} + +u8 * +format_ena_aq_feat_name (u8 *s, va_list *args) +{ + ena_aq_feature_id_t feat_id = va_arg (*args, int); + char *feat_names[] = { +#define _(v, r, gt, st, s, u) [v] = #s, + foreach_ena_aq_feature_id +#undef _ + }; + + if (feat_id >= ARRAY_LEN (feat_names) || feat_names[feat_id] == 0) + return format (s, "UNKNOWN(%u)", feat_id); + return format (s, "%s", feat_names[feat_id]); +} + +u8 * +format_ena_aq_feat_desc (u8 *s, va_list *args) +{ + ena_aq_feature_id_t feat_id = va_arg (*args, int); + void *data = va_arg (*args, void *); + ena_aq_feat_info_t *info = ena_aq_get_feat_info (feat_id); + u32 indent = format_get_indent (s); + u32 line = 0; + + switch (feat_id) + { + case ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES: + { + ena_aq_feat_device_attr_t *d = data; + _format_number (s, d, impl_id); + _format_number (s, d, device_version); + _format_number (s, d, phys_addr_width); + _format_number (s, d, virt_addr_width); + _format_with_fn_and_val (s, d, mac_addr, format_ethernet_address); + _format_number (s, d, max_mtu); + _format_with_fn_and_val (s, d, supported_features, + format_ena_aq_feat_id_bitmap); + } + break; + + case ENA_ADMIN_FEAT_ID_AENQ_CONFIG: + { + ena_aq_feat_aenq_config_t *d = data; + _format_with_fn_and_val (s, d, supported_groups, + format_ena_aq_aenq_groups); + _format_with_fn_and_val (s, d, enabled_groups, + format_ena_aq_aenq_groups); + } + break; + + case ENA_ADMIN_FEAT_ID_INTERRUPT_MODERATION: + { + ena_aq_feat_intr_moder_t *d = data; + _format_number (s, d, intr_delay_resolution); + } + break; + + case ENA_ADMIN_FEAT_ID_STATELESS_OFFLOAD_CONFIG: + { + ena_aq_feat_stateless_offload_config_t *d = data; + _format_number (s, d, rx_supported); + _format_number (s, d, rx_enabled); + _format_number (s, d, tx); + } + break; + + case ENA_ADMIN_FEAT_ID_RSS_INDIRECTION_TABLE_CONFIG: + { + ena_aq_feat_rss_ind_table_config_t *d = data; + _format_number (s, d, min_size); + _format_number (s, d, max_size); + _format_number (s, d, size); + _format_number (s, d, one_entry_update); + _format_number (s, d, inline_index); + _format_number (s, d, inline_entry.cq_idx); + } + break; + + case ENA_ADMIN_FEAT_ID_MAX_QUEUES_NUM: + { + ena_aq_feat_max_queue_num_t *d = data; + _format_number (s, d, max_sq_num); + _format_number (s, d, max_sq_depth); + _format_number (s, d, max_cq_num); + _format_number (s, d, max_cq_depth); + _format_number (s, d, max_legacy_llq_num); + _format_number (s, d, max_legacy_llq_depth); + _format_number (s, d, max_header_size); + _format_number (s, d, max_packet_tx_descs); + _format_number (s, d, max_packet_rx_descs); + } + break; + + case ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT: + { + ena_aq_feat_max_queue_ext_t *d = data; + _format_number (s, d, max_rx_sq_num); + _format_number (s, d, max_rx_cq_num); + _format_number (s, d, max_tx_sq_num); + _format_number (s, d, max_tx_cq_num); + _format_number (s, d, max_rx_sq_depth); + _format_number (s, d, max_rx_cq_depth); + _format_number (s, d, max_tx_sq_depth); + _format_number (s, d, max_tx_cq_depth); + _format_number (s, d, version); + _format_number (s, d, max_tx_header_size); + _format_number (s, d, max_per_packet_tx_descs); + _format_number (s, d, max_per_packet_rx_descs); + } + break; + + case ENA_ADMIN_FEAT_ID_RSS_HASH_FUNCTION: + { + ena_aq_feat_rss_hash_function_t *d = data; + _format_number (s, d, supported_func); + _format_number (s, d, selected_func); + _format_number (s, d, init_val); + } + break; + + case ENA_ADMIN_FEAT_ID_LLQ: + { + ena_aq_feat_llq_t *d = data; + _format_number (s, d, max_llq_num); + _format_number (s, d, max_llq_depth); + _format_number (s, d, header_location_ctrl_supported); + _format_number (s, d, header_location_ctrl_enabled); + _format_number (s, d, entry_size_ctrl_supported); + _format_number (s, d, entry_size_ctrl_enabled); + _format_number (s, d, desc_num_before_header_supported); + _format_number (s, d, desc_num_before_header_enabled); + _format_number (s, d, descriptors_stride_ctrl_supported); + _format_number (s, d, descriptors_stride_ctrl_enabled); + _format_number (s, d, accel_mode.get.supported_flags); + _format_number (s, d, accel_mode.get.max_tx_burst_size); + _format_number (s, d, accel_mode.set.enabled_flags); + } + break; + + case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_STRINGS: + { + ena_aq_feat_extra_properties_strings_t *d = data; + _format_number (s, d, count); + } + break; + + case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_FLAGS: + { + ena_aq_feat_extra_properties_flags_t *d = data; + _format_number (s, d, flags); + } + break; + + case ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG: + { + ena_aq_feat_host_attr_config_t *d = data; + _format_ena_memory (s, d, os_info_ba); + _format_ena_memory (s, d, debug_ba); + _format_number (s, d, debug_area_size); + } + break; + + default: + if (info) + s = format (s, "%U", format_hexdump, data, info->data_sz); + break; + } + + return s; +} + +u8 * +format_ena_aq_create_sq_cmd (u8 *s, va_list *args) +{ + ena_aq_create_sq_cmd_t *cmd = va_arg (*args, ena_aq_create_sq_cmd_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, cmd, sq_direction); + _format_number (s, cmd, placement_policy); + _format_number (s, cmd, completion_policy); + _format_number (s, cmd, is_physically_contiguous); + _format_number (s, cmd, cq_idx); + _format_number (s, cmd, sq_depth); + _format_ena_memory (s, cmd, sq_ba); + _format_ena_memory (s, cmd, sq_head_writeback); + return s; +} + +u8 * +format_ena_aq_create_cq_cmd (u8 *s, va_list *args) +{ + ena_aq_create_cq_cmd_t *cmd = va_arg (*args, ena_aq_create_cq_cmd_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, cmd, interrupt_mode_enabled); + _format_number (s, cmd, cq_entry_size_words); + _format_number (s, cmd, cq_depth); + _format_number (s, cmd, msix_vector); + _format_ena_memory (s, cmd, cq_ba); + return s; +} + +u8 * +format_ena_aq_create_sq_resp (u8 *s, va_list *args) +{ + ena_aq_create_sq_resp_t *resp = va_arg (*args, ena_aq_create_sq_resp_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, resp, sq_idx); + _format_number (s, resp, sq_doorbell_offset); + _format_number (s, resp, llq_descriptors_offset); + _format_number (s, resp, llq_headers_offset); + return s; +} + +u8 * +format_ena_aq_create_cq_resp (u8 *s, va_list *args) +{ + ena_aq_create_cq_resp_t *resp = va_arg (*args, ena_aq_create_cq_resp_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, resp, cq_idx); + _format_number (s, resp, cq_actual_depth); + _format_number (s, resp, numa_node_register_offset); + _format_number (s, resp, cq_head_db_register_offset); + _format_number (s, resp, cq_interrupt_unmask_register_offset); + return s; +} + +u8 * +format_ena_aq_destroy_sq_cmd (u8 *s, va_list *args) +{ + ena_aq_destroy_sq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_sq_cmd_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, cmd, sq_idx); + _format_number (s, cmd, sq_direction); + return s; +} + +u8 * +format_ena_aq_destroy_cq_cmd (u8 *s, va_list *args) +{ + ena_aq_destroy_cq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_cq_cmd_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, cmd, cq_idx); + return s; +} + +u8 * +format_ena_aq_basic_stats (u8 *s, va_list *args) +{ + ena_aq_basic_stats_t *st = va_arg (*args, ena_aq_basic_stats_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, st, tx_bytes); + _format_number (s, st, tx_pkts); + _format_number (s, st, rx_bytes); + _format_number (s, st, rx_pkts); + _format_number (s, st, rx_drops); + _format_number (s, st, tx_drops); + return s; +} + +u8 * +format_ena_aq_eni_stats (u8 *s, va_list *args) +{ + ena_aq_eni_stats_t *st = va_arg (*args, ena_aq_eni_stats_t *); + u32 indent = format_get_indent (s); + u32 line = 0; + + _format_number (s, st, bw_in_allowance_exceeded); + _format_number (s, st, bw_out_allowance_exceeded); + _format_number (s, st, pps_allowance_exceeded); + _format_number (s, st, conntrack_allowance_exceeded); + _format_number (s, st, linklocal_allowance_exceeded); + return s; +} diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c new file mode 100644 index 00000000000..2b26fefc5e3 --- /dev/null +++ b/src/plugins/dev_ena/port.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "port", +}; + +vnet_dev_rv_t +ena_port_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + + log_debug (dev, "port %u", port->port_id); + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +ena_port_start (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + vnet_dev_rv_t rv; + + log_debug (dev, "port start: port %u", port->port_id); + + if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU)) + { + ena_aq_feat_mtu_t mtu = { .mtu = port->max_rx_frame_size }; + + if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_MTU, &mtu))) + return rv; + } + + if ((rv = vnet_dev_port_start_all_rx_queues (vm, port))) + return rv; + + if ((rv = vnet_dev_port_start_all_tx_queues (vm, port))) + return rv; + + return VNET_DEV_OK; +} + +void +ena_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) +{ + log_debug (port->dev, "port stop: port %u", port->port_id); +} + +vnet_dev_rv_t +ena_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + if (port->started) + rv = VNET_DEV_ERR_PORT_STARTED; + break; + + default: + rv = VNET_DEV_ERR_NOT_SUPPORTED; + }; + + return rv; +} + +vnet_dev_rv_t +ena_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + break; + + default: + return VNET_DEV_ERR_NOT_SUPPORTED; + }; + + return rv; +} diff --git a/src/plugins/dev_ena/queue.c b/src/plugins/dev_ena/queue.c new file mode 100644 index 00000000000..08c763c8461 --- /dev/null +++ b/src/plugins/dev_ena/queue.c @@ -0,0 +1,384 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include +#include + +#include +#include + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "queue", +}; + +void +ena_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_port_t *port = rxq->port; + vnet_dev_t *dev = port->dev; + + ASSERT (rxq->started == 0); + ASSERT (eq->cq_created == 0); + ASSERT (eq->sq_created == 0); + + log_debug (dev, "queue %u", rxq->queue_id); + + foreach_pointer (p, eq->buffer_indices, eq->compl_sqe_indices) + if (p) + clib_mem_free (p); + + foreach_pointer (p, eq->cqes, eq->sqes) + vnet_dev_dma_mem_free (vm, dev, p); +} + +vnet_dev_rv_t +ena_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + vnet_dev_port_t *port = rxq->port; + vnet_dev_t *dev = port->dev; + ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq); + u16 size = rxq->size; + vnet_dev_rv_t rv; + + ASSERT (eq->buffer_indices == 0); + ASSERT (eq->compl_sqe_indices == 0); + ASSERT (eq->cqes == 0); + ASSERT (eq->sqes == 0); + + log_debug (dev, "queue %u", rxq->queue_id); + + eq->buffer_indices = clib_mem_alloc_aligned ( + sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES); + + eq->compl_sqe_indices = clib_mem_alloc_aligned ( + sizeof (eq->compl_sqe_indices[0]) * size, CLIB_CACHE_LINE_BYTES); + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0, + (void **) &eq->cqes))) + goto err; + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0, + (void **) &eq->sqes))) + goto err; + + return VNET_DEV_OK; + +err: + ena_rx_queue_free (vm, rxq); + return rv; +} + +void +ena_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_port_t *port = txq->port; + vnet_dev_t *dev = port->dev; + + ASSERT (txq->started == 0); + + log_debug (dev, "queue %u", txq->queue_id); + + foreach_pointer (p, eq->buffer_indices, eq->sqe_templates) + if (p) + clib_mem_free (p); + + foreach_pointer (p, eq->cqes, eq->sqes) + vnet_dev_dma_mem_free (vm, dev, p); +} + +vnet_dev_rv_t +ena_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + vnet_dev_port_t *port = txq->port; + vnet_dev_t *dev = port->dev; + ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq); + u16 size = txq->size; + vnet_dev_rv_t rv; + + ASSERT (eq->buffer_indices == 0); + ASSERT (eq->sqe_templates == 0); + ASSERT (eq->cqes == 0); + ASSERT (eq->sqes == 0); + + log_debug (dev, "queue %u", txq->queue_id); + + eq->buffer_indices = clib_mem_alloc_aligned ( + sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES); + eq->sqe_templates = clib_mem_alloc_aligned ( + sizeof (eq->sqe_templates[0]) * size, CLIB_CACHE_LINE_BYTES); + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0, + (void **) &eq->cqes))) + goto err; + + if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0, + (void **) &eq->sqes))) + goto err; + + return VNET_DEV_OK; + +err: + ena_tx_queue_free (vm, txq); + return rv; +} + +vnet_dev_rv_t +ena_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_port_t *port = rxq->port; + vnet_dev_t *dev = port->dev; + ena_device_t *ed = vnet_dev_get_data (dev); + u16 buffer_size = vnet_dev_get_rx_queue_buffer_data_size (vm, rxq); + u16 size = rxq->size; + vnet_dev_rv_t rv; + + /* Create Completion Queue */ + ena_aq_create_cq_resp_t cqresp; + ena_aq_create_cq_cmd_t cqcmd = { + .interrupt_mode_enabled = 1, + .cq_entry_size_words = sizeof (ena_rx_cdesc_t) / 4, + .cq_depth = size, + .msix_vector = ~0, + }; + + ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes); + if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp))) + { + log_err (dev, "queue %u cq creation failed", rxq->queue_id); + goto error; + } + + eq->cq_idx = cqresp.cq_idx; + eq->cq_created = 1; + + log_debug (dev, "queue %u cq %u created", rxq->queue_id, eq->cq_idx); + + /* Create Submission Queue */ + ena_aq_create_sq_resp_t sqresp; + ena_aq_create_sq_cmd_t sqcmd = { + .sq_direction = ENA_ADMIN_SQ_DIRECTION_RX, + .placement_policy = ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST, + .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC, + .is_physically_contiguous = 1, + .sq_depth = size, + .cq_idx = cqresp.cq_idx, + }; + + ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes); + if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp))) + { + log_err (dev, "queue %u sq creation failed", rxq->queue_id); + goto error; + } + + eq->sq_idx = sqresp.sq_idx; + eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset); + eq->sq_created = 1; + + log_debug (dev, "queue %u sq %u created, sq_db %p", rxq->queue_id, + eq->sq_idx, eq->sq_db); + + for (int i = 0; i < size; i++) + { + eq->sqes[i] = (ena_rx_desc_t){ + .lo = { + .length = buffer_size, + .comp_req = 1, + .first = 1, + .last = 1, + .reserved5 = 1, /* ena_com says MBO */ + .req_id = i, + }, + }; + eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX; + eq->compl_sqe_indices[i] = i; + } + + eq->sq_next = 0; + eq->n_compl_sqes = size; + + return VNET_DEV_OK; + +error: + ena_rx_queue_stop (vm, rxq); + return rv; +} + +vnet_dev_rv_t +ena_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_port_t *port = txq->port; + vnet_dev_t *dev = port->dev; + ena_device_t *ed = vnet_dev_get_data (dev); + u16 size = txq->size; + vnet_dev_rv_t rv; + + /* Create Completion Queue */ + ena_aq_create_cq_resp_t cqresp; + ena_aq_create_cq_cmd_t cqcmd = { + .interrupt_mode_enabled = 1, + .cq_entry_size_words = sizeof (ena_tx_cdesc_t) / 4, + .cq_depth = size, + .msix_vector = ~0, + }; + + ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes); + if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp))) + { + log_err (dev, "queue %u cq creation failed", txq->queue_id); + goto error; + } + + eq->cq_idx = cqresp.cq_idx; + eq->cq_created = 1; + + log_debug (dev, "queue %u cq %u created", txq->queue_id, eq->cq_idx); + + /* Create Submission Queue */ + ena_aq_create_sq_resp_t sqresp; + ena_aq_create_sq_cmd_t sqcmd = { + .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX, + .placement_policy = eq->llq ? ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE : + ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST, + .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC, + .is_physically_contiguous = 1, + .sq_depth = size, + .cq_idx = cqresp.cq_idx, + }; + + if (eq->llq == 0) + ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes); + if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp))) + { + log_err (dev, "queue %u sq creation failed", txq->queue_id); + goto error; + } + + eq->sq_idx = sqresp.sq_idx; + eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset); + eq->sq_created = 1; + + log_debug (dev, "queue %u sq %u created, sq_db %p", txq->queue_id, + eq->sq_idx, eq->sq_db); + + for (u32 i = 0; i < size; i++) + { + eq->sqe_templates[i] = + (ena_tx_desc_t){ .req_id_lo = i, .req_id_hi = i >> 10, .comp_req = 1 } + .as_u64x2[0]; + + eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX; + } + + eq->sq_head = 0; + eq->sq_tail = 0; + eq->cq_next = 0; + +#if 0 + if (txq->llq) + txq->llq_descs = + (ena_tx_llq_desc128_t *) ((u8 *) ed->mem_bar + + sqresp.llq_descriptors_offset); +#endif + + log_debug (dev, "queue %u sq %u created, sq_db %p llq_desc %p", + txq->queue_id, eq->sq_idx, eq->sq_db, + eq->llq ? eq->llq_descs : 0); + return VNET_DEV_OK; + +error: + ena_tx_queue_stop (vm, txq); + return rv; +} + +static void +ena_free_sq_buffer_indices (vlib_main_t *vm, u32 *sq_buffer_indices, + u32 n_desc) +{ + u32 *to = sq_buffer_indices; + + for (u32 *from = to; from < sq_buffer_indices + n_desc; from++) + if (from[0] != VLIB_BUFFER_INVALID_INDEX) + to++[0] = from[0]; + + if (to - sq_buffer_indices > 0) + vlib_buffer_free (vm, sq_buffer_indices, to - sq_buffer_indices); +} + +void +ena_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_t *dev = rxq->port->dev; + vnet_dev_rv_t rv; + + if (eq->sq_created) + { + ena_aq_destroy_sq_cmd_t cmd = { + .sq_idx = eq->sq_idx, + .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX, + }; + + if ((rv = ena_aq_destroy_sq (vm, dev, &cmd))) + log_err (dev, "queue %u failed to destroy sq %u", rxq->queue_id, + eq->sq_idx); + eq->sq_created = 0; + }; + + if (eq->cq_created) + { + ena_aq_destroy_cq_cmd_t cmd = { + .cq_idx = eq->cq_idx, + }; + + if ((rv = ena_aq_destroy_cq (vm, dev, &cmd))) + log_err (dev, "queue %u failed to destroy cq %u", rxq->queue_id, + eq->cq_idx); + eq->cq_created = 0; + }; + + if (eq->n_compl_sqes < rxq->size) + ena_free_sq_buffer_indices (vm, eq->buffer_indices, rxq->size); +} + +void +ena_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + vnet_dev_rv_t rv; + + if (eq->sq_created) + { + ena_aq_destroy_sq_cmd_t cmd = { + .sq_idx = eq->sq_idx, + .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX, + }; + + if ((rv = ena_aq_destroy_sq (vm, dev, &cmd))) + log_err (dev, "queue %u failed to destroy sq %u", txq->queue_id, + eq->sq_idx); + eq->sq_created = 0; + }; + + if (eq->cq_created) + { + ena_aq_destroy_cq_cmd_t cmd = { + .cq_idx = eq->cq_idx, + }; + + if ((rv = ena_aq_destroy_cq (vm, dev, &cmd))) + log_err (dev, "queue %u failed to destroy cq %u", txq->queue_id, + eq->cq_idx); + eq->cq_created = 0; + }; + + if (eq->sq_head != eq->sq_tail) + ena_free_sq_buffer_indices (vm, eq->buffer_indices, txq->size); +} diff --git a/src/plugins/dev_ena/reg.c b/src/plugins/dev_ena/reg.c new file mode 100644 index 00000000000..7f2cc0f8aba --- /dev/null +++ b/src/plugins/dev_ena/reg.c @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include +#include + +#include +#include + +VLIB_REGISTER_LOG_CLASS (ena_log, static) = { + .class_name = "ena", + .subclass_name = "reg", +}; + +static vnet_dev_rv_t +ena_err (vnet_dev_t *dev, vnet_dev_rv_t rv, char *fmt, ...) +{ + va_list va; + u8 *s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + log_err (dev, "%v", s); + vec_free (s); + return rv; +} + +static u8 * +format_ena_reg_name (u8 *s, va_list *args) +{ + int offset = va_arg (*args, int); + + char *reg_names[] = { +#define _(o, r, rn, m) [(o) >> 2] = #rn, + foreach_ena_reg +#undef _ + }; + + offset >>= 2; + + if (offset < 0 || offset >= ARRAY_LEN (reg_names) || reg_names[offset] == 0) + return format (s, "(unknown)"); + return format (s, "%s", reg_names[offset]); +} + +void +ena_reg_write (vnet_dev_t *dev, ena_reg_t reg, void *v) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + u32 *p = (u32 *) ((u8 *) ed->reg_bar + reg); + u32 val = *(u32 *) v; + log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x", __func__, + format_ena_reg_name, reg, reg, val); + __atomic_store_n (p, val, __ATOMIC_RELEASE); +} + +void +ena_reg_set_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, u32 rlo, u32 rhi, + void *p) +{ + uword pa = vnet_dev_get_dma_addr (vm, dev, p); + u32 reg = (u32) pa; + ena_reg_write (dev, rlo, ®); + reg = pa >> 32; + ena_reg_write (dev, rhi, ®); +} + +void +ena_reg_read (vnet_dev_t *dev, ena_reg_t reg, const void *v) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + vlib_main_t *vm = vlib_get_main (); + u32 rv; + f64 dt = 0, t0; + + if (ed->readless == 0) + { + rv = + __atomic_load_n ((u32 *) ((u8 *) ed->reg_bar + reg), __ATOMIC_SEQ_CST); + } + else + { + u32 *p = (u32 *) ((u8 *) ed->reg_bar + ENA_REG_MMIO_REG_READ); + + ena_reg_mmio_reg_read_t rr = { .reg_off = reg, .req_id = 1 }; + ed->mmio_resp->req_id = 0; + ed->mmio_resp->reg_val = ~0; + + __atomic_store_n (p, rr.as_u32, __ATOMIC_RELEASE); + + t0 = vlib_time_now (vm); + while (ed->mmio_resp->req_id == 0 && dt < 0.2) + { + CLIB_PAUSE (); + dt = vlib_time_now (vm) - t0; + } + + rv = ed->mmio_resp->reg_val; + } + + log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x dt %.3fs", __func__, + format_ena_reg_name, reg, reg, rv, dt); + *(u32 *) v = rv; +} + +vnet_dev_rv_t +ena_reg_reset (vlib_main_t *vm, vnet_dev_t *dev, ena_reset_reason_t reason) +{ + ena_device_t *ed = vnet_dev_get_data (dev); + ena_reg_version_t ver; + ena_reg_controller_version_t ctrl_ver; + ena_reg_caps_t caps = {}; + ena_reg_dev_sts_t dev_sts = {}; + ena_reg_dev_ctl_t reset_start = { .dev_reset = 1, .reset_reason = reason }; + + if (ed->readless) + ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI, + ed->mmio_resp); + + ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts); + ena_reg_read (dev, ENA_REG_CAPS, &caps); + + if (caps.as_u32 == ~0 && dev_sts.as_u32 == ~0) + return ena_err (dev, VNET_DEV_ERR_BUS, "failed to read regs"); + + if (dev_sts.ready == 0) + return VNET_DEV_ERR_NOT_READY; + + log_debug (dev, "reg_reset: reset timeout is %u", caps.reset_timeout); + + ena_reg_write (dev, ENA_REG_DEV_CTL, &reset_start); + + if (ed->readless) + ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI, + ed->mmio_resp); + + while (1) + { + int i = 0; + ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts); + if (dev_sts.reset_in_progress) + break; + if (i++ == 20) + return ena_err (dev, VNET_DEV_ERR_BUS, "failed to initiate reset"); + vlib_process_suspend (vm, 0.001); + } + + ena_reg_write (dev, ENA_REG_DEV_CTL, &(ena_reg_dev_ctl_t){}); + + return 0; + while (1) + { + int i = 0; + ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts); + if (dev_sts.reset_in_progress == 0) + break; + if (i++ == 20) + return ena_err (dev, VNET_DEV_ERR_BUS, "failed to complete reset"); + vlib_process_suspend (vm, 0.001); + } + + ena_reg_read (dev, ENA_REG_VERSION, &ver); + ena_reg_read (dev, ENA_REG_CONTROLLER_VERSION, &ctrl_ver); + + log_info (dev, "version %u.%u controller_version %u.%u.%u impl_id %u\n", + ver.major, ver.minor, ctrl_ver.major, ctrl_ver.minor, + ctrl_ver.subminor, ctrl_ver.impl_id); + + return 0; +} diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c new file mode 100644 index 00000000000..41fc5b8c943 --- /dev/null +++ b/src/plugins/dev_ena/rx_node.c @@ -0,0 +1,457 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define ENA_RX_REFILL_BATCH 32 + +typedef struct +{ + u16 phase_bit; + u16 size; + u32 mask; + ena_rx_cdesc_status_t st_or; + ena_rx_cdesc_status_t st_and; + u16 *comp_sqe_indices; + u32 *sq_buffer_indices; +} ena_rx_ctx_t; + +static_always_inline void +ena_device_input_status_to_flags (ena_rx_cdesc_status_t *statuses, u32 *flags, + u32 n_desc, vlib_frame_bitmap_t first_bmp, + int maybe_chained) +{ + const ena_rx_cdesc_status_t mask_first = { .first = 1 }, + match_first1 = { .first = 1 }; + + const ena_rx_cdesc_status_t mask_last = { .last = 1 }, + match_last0 = { .last = 0 }; + + const ena_rx_cdesc_status_t mask_l4_csum = { .ipv4_frag = 1, + .l4_csum_checked = 1, + .l4_csum_err = 1 }, + match_l4_csum_ok = { .l4_csum_checked = 1 }; + + clib_memset_u32 (statuses + n_desc, 0, 8); +#if defined(CLIB_HAVE_VEC128) + +#if defined(CxLIB_HAVE_VEC512) +#define N 16 +#define u32xN u32x16 +#define u32xNu u32x16u +#define u32xN_splat u32x16_splat +#elif defined(CxLIB_HAVE_VEC256) +#define N 8 +#define u32xN u32x8 +#define u32xNu u32x8u +#define u32xN_splat u32x8_splat +#else +#define N 4 +#define u32xN u32x4 +#define u32xNu u32x4u +#define u32xN_splat u32x4_splat +#endif + + const u32xN st_mask_first = u32xN_splat (mask_first.as_u32); + const u32xN st_match_first1 = u32xN_splat (match_first1.as_u32); + const u32xN st_mask_last = u32xN_splat (mask_last.as_u32); + const u32xN st_match_last0 = u32xN_splat (match_last0.as_u32); + const u32xN st_mask_l4_csum = u32xN_splat (mask_l4_csum.as_u32); + const u32xN st_match_l4_csum_ok = u32xN_splat (match_l4_csum_ok.as_u32); + const u32xN f_total_len_valid = u32xN_splat (VLIB_BUFFER_TOTAL_LENGTH_VALID); + const u32xN f_next_preset = u32xN_splat (VLIB_BUFFER_NEXT_PRESENT); + const u32xN f_l4_csum = u32xN_splat (VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + + for (u32 i = 0; i < round_pow2 (n_desc, 2 * N); i += 2 * N) + { + uword msk = 0; + u32xN f0, f1, r0, r1; + u32xN s0 = ((u32xNu *) (statuses + i))[0]; + u32xN s1 = ((u32xNu *) (statuses + i))[1]; + + r0 = (s0 & st_mask_first) == st_match_first1; + r1 = (s1 & st_mask_first) == st_match_first1; + f0 = r0 & f_total_len_valid; + f1 = r1 & f_total_len_valid; + + if (maybe_chained) + { +#if defined(CxLIB_HAVE_VEC512) + u64 msb_mask = 0x1111111111111111; + msk = bit_extract_u64 (u8x64_msb_mask ((u8x64) r0), msb_mask); + msk |= bit_extract_u64 (u8x64_msb_mask ((u8x64) r1), msb_mask) << 16; +#elif defined(CxLIB_HAVE_VEC256) + msk = u8x32_msb_mask ((u8x32) r0); + msk |= (u64) u8x32_msb_mask ((u8x32) r1) << 32; + msk = bit_extract_u64 (msk, 0x1111111111111111); +#else + msk = u8x16_msb_mask ((u8x16) r0); + msk |= (u32) u8x16_msb_mask ((u8x16) r1) << 16; + msk = bit_extract_u32 (msk, 0x11111111); +#endif + first_bmp[i / uword_bits] |= msk << (i % uword_bits); + } + + f0 |= ((s0 & st_mask_last) == st_match_last0) & f_next_preset; + f1 |= ((s1 & st_mask_last) == st_match_last0) & f_next_preset; + + f0 |= ((s0 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum; + f1 |= ((s1 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum; + + ((u32xNu *) (flags + i))[0] = f0; + ((u32xNu *) (flags + i))[1] = f1; + } +#else + while (n_left) + { + u16 f = 0; + ena_rx_cdesc_status_t st = statuses++[0]; + + if ((st.as_u32 & mask_first.as_u32) == match_first1.as_u32) + f |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if ((st.as_u32 & mask_last.as_u32) == match_last0.as_u32) + f |= VLIB_BUFFER_NEXT_PRESENT; + + if ((st.as_u32 & mask_l4_csum.as_u32) == match_l4_csum_ok.as_u32) + f |= VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | + VNET_BUFFER_F_L4_CHECKSUM_CORRECT; + + flags++[0] = f; + n_left--; + } +#endif +} + +static_always_inline u16 +ena_device_input_cq_dequeue_no_wrap (ena_rx_ctx_t *ctx, ena_rxq_t *q, + ena_rx_cdesc_status_t *statuses, + u16 *lengths, u16 *csi) +{ + u32 next = q->cq_next; + ena_rx_cdesc_t *cqes = q->cqes; + u32 phase = (next & ctx->size << 1) != 0; + u16 index = next & ctx->mask; + ena_rx_cdesc_t *cd = cqes + index; + ena_rx_cdesc_status_t st; + u32 n_to_check, i = 0; + + st = cd->status; + if (st.phase == phase) + return 0; + + n_to_check = clib_min (VLIB_FRAME_SIZE, ctx->size - index); + + ctx->st_or.as_u32 |= st.as_u32; + ctx->st_and.as_u32 &= st.as_u32; + statuses[i] = st; + lengths[i] = cd->length; + csi[i] = cd->req_id; + i++; + cd++; + +more: + for (st = cd->status; i < n_to_check && st.phase != phase; + i++, st = (++cd)->status) + { + ctx->st_or.as_u32 |= st.as_u32; + ctx->st_and.as_u32 &= st.as_u32; + statuses[i] = st; + lengths[i] = cd->length; + csi[i] = cd->req_id; + } + + if (i == n_to_check) + { + n_to_check = VLIB_FRAME_SIZE - n_to_check; + if (n_to_check) + { + phase ^= 1; + cd = cqes; + goto more; + } + } + + /* revert incomplete */ + if (PREDICT_FALSE (statuses[i - 1].last == 0)) + { + i--; + while (i && statuses[i - 1].last == 0) + i--; + } + + return i; +} + +static_always_inline void +ena_device_input_refill (vlib_main_t *vm, ena_rx_ctx_t *ctx, + vnet_dev_rx_queue_t *rxq, int use_va) +{ + ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq); + const u64x2 flip_phase = (ena_rx_desc_t){ .lo.phase = 1 }.as_u64x2; + u32 buffer_indices[ENA_RX_REFILL_BATCH]; + uword dma_addr[ENA_RX_REFILL_BATCH]; + u32 n_alloc, n_compl_sqes = q->n_compl_sqes; + u16 *csi = ctx->comp_sqe_indices; + ena_rx_desc_t *sqes = q->sqes; + + while (n_compl_sqes > 0) + { + n_alloc = vlib_buffer_alloc_from_pool ( + vm, buffer_indices, clib_min (ENA_RX_REFILL_BATCH, n_compl_sqes), + vnet_dev_get_rx_queue_buffer_pool_index (rxq)); + + if (PREDICT_FALSE (n_alloc == 0)) + break; + + vlib_get_buffers_with_offset (vm, buffer_indices, (void **) dma_addr, + ENA_RX_REFILL_BATCH, + STRUCT_OFFSET_OF (vlib_buffer_t, data)); + + if (!use_va) + for (u32 i = 0; i < n_alloc; i++) + dma_addr[i] = vlib_physmem_get_pa (vm, (void *) dma_addr[i]); + + for (u32 i = 0; i < n_alloc; i++) + { + u16 slot = csi[i]; + u64x2 r = sqes[slot].as_u64x2 ^ flip_phase; + ctx->sq_buffer_indices[slot] = buffer_indices[i]; + r[1] = dma_addr[i]; + sqes[slot].as_u64x2 = r; /* write SQE as single 16-byte store */ + } + + csi += n_alloc; + n_compl_sqes -= n_alloc; + } + + if (n_compl_sqes == q->n_compl_sqes) + return; + + q->sq_next += q->n_compl_sqes - n_compl_sqes; + __atomic_store_n (q->sq_db, q->sq_next, __ATOMIC_RELEASE); + + if (PREDICT_FALSE (n_compl_sqes)) + clib_memmove (ctx->comp_sqe_indices, csi, n_compl_sqes * sizeof (csi[0])); + + q->n_compl_sqes = n_compl_sqes; +} + +static_always_inline uword +ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq) +{ + ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_port_t *port = rxq->port; + vnet_main_t *vnm = vnet_get_main (); + vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b; + ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8]; + u16 lengths[VLIB_FRAME_SIZE + 8], *l; + u32 flags[VLIB_FRAME_SIZE + 8], *f; + u16 *csi; + uword n_rx_packets = 0, n_rx_bytes = 0; + vlib_frame_bitmap_t head_bmp = {}; + u32 sw_if_index = port->intf.sw_if_index; + u32 hw_if_index = port->intf.hw_if_index; + u32 n_trace, n_deq, n_left; + u32 cq_next = q->cq_next; + u32 next_index = rxq->next_index; + vlib_frame_t *next_frame; + vlib_buffer_template_t bt = rxq->buffer_template; + u32 *bi; + int maybe_chained; + + ASSERT (count_set_bits (rxq->size) == 1); + ena_rx_ctx_t ctx = { + .size = rxq->size, + .mask = rxq->size - 1, + .st_and.as_u32 = ~0, + .comp_sqe_indices = q->compl_sqe_indices, + .sq_buffer_indices = q->buffer_indices, + }; + + /* we may have completed SQE indices from previous run */ + csi = ctx.comp_sqe_indices + q->n_compl_sqes; + + n_deq = + ena_device_input_cq_dequeue_no_wrap (&ctx, q, statuses, lengths, csi); + + if (n_deq == 0) + goto refill; + + q->n_compl_sqes += n_deq; + + maybe_chained = ctx.st_and.first && ctx.st_and.last ? 0 : 1; + + next_frame = + vlib_get_next_frame_internal (vm, node, next_index, /* new frame */ 1); + bi = vlib_frame_vector_args (next_frame); + + /* move buffer indices from the ring */ + for (u32 i = 0; i < n_deq; i++) + { + u32 slot = csi[i]; + bi[i] = ctx.sq_buffer_indices[slot]; + ctx.sq_buffer_indices[slot] = VLIB_BUFFER_INVALID_INDEX; + } + + vlib_get_buffers (vm, bi, buffers, n_deq); + + if (PREDICT_FALSE (maybe_chained)) + ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 1); + else + ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 0); + + for (b = buffers, l = lengths, f = flags, n_left = n_deq; n_left >= 8; + b += 4, f += 4, l += 4, n_left -= 4) + { + clib_prefetch_store (b[4]); + clib_prefetch_store (b[5]); + clib_prefetch_store (b[6]); + clib_prefetch_store (b[7]); + b[0]->template = bt; + n_rx_bytes += b[0]->current_length = l[0]; + b[0]->flags = f[0]; + b[1]->template = bt; + n_rx_bytes += b[1]->current_length = l[1]; + b[1]->flags = f[1]; + b[2]->template = bt; + n_rx_bytes += b[2]->current_length = l[2]; + b[2]->flags = f[2]; + b[3]->template = bt; + n_rx_bytes += b[3]->current_length = l[3]; + b[3]->flags = f[3]; + } + + for (; n_left > 0; b += 1, f += 1, l += 1, n_left -= 1) + { + b[0]->template = bt; + n_rx_bytes += b[0]->current_length = l[0]; + b[0]->flags = f[0]; + } + + if (maybe_chained) + { + vlib_buffer_t *hb = 0; + vlib_frame_bitmap_t tail_buf_bmp = {}; + u32 i, total_len = 0, head_flags = 0, tail_flags = 0; + n_rx_packets = vlib_frame_bitmap_count_set_bits (head_bmp); + + vlib_frame_bitmap_init (tail_buf_bmp, n_deq); + vlib_frame_bitmap_xor (tail_buf_bmp, head_bmp); + + foreach_vlib_frame_bitmap_set_bit_index (i, tail_buf_bmp) + { + vlib_buffer_t *pb = buffers[i - 1]; + /* only store opertations here */ + pb->next_buffer = bi[i]; + if (vlib_frame_bitmap_is_bit_set (tail_buf_bmp, i - 1) == 0) + { + if (hb) + { + hb->total_length_not_including_first_buffer = total_len; + /* tail descriptor contains protocol info so we need to + * combine head and tail buffer flags */ + hb->flags = head_flags | tail_flags; + } + head_flags = flags[i - 1]; + total_len = 0; + hb = pb; + } + total_len += lengths[i]; + tail_flags = flags[i]; + } + + hb->total_length_not_including_first_buffer = total_len; + hb->flags = head_flags | tail_flags; + } + else + n_rx_packets = n_deq; + + /* packet tracing */ + if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)))) + { + u32 i; + if (!maybe_chained) + vlib_frame_bitmap_init (head_bmp, n_deq); + foreach_vlib_frame_bitmap_set_bit_index (i, head_bmp) + { + vlib_buffer_t *b = buffers[i]; + if (vlib_trace_buffer (vm, node, next_index, b, 0)) + { + u32 j = i; + ena_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + tr->next_index = next_index; + tr->qid = rxq->queue_id; + tr->hw_if_index = hw_if_index; + tr->n_desc = 1; + tr->length = lengths[i]; + tr->req_id = csi[i]; + tr->status = statuses[i]; + while (statuses[j].last == 0) + { + j++; + tr->n_desc++; + tr->length += lengths[j]; + } + tr->status = statuses[j]; + + if (-n_trace) + goto trace_done; + } + } + trace_done: + vlib_set_trace_count (vm, node, n_trace); + } + + if (PREDICT_FALSE (maybe_chained)) + clib_compress_u32 (bi, bi, head_bmp, n_deq); + + if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)) + { + ethernet_input_frame_t *ef; + next_frame->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (next_frame); + ef->sw_if_index = sw_if_index; + ef->hw_if_index = hw_if_index; + + if (ctx.st_or.l3_csum_err == 0) + next_frame->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK; + vlib_frame_no_append (next_frame); + } + + vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_rx_packets); + + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + vm->thread_index, hw_if_index, n_rx_packets, n_rx_bytes); + + q->cq_next = cq_next + n_deq; + +refill: + if (rxq->port->dev->va_dma) + ena_device_input_refill (vm, &ctx, rxq, 1); + else + ena_device_input_refill (vm, &ctx, rxq, 0); + + return n_rx_packets; +} + +VNET_DEV_NODE_FN (ena_rx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_rx = 0; + foreach_vnet_dev_rx_queue_runtime (rxq, node) + n_rx += ena_device_input_inline (vm, node, rxq); + return n_rx; +} diff --git a/src/plugins/dev_ena/tx_node.c b/src/plugins/dev_ena/tx_node.c new file mode 100644 index 00000000000..ae1b852c036 --- /dev/null +++ b/src/plugins/dev_ena/tx_node.c @@ -0,0 +1,514 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2023 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include + +#define ENA_TX_ENQ_BATCH_SZ 64 +#define ENA_MAX_LOG2_TXQ_SIZE 11 +#define ENA_TX_MAX_TAIL_LEN 5 + +typedef struct +{ + u32 n_bytes; + ena_device_t *ed; + u16 n_desc; + u32 mask; + u16 n_packets_left; + u16 n_free_slots; + u32 *from; + u32 *sq_buffer_indices; + u32 tmp_bi[VLIB_FRAME_SIZE]; + ena_tx_desc_t *sqes; + u64 *sqe_templates; + u16 n_dropped_chain_too_long; + u8 llq; + void *bd; +} ena_tx_ctx_t; + +/* bits inside req_id which represent SQE index */ +static const u16 reqid_sqe_idx_mask = (1U << ENA_MAX_LOG2_TXQ_SIZE) - 1; + +static_always_inline void +ena_txq_adv_sq_tail (ena_tx_ctx_t *ctx, ena_txq_t *eq) +{ + /* CQEs can arrive out of order, so we cannot blindly advance SQ tail for + * number of free slots, instead we need to check if slot contains invalid + * buffer index */ + + u32 sq_head = eq->sq_head; + u32 sq_tail = eq->sq_tail; + u16 n, offset = sq_tail & ctx->mask; + u32 *bi = ctx->sq_buffer_indices + offset; + u16 n_to_check = clib_min (sq_head - sq_tail, ctx->n_desc - offset); + +advance_sq_tail: + n = n_to_check; + +#ifdef CLIB_HAVE_VEC256 + for (; n >= 8; n -= 8, bi += 8) + if (!u32x8_is_all_equal (*(u32x8u *) bi, VLIB_BUFFER_INVALID_INDEX)) + break; +#elif defined(CLIB_HAVE_VEC128) + for (; n >= 4; n -= 4, bi += 4) + if (!u32x4_is_all_equal (*(u32x4u *) bi, VLIB_BUFFER_INVALID_INDEX)) + break; +#endif + + for (; n > 0; n -= 1, bi += 1) + if (bi[0] != VLIB_BUFFER_INVALID_INDEX) + break; + + sq_tail += n_to_check - n; + + if (n == 0 && sq_tail < sq_head) + { + n_to_check = sq_head - sq_tail; + bi = ctx->sq_buffer_indices; + goto advance_sq_tail; + } + + eq->sq_tail = sq_tail; +} + +static_always_inline void +ena_txq_deq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq) +{ + /* dequeue CQ, extract SQ slot and number of chained buffers from + * req_id, move completed buffer indices to temp array */ + const ena_tx_cdesc_t mask_phase = { .phase = 1 }; + ena_tx_cdesc_t *cqes = txq->cqes, *cd, match_phase = {}; + u32 cq_next = txq->cq_next; + u32 offset, n = 0; + u32 n_to_check; + u32 *buffers_to_free = ctx->tmp_bi; + u32 n_buffers_to_free = 0; + + offset = cq_next & ctx->mask; + cd = cqes + offset; + n_to_check = ctx->n_desc - offset; + match_phase.phase = ~(cq_next & (ctx->n_desc << 1)) != 0; + +#ifdef CLIB_HAVE_VEC256 + const u16 reqid_nic1 = 1U << ENA_MAX_LOG2_TXQ_SIZE; + const ena_tx_cdesc_t mask_reqid = { .req_id = reqid_sqe_idx_mask }, + match_ph0_nic1 = { .req_id = reqid_nic1, .phase = 0 }, + match_ph1_nic1 = { .req_id = reqid_nic1, .phase = 1 }, + mask_ph_nic = { .req_id = ~reqid_sqe_idx_mask, + .phase = 1 }; + /* both phase and req_id are in lower 32 bits */ + u32x8 mask_ph_nic_x8 = u32x8_splat (mask_ph_nic.as_u64); + u32x8 mask_reqid_x8 = u32x8_splat (mask_reqid.as_u64); + u32x8 match_ph_nic1_x8 = u32x8_splat ( + match_phase.phase ? match_ph1_nic1.as_u64 : match_ph0_nic1.as_u64); + u32x8 buf_inv_idx_x8 = u32x8_splat (VLIB_BUFFER_INVALID_INDEX); +#endif + +more: + while (n < n_to_check) + { + u16 req_id, n_in_chain; + +#ifdef CLIB_HAVE_VEC256 + while (n + 7 < n_to_check) + { + u32x8 r, v; + + /* load lower 32-bits of 8 CQEs in 256-bit register */ + r = u32x8_shuffle2 (*(u32x8u *) cd, *(u32x8u *) (cd + 4), 0, 2, 4, 6, + 8, 10, 12, 14); + + /* check if all 8 CQEs are completed and there is no chained bufs */ + if (u32x8_is_equal (r & mask_ph_nic_x8, match_ph_nic1_x8) == 0) + goto one_by_one; + + r &= mask_reqid_x8; + + /* take consumed buffer indices from ring */ + v = u32x8_gather_u32 (ctx->sq_buffer_indices, r, + sizeof (ctx->sq_buffer_indices[0])); + u32x8_scatter_u32 (ctx->sq_buffer_indices, r, buf_inv_idx_x8, + sizeof (ctx->sq_buffer_indices[0])); + *(u32x8u *) (buffers_to_free + n_buffers_to_free) = v; + n_buffers_to_free += 8; + + n += 8; + cd += 8; + continue; + } + one_by_one: +#endif + + if ((cd->as_u64 & mask_phase.as_u64) != match_phase.as_u64) + goto done; + + req_id = cd->req_id; + n_in_chain = req_id >> ENA_MAX_LOG2_TXQ_SIZE; + req_id &= reqid_sqe_idx_mask; + + buffers_to_free[n_buffers_to_free++] = ctx->sq_buffer_indices[req_id]; + ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX; + + if (PREDICT_FALSE (n_in_chain > 1)) + while (n_in_chain-- > 1) + { + req_id = (req_id + 1) & ctx->mask; + buffers_to_free[n_buffers_to_free++] = + ctx->sq_buffer_indices[req_id]; + ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX; + } + + n++; + cd++; + } + + if (PREDICT_FALSE (n == n_to_check)) + { + cq_next += n; + n = 0; + cd = cqes; + match_phase.phase ^= 1; +#ifdef CLIB_HAVE_VEC256 + match_ph_nic1_x8 ^= u32x8_splat (mask_phase.as_u64); +#endif + n_to_check = ctx->n_desc; + goto more; + } + +done: + + if (n_buffers_to_free) + { + cq_next += n; + + /* part two - free buffers stored in temporary array */ + vlib_buffer_free_no_next (vm, buffers_to_free, n_buffers_to_free); + txq->cq_next = cq_next; + + ena_txq_adv_sq_tail (ctx, txq); + } +} + +static_always_inline u16 +ena_txq_wr_sqe (vlib_main_t *vm, vlib_buffer_t *b, int use_iova, + ena_tx_desc_t *dp, u32 n_in_chain, ena_tx_desc_t desc) +{ + uword dma_addr = use_iova ? vlib_buffer_get_current_va (b) : + vlib_buffer_get_current_pa (vm, b); + u16 len = b->current_length; + + desc.req_id_hi = n_in_chain << (ENA_MAX_LOG2_TXQ_SIZE - 10); + desc.as_u16x8[0] = len; + ASSERT (dma_addr < 0xffffffffffff); /* > 48bit - should never happen */ + desc.as_u64x2[1] = dma_addr; /* this also overwrites header_length */ + + /* write descriptor as single 128-bit store */ + dp->as_u64x2 = desc.as_u64x2; + return len; +} + +static_always_inline void +ena_txq_copy_sqes (ena_tx_ctx_t *ctx, u32 off, ena_tx_desc_t *s, u32 n_desc) +{ + const u64 temp_phase_xor = (ena_tx_desc_t){ .phase = 1 }.as_u64x2[0]; + u32 n = 0; + + if (ctx->llq) + { + ena_tx_llq_desc128_t *llq_descs = (ena_tx_llq_desc128_t *) ctx->sqes; + for (; n < n_desc; n += 1, s += 1, off += 1) + { + ena_tx_llq_desc128_t td = {}; + u64 t = ctx->sqe_templates[off]; + u64x2 v = { t, 0 }; + ctx->sqe_templates[off] = t ^ temp_phase_xor; + td.desc[0].as_u64x2 = v | s->as_u64x2; + td.desc[0].phase = 1; + td.desc[0].header_length = 96; + td.desc[0].length -= 96; + td.desc[0].buff_addr_lo += 96; + vlib_buffer_t *b = + vlib_get_buffer (vlib_get_main (), ctx->sq_buffer_indices[off]); + clib_memcpy_fast (td.data, vlib_buffer_get_current (b), 96); + fformat (stderr, "%U\n", format_hexdump_u32, &td, 32); + fformat (stderr, "%U\n", format_ena_tx_desc, &td); + clib_memcpy_fast (llq_descs + off, &td, 128); + } + return; + } + +#ifdef CLIB_HAVE_VEC512 + u64x8 temp_phase_xor_x8 = u64x8_splat (temp_phase_xor); + for (; n + 7 < n_desc; n += 8, s += 8, off += 8) + { + u64x8 t8 = *(u64x8u *) (ctx->sqe_templates + off); + *(u64x8u *) (ctx->sqe_templates + off) = t8 ^ temp_phase_xor_x8; + u64x8 r0 = *(u64x8u *) s; + u64x8 r1 = *(u64x8u *) (s + 4); + r0 |= u64x8_shuffle2 (t8, (u64x8){}, 0, 9, 1, 11, 2, 13, 3, 15); + r1 |= u64x8_shuffle2 (t8, (u64x8){}, 4, 9, 5, 11, 6, 13, 7, 15); + *((u64x8u *) (ctx->sqes + off)) = r0; + *((u64x8u *) (ctx->sqes + off + 4)) = r1; + } +#elif defined(CLIB_HAVE_VEC256) + u64x4 temp_phase_xor_x4 = u64x4_splat (temp_phase_xor); + for (; n + 3 < n_desc; n += 4, s += 4, off += 4) + { + u64x4 t4 = *(u64x4u *) (ctx->sqe_templates + off); + *(u64x4u *) (ctx->sqe_templates + off) = t4 ^ temp_phase_xor_x4; + u64x4 r0 = *(u64x4u *) s; + u64x4 r1 = *(u64x4u *) (s + 2); + r0 |= u64x4_shuffle2 (t4, (u64x4){}, 0, 5, 1, 7); + r1 |= u64x4_shuffle2 (t4, (u64x4){}, 2, 5, 3, 7); + *((u64x4u *) (ctx->sqes + off)) = r0; + *((u64x4u *) (ctx->sqes + off + 2)) = r1; + } +#endif + + for (; n < n_desc; n += 1, s += 1, off += 1) + { + u64 t = ctx->sqe_templates[off]; + u64x2 v = { t, 0 }; + ctx->sqe_templates[off] = t ^ temp_phase_xor; + ctx->sqes[off].as_u64x2 = v | s->as_u64x2; + } +} + +static_always_inline u32 +ena_txq_enq_one (vlib_main_t *vm, ena_tx_ctx_t *ctx, vlib_buffer_t *b0, + ena_tx_desc_t *d, u16 n_free_desc, u32 *f, int use_iova) +{ + const ena_tx_desc_t single = { .first = 1, .last = 1 }; + vlib_buffer_t *b; + u32 i, n; + + /* non-chained buffer */ + if ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0) + { + ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d, 1, single); + f[0] = ctx->from[0]; + ctx->from += 1; + ctx->n_packets_left -= 1; + return 1; + } + + /* count number of buffers in chain */ + for (n = 1, b = b0; b->flags & VLIB_BUFFER_NEXT_PRESENT; n++) + b = vlib_get_buffer (vm, b->next_buffer); + + /* if chain is too long, drop packet */ + if (n > ENA_TX_MAX_TAIL_LEN + 1) + { + vlib_buffer_free_one (vm, ctx->from[0]); + ctx->from += 1; + ctx->n_packets_left -= 1; + ctx->n_dropped_chain_too_long++; + return 0; + } + + /* no enough descriptors to accomodate? */ + if (n > n_free_desc) + return 0; + + /* first */ + f++[0] = ctx->from[0]; + ctx->from += 1; + ctx->n_packets_left -= 1; + ctx->n_bytes += + ena_txq_wr_sqe (vm, b0, use_iova, d++, n, (ena_tx_desc_t){ .first = 1 }); + + /* mid */ + for (i = 1, b = b0; i < n - 1; i++) + { + f++[0] = b->next_buffer; + b = vlib_get_buffer (vm, b->next_buffer); + ctx->n_bytes += + ena_txq_wr_sqe (vm, b, use_iova, d++, 0, (ena_tx_desc_t){}); + } + + /* last */ + f[0] = b->next_buffer; + b = vlib_get_buffer (vm, b->next_buffer); + ctx->n_bytes += + ena_txq_wr_sqe (vm, b, use_iova, d, 0, (ena_tx_desc_t){ .last = 1 }); + + return n; +} + +static_always_inline uword +ena_txq_enq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq, int use_iova) +{ + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 *f = ctx->tmp_bi; + ena_tx_desc_t desc[ENA_TX_ENQ_BATCH_SZ], *d = desc; + const ena_tx_desc_t single = { .first = 1, .last = 1 }; + u32 n_desc_left, n; + + if (ctx->n_packets_left == 0) + return 0; + + if (ctx->n_free_slots == 0) + return 0; + + n_desc_left = clib_min (ENA_TX_ENQ_BATCH_SZ, ctx->n_free_slots); + + while (n_desc_left >= 4 && ctx->n_packets_left >= 8) + { + clib_prefetch_load (vlib_get_buffer (vm, ctx->from[4])); + b0 = vlib_get_buffer (vm, ctx->from[0]); + clib_prefetch_load (vlib_get_buffer (vm, ctx->from[5])); + b1 = vlib_get_buffer (vm, ctx->from[1]); + clib_prefetch_load (vlib_get_buffer (vm, ctx->from[6])); + b2 = vlib_get_buffer (vm, ctx->from[2]); + clib_prefetch_load (vlib_get_buffer (vm, ctx->from[7])); + b3 = vlib_get_buffer (vm, ctx->from[3]); + + if (PREDICT_FALSE (((b0->flags | b1->flags | b2->flags | b3->flags) & + VLIB_BUFFER_NEXT_PRESENT) == 0)) + { + ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d++, 1, single); + ctx->n_bytes += ena_txq_wr_sqe (vm, b1, use_iova, d++, 1, single); + ctx->n_bytes += ena_txq_wr_sqe (vm, b2, use_iova, d++, 1, single); + ctx->n_bytes += ena_txq_wr_sqe (vm, b3, use_iova, d++, 1, single); + vlib_buffer_copy_indices (f, ctx->from, 4); + ctx->from += 4; + ctx->n_packets_left -= 4; + + n_desc_left -= 4; + f += 4; + } + else + { + n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova); + if (n == 0) + break; + n_desc_left -= n; + f += n; + d += n; + } + } + + while (n_desc_left > 0 && ctx->n_packets_left > 0) + { + vlib_buffer_t *b0; + + b0 = vlib_get_buffer (vm, ctx->from[0]); + n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova); + if (n == 0) + break; + n_desc_left -= n; + f += n; + d += n; + } + + n = d - desc; + + if (n) + { + u32 head = txq->sq_head; + u32 offset = head & ctx->mask; + u32 n_before_wrap = ctx->n_desc - offset; + u32 n_copy; + + d = desc; + f = ctx->tmp_bi; + + if (n_before_wrap >= n) + { + n_copy = n; + vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f, + n_copy); + ena_txq_copy_sqes (ctx, offset, d, n_copy); + } + else + { + n_copy = n_before_wrap; + vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f, + n_copy); + ena_txq_copy_sqes (ctx, offset, d, n_copy); + + n_copy = n - n_before_wrap; + vlib_buffer_copy_indices (ctx->sq_buffer_indices, f + n_before_wrap, + n_copy); + ena_txq_copy_sqes (ctx, 0, d + n_before_wrap, n_copy); + } + + head += n; + __atomic_store_n (txq->sq_db, head, __ATOMIC_RELEASE); + txq->sq_head = head; + ctx->n_free_slots -= n; + + return n; + } + return 0; +} + +VNET_DEV_NODE_FN (ena_tx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (node); + vnet_dev_tx_queue_t *txq = tnr->tx_queue; + vnet_dev_t *dev = txq->port->dev; + ena_device_t *ed = vnet_dev_get_data (dev); + ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq); + u32 n_pkts = 0; + + ena_tx_ctx_t ctx = { .mask = txq->size - 1, + .n_desc = txq->size, + .n_packets_left = frame->n_vectors, + .from = vlib_frame_vector_args (frame), + .sqe_templates = eq->sqe_templates, + .sqes = eq->sqes, + .sq_buffer_indices = eq->buffer_indices, + .llq = ed->llq }; + + vnet_dev_tx_queue_lock_if_needed (txq); + + /* try 3 times to enquee packets by first freeing consumed from the ring + * and then trying to enqueue as much as possible */ + for (int i = 0; i < 3; i++) + { + /* free buffers consumed by ENA */ + if (eq->sq_head != eq->sq_tail) + ena_txq_deq (vm, &ctx, eq); + + /* enqueue new buffers, try until last attempt enqueues 0 packets */ + ctx.n_free_slots = ctx.n_desc - (eq->sq_head - eq->sq_tail); + + if (dev->va_dma) + while (ena_txq_enq (vm, &ctx, eq, /* va */ 1) > 0) + ; + else + while (ena_txq_enq (vm, &ctx, eq, /* va */ 0) > 0) + ; + + if (ctx.n_packets_left == 0) + break; + } + + vnet_dev_tx_queue_unlock_if_needed (txq); + + if (ctx.n_dropped_chain_too_long) + vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_CHAIN_TOO_LONG, + ctx.n_dropped_chain_too_long); + + n_pkts = frame->n_vectors - ctx.n_packets_left; + vlib_increment_combined_counter ( + vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + vm->thread_index, tnr->hw_if_index, n_pkts, ctx.n_bytes); + + if (ctx.n_packets_left) + { + vlib_buffer_free (vm, ctx.from, ctx.n_packets_left); + vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_NO_FREE_SLOTS, + ctx.n_packets_left); + } + + return n_pkts; +} diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 4de2deab7d0..a747ea9f966 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -236,6 +236,7 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE % CLIB_CACHE_LINE_BYTES == 0, "VLIB_BUFFER_PRE_DATA_SIZE must be divisible by cache line size"); #define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE) +#define VLIB_BUFFER_INVALID_INDEX 0xffffffff /** \brief Prefetch buffer metadata. The first 64 bytes of buffer contains most header information diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index ee3d5404f91..b832681632c 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -336,7 +336,7 @@ u32x8_scatter_one (u32x8 r, int index, void *p) } #define u32x8_gather_u32(base, indices, scale) \ - (u32x8) _mm256_i32gather_epi32 (base, (__m256i) indices, scale) + (u32x8) _mm256_i32gather_epi32 ((const int *) base, (__m256i) indices, scale) #ifdef __AVX512F__ #define u32x8_scatter_u32(base, indices, v, scale) \