Add RDMA ibverb driver plugin

RDMA ibverb is a userspace API to efficiently rx/tx packets. This is an
initial, unoptimized driver targeting Mellanox cards.
Next steps should include batching, multiqueue and additional cards.

Change-Id: I0309c7a543f75f2f9317eaf63ca502ac7a093ef9
Signed-off-by: Benoît Ganne <bganne@cisco.com>
This commit is contained in:
Benoît Ganne
2019-03-25 11:41:34 +01:00
committed by Damjan Marion
parent 6bc6fd0aeb
commit fe750c248b
15 changed files with 1507 additions and 60 deletions

View File

@@ -31,11 +31,18 @@ include packages.mk
include packages/nasm.mk
include packages/ipsec-mb.mk
include packages/dpdk.mk
include packages/rdma-core.mk
.PHONY: clean
clean:
@rm -rf $(B) $(I)
.PHONY: install
install: dpdk-install rdma-core-install
.PHONY: config
config: dpdk-config rdma-core-config
##############################################################################
# .deb packaging
##############################################################################
@@ -62,11 +69,6 @@ build-deb: $(DEV_DEB)
install-deb:
ifneq ($(INSTALLED_VER),$(DEB_VER)-$(PKG_SUFFIX))
@echo "=========================================================="
@echo " Out of date vpp-ext-deps package installed."
@echo " Installed: $(INSTALLED_VER)"
@echo " Needed: $(DEB_VER)-$(PKG_SUFFIX)"
@echo "=========================================================="
@make $(DEV_DEB)
@sudo dpkg -i $(DEV_DEB)
else
@@ -78,9 +80,9 @@ endif
check-deb:
ifneq ($(INSTALLED_VER),$(DEB_VER)-$(PKG_SUFFIX))
@echo "=========================================================="
@echo " Outdated DPDK package detected:"
@echo " Installed: vpp-ext-deps $(INSTALLED_VER)"
@echo " Current: vpp-ext-deps $(DEB_VER)-$(PKG_SUFFIX)"
@echo " Out of date vpp-ext-deps package installed."
@echo " Installed: $(INSTALLED_VER)"
@echo " Needed: $(DEB_VER)-$(PKG_SUFFIX)"
@echo ""
@echo " Please upgrade by invoking 'make install-ext-deps'"
@echo " from the top level directory."
@@ -115,16 +117,16 @@ ifneq ($(INSTALLED_RPM_VER),$(RPM_VER)-$(PKG_SUFFIX))
sudo rpm -Uih --force $(DEV_RPM)
else
@echo "=========================================================="
@echo " Up-to-date DPDK package already installed"
@echo " Up-to-date vpp-ext-deps package already installed"
@echo "=========================================================="
endif
check-rpm:
ifneq ($(INSTALLED_RPM_VER),$(RPM_VER)-$(PKG_SUFFIX))
@echo "=========================================================="
@echo " Outdated DPDK package detected:"
@echo " Installed: vpp-ext-deps $(INSTALLED_RPM_VER)"
@echo " Current: vpp-ext-deps $(RPM_VER)-$(PKG_SUFFIX)"
@echo " Out of date vpp-ext-deps package installed."
@echo " Installed: $(INSTALLED_RPM_VER)"
@echo " Needed: $(RPM_VER)-$(PKG_SUFFIX)"
@echo ""
@echo " Please upgrade by invoking 'make install-ext-deps'"
@echo " from the top level directory."
@@ -140,9 +142,9 @@ endif
ebuild-build:
ifeq ($(INSTALLED_VER)$(INSTALLED_RPM_VER),)
@echo "=========================================================="
@echo "Building DPDK from source. Consider installing development"
@echo "package by invoking 'make install-ext-deps' from the"
@echo "top level directory"
@echo "Building vpp-ext-deps from source. Consider installing"
@echo "development package by invoking 'make install-ext-deps'"
@echo "from the top level directory"
@echo "=========================================================="
make config
else

View File

@@ -20,7 +20,6 @@ override_dh_clean:
make $(MAKE_ARGS) clean
override_dh_auto_configure:
make $(MAKE_ARGS) config
override_dh_install:
make $(MAKE_ARGS) install

View File

@@ -31,12 +31,12 @@ $1_install_log ?= $(B)/$1.install.log
downloads/$($1_tarball):
mkdir -p downloads
@if [ -e $(DL_CACHE_DIR)/$($1_tarball) ] ; \
then cp $(DL_CACHE_DIR)/$($1_tarball) downloads/ ; \
then cp $(DL_CACHE_DIR)/$($1_tarball) $$@ ; \
else \
echo "Downloading $($1_url)" ; \
curl -o downloads/$($1_tarball) -LO $($1_url) ; \
curl -o $$@ -LO $($1_url) ; \
fi
@rm -f $(B)/.download.ok
@rm -f $(B)/.$1.download.ok
$(B)/.$1.download.ok: downloads/$($1_tarball)
@mkdir -p $(B)

View File

@@ -167,9 +167,7 @@ define set
fi
endef
all: build
$(B)/custom-config: $(B)/.patch.ok Makefile
$(B)/custom-config: $(B)/.dpdk-patch.ok Makefile
@echo --- generating custom config from $(DPDK_SOURCE)/config/defconfig_$(DPDK_TARGET) ---
@cpp -undef -ffreestanding -x assembler-with-cpp $(DPDK_SOURCE)/config/defconfig_$(DPDK_TARGET) $@
$(call set,RTE_MACHINE,$(DPDK_MACHINE))
@@ -230,18 +228,19 @@ $(B)/custom-config: $(B)/.patch.ok Makefile
$(call set,RTE_LIBRTE_DPAA_PMD,n)
$(call set,RTE_LIBRTE_PMD_DPAA_SEC,n)
$(call set,RTE_LIBRTE_PMD_DPAA_EVENTDEV,n)
@rm -f .config.ok
@rm -f .dpdk-config.ok
$(CURDIR)/$(DPDK_TARBALL):
DPDK_DOWNLOADS = $(CURDIR)/downloads/$(DPDK_TARBALL)
$(DPDK_DOWNLOADS):
mkdir -p downloads
@if [ -e $(DPDK_DOWNLOAD_DIR)/$(DPDK_TARBALL) ] ; \
then cp $(DPDK_DOWNLOAD_DIR)/$(DPDK_TARBALL) $(CURDIR) ; \
else curl -o $(CURDIR)/$(DPDK_TARBALL) -LO $(DPDK_TAR_URL) ; \
then cp $(DPDK_DOWNLOAD_DIR)/$(DPDK_TARBALL) $@ ; \
else curl -o $@ -LO $(DPDK_TAR_URL) ; \
fi
@rm -f $(B)/.download.ok
@rm -f $(B)/.dpdk-download.ok
DPDK_DOWNLOADS = $(CURDIR)/$(DPDK_TARBALL)
$(B)/.download.ok: $(DPDK_DOWNLOADS)
$(B)/.dpdk-download.ok: $(DPDK_DOWNLOADS)
@mkdir -p $(B)
@openssl md5 $< | cut -f 2 -d " " - > $(B)/$(DPDK_TARBALL).md5sum
@([ "$$(<$(B)/$(DPDK_TARBALL).md5sum)" = "$(DPDK_$(DPDK_VERSION)_TARBALL_MD5_CKSUM)" ] || \
@@ -249,18 +248,18 @@ $(B)/.download.ok: $(DPDK_DOWNLOADS)
rm $(B)/$(DPDK_TARBALL).md5sum && false ))
@touch $@
.PHONY: download
download: $(B)/.download.ok
.PHONY: dpdk-download
dpdk-download: $(B)/.dpdk-download.ok
$(B)/.extract.ok: $(B)/.download.ok
$(B)/.dpdk-extract.ok: $(B)/.dpdk-download.ok
@echo --- extracting $(DPDK_TARBALL) ---
@tar --directory $(B) --extract --file $(CURDIR)/$(DPDK_TARBALL)
@tar --directory $(B) --extract --file $(DPDK_DOWNLOADS)
@touch $@
.PHONY: extract
extract: $(B)/.extract.ok
.PHONY: dpdk-extract
dpdk-extract: $(B)/.dpdk-extract.ok
$(B)/.patch.ok: $(B)/.extract.ok
$(B)/.dpdk-patch.ok: $(B)/.dpdk-extract.ok
ifneq ($(wildcard $(CURDIR)/patches/dpdk_$(DPDK_VERSION)/*.patch),)
@echo --- patching ---
@for f in $(CURDIR)/patches/dpdk_$(DPDK_VERSION)/*.patch ; do \
@@ -270,26 +269,23 @@ ifneq ($(wildcard $(CURDIR)/patches/dpdk_$(DPDK_VERSION)/*.patch),)
endif
@touch $@
.PHONY: patch
patch: $(B)/.patch.ok
.PHONY: dpdk-patch
dpdk-patch: $(B)/.dpdk-patch.ok
$(B)/.config.ok: $(B)/.patch.ok $(B)/custom-config
$(B)/.dpdk-config.ok: $(B)/.dpdk-patch.ok $(B)/custom-config
@make $(DPDK_MAKE_ARGS) config
@touch $@
.PHONY: config
config: $(B)/.config.ok
.PHONY: dpdk-config
dpdk-config: $(B)/.dpdk-config.ok
.PHONY: build-dpdk
build-dpdk: $(DPDK_BUILD_DEPS)
@if [ ! -e $(B)/.config.ok ] ; then echo 'Please run "make config" first' && false ; fi
$(B)/.dpdk-build.ok: dpdk-config $(DPDK_BUILD_DEPS)
@if [ ! -e $(B)/.dpdk-config.ok ] ; then echo 'Please run "make config" first' && false ; fi
@make $(DPDK_MAKE_ARGS) install
$(B)/.build.ok: build-dpdk
@touch $@
.PHONY: build
build: $(B)/.build.ok
.PHONY: dpdk-build
dpdk-build: $(B)/.dpdk-build.ok
.PHONY: install
install: $(B)/.build.ok
.PHONY: dpdk-install
dpdk-install: $(B)/.dpdk-build.ok

46
build/external/packages/rdma-core.mk vendored Normal file
View File

@@ -0,0 +1,46 @@
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# rdma-core release to fetch and build. The expected tarball checksum is
# looked up by version through the rdma-core_tarball_md5sum_<version>
# variables below, so add a new entry when bumping rdma-core_version.
rdma-core_version := 23
rdma-core_tarball := rdma-core-$(rdma-core_version).tar.gz
rdma-core_tarball_md5sum_22.1 := dde4d30e3db20893408ae51041117034
rdma-core_tarball_md5sum_23 := c78575735c4a71609c1a214ea16cd8dc
rdma-core_tarball_md5sum := $(rdma-core_tarball_md5sum_$(rdma-core_version))
rdma-core_tarball_strip_dirs := 1
# Fetch over HTTPS so the download is not exposed to on-path tampering.
rdma-core_url := https://github.com/linux-rdma/rdma-core/releases/download/v$(rdma-core_version)/$(rdma-core_tarball)
RDMA_FILES := include/infiniband/verbs.h \
include/infiniband/verbs_api.h \
include/infiniband/ib_user_ioctl_verbs.h \
include/rdma/ib_user_verbs.h \
lib/statics/libibverbs.a \
lib/statics/libmlx5.a
define rdma-core_config_cmds
cd $(rdma-core_build_dir) && \
cmake -G Ninja $(rdma-core_src_dir) \
-DENABLE_STATIC=1 -DENABLE_RESOLVE_NEIGH=0 -DNO_PYVERBS=1 -DENABLE_VALGRIND=0 \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_C_FLAGS=-fPIC > $(rdma-core_config_log)
endef
define rdma-core_build_cmds
cmake --build $(rdma-core_build_dir) -- libibverbs.a libmlx5.a > $(rdma-core_build_log)
endef
define rdma-core_install_cmds
mkdir -p $(rdma-core_install_dir)
tar -C $(rdma-core_build_dir) --xform='s|/statics/|/|' -hc $(RDMA_FILES) | tar -C $(rdma-core_install_dir) -xv > $(rdma-core_install_log)
endef
$(eval $(call package,rdma-core))

View File

@@ -0,0 +1,61 @@
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Locate the ibverbs headers and the static libibverbs / libmlx5 archives.
# The plugin is skipped (with a warning) when either archive is missing or
# when a trial link against them fails.
message(STATUS "RDMA plugins - looking for ibverbs")
find_path(IBVERBS_INCLUDE_DIR NAMES infiniband/verbs.h)
find_library(IBVERBS_LIB NAMES libibverbs.a)
find_library(MLX5_LIB NAMES libmlx5.a)
# Both static archives are required; bail out of this CMakeLists otherwise.
if (NOT IBVERBS_LIB OR NOT MLX5_LIB)
message(WARNING "RDMA plugins - ibverbs not found - rdma_plugin disabled")
return()
endif()
# Link the whole mlx5 archive into the plugin.
# NOTE(review): string_append is not defined in this file - presumably a
# project helper that appends to the named variable; confirm.
if (MLX5_LIB)
string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive")
endif()
# Trial-link an empty source as a PIC shared object against the archives
# found above to confirm they are actually usable for the plugin.
set(CMAKE_REQUIRED_FLAGS "-fPIC -shared ${IBVERBS_LIB} ${RDMA_LINK_FLAGS}")
CHECK_C_SOURCE_COMPILES("" IBVERBS_COMPILES_CHECK)
if (NOT IBVERBS_COMPILES_CHECK)
message(WARNING "RDMA plugins - no working ibverbs found - rdma_plugin disabled")
return()
endif()
message(STATUS "RDMA plugins - found ${IBVERBS_INCLUDE_DIR}")
message(STATUS "RDMA plugins - found ${IBVERBS_LIB}")
message(STATUS "RDMA plugins - found ${MLX5_LIB}")
include_directories(${IBVERBS_INCLUDE_DIR})
add_vpp_plugin(rdma
SOURCES
cli.c
device.c
format.c
plugin.c
input.c
output.c
MULTIARCH_SOURCES
input.c
output.c
LINK_FLAGS
"${RDMA_LINK_FLAGS}"
LINK_LIBRARIES
${IBVERBS_LIB}
)

133
src/plugins/rdma/cli.c Normal file
View File

@@ -0,0 +1,133 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#include <stdint.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <rdma/rdma.h>
/*
 * CLI handler for "create interface rdma".
 *
 * Parses one line of input, accepting "name <ifname>", then calls
 * rdma_create_if() with the collected arguments.
 *
 * Returns 0 when no input line is available, a parse error for any
 * unrecognised token, or otherwise the error reported by rdma_create_if()
 * through args.error (0 on success).
 */
static clib_error_t *
rdma_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
                        vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  rdma_create_if_args_t args;
  clib_error_t *err = 0;

  clib_memset (&args, 0, sizeof (rdma_create_if_args_t));

  /* Get a line of input. */
  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "name %s", &args.ifname))
        ;
      else
        {
          /* Report the offending token from the stream actually being
             parsed (line_input), not from the outer input. */
          err = clib_error_return (0, "unknown input `%U'",
                                   format_unformat_error, line_input);
          break;
        }
    }

  /* Always release the line buffer, including on the parse-error path. */
  unformat_free (line_input);

  if (err == 0)
    {
      rdma_create_if (vm, &args);
      err = args.error;
    }

  /* ifname may have been allocated by unformat even if parsing failed. */
  vec_free (args.ifname);
  return err;
}
/* Registers the "create interface rdma" CLI command and binds it to
   rdma_create_command_fn. */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_create_command, static) = {
.path = "create interface rdma",
.short_help = "create interface rdma <name ifname>",
.function = rdma_create_command_fn,
};
/* *INDENT-ON* */
/*
 * CLI handler for "delete interface rdma".
 *
 * Accepts either "sw_if_index <n>" or an interface name, checks that the
 * interface belongs to rdma_device_class, and deletes the matching device
 * with rdma_delete_if().
 *
 * Returns 0 on success or when no input line is available, otherwise a
 * clib error describing the parse or lookup failure.
 */
static clib_error_t *
rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
                        vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  u32 sw_if_index = ~0;
  vnet_hw_interface_t *hw;
  rdma_main_t *rm = &rdma_main;
  rdma_device_t *rd;
  vnet_main_t *vnm = vnet_get_main ();
  clib_error_t *err = 0;

  /* Get a line of input. */
  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
        ;
      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
                         vnm, &sw_if_index))
        ;
      else
        {
          /* Report the offending token from the stream actually being
             parsed (line_input), not from the outer input. */
          err = clib_error_return (0, "unknown input `%U'",
                                   format_unformat_error, line_input);
          break;
        }
    }

  /* Always release the line buffer, including on the parse-error path. */
  unformat_free (line_input);

  if (err)
    return err;

  if (sw_if_index == ~0)
    return clib_error_return (0,
                              "please specify interface name or sw_if_index");

  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
  if (hw == NULL || rdma_device_class.index != hw->dev_class_index)
    return clib_error_return (0, "not an RDMA interface");

  rd = pool_elt_at_index (rm->devices, hw->dev_instance);

  rdma_delete_if (vm, rd);

  return 0;
}
/* Registers the "delete interface rdma" CLI command and binds it to
   rdma_delete_command_fn. */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_delete_command, static) = {
.path = "delete interface rdma",
.short_help = "delete interface rdma "
"{<interface> | sw_if_index <sw_idx>}",
.function = rdma_delete_command_fn,
};
/* *INDENT-ON* */
/* Init function for the RDMA CLI; it does no work and always returns 0. */
clib_error_t *
rdma_cli_init (vlib_main_t * vm)
{
return 0;
}
VLIB_INIT_FUNCTION (rdma_cli_init);
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

607
src/plugins/rdma/device.c Normal file
View File

File diff suppressed because it is too large Load Diff

89
src/plugins/rdma/format.c Normal file
View File

@@ -0,0 +1,89 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <rdma/rdma.h>
/*
 * format() callback producing the interface name "rdma-<dev_instance>".
 * Takes one vararg: the u32 index of the device in rdma_main.devices.
 */
u8 *
format_rdma_device_name (u8 * s, va_list * args)
{
  u32 dev_index = va_arg (*args, u32);
  rdma_device_t *dev = vec_elt_at_index (rdma_main.devices, dev_index);

  return format (s, "rdma-%u", dev->dev_instance);
}
/*
 * format() callback printing the names of the flags set in rd->flags,
 * separated by single spaces.
 * Takes one vararg: a rdma_device_t pointer.
 */
u8 *
format_rdma_device_flags (u8 * s, va_list * args)
{
rdma_device_t *rd = va_arg (*args, rdma_device_t *);
u8 *t = 0;
/* For each flag in foreach_rdma_device_flags whose bit is set, append its
   string to the temporary vector t, prefixing a space once t is non-empty. */
#define _(a, b, c) if (rd->flags & (1 << a)) \
t = format (t, "%s%s", t ? " ":"", c);
foreach_rdma_device_flags
#undef _
s = format (s, "%v", t);
vec_free (t);
return s;
}
/*
 * format() callback describing an RDMA device: its flags, followed on a
 * new indented line by its stored error when one is set.
 * Takes one vararg: the u32 index of the device in rdma_main.devices.
 */
u8 *
format_rdma_device (u8 * s, va_list * args)
{
  u32 dev_index = va_arg (*args, u32);
  rdma_device_t *dev = vec_elt_at_index (rdma_main.devices, dev_index);
  u32 indent = format_get_indent (s);

  s = format (s, "flags: %U", format_rdma_device_flags, dev);

  if (dev->error == 0)
    return s;

  return format (s, "\n%Uerror %U", format_white_space, indent,
                 format_clib_error, dev->error);
}
/*
 * format() callback for rdma-input packet traces.
 * Varargs, in order: vlib_main_t *, vlib_node_t *, rdma_input_trace_t *.
 * Prints the hardware interface name, its index and the next node name.
 */
u8 *
format_rdma_input_trace (u8 * s, va_list * args)
{
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  vlib_node_t *node = va_arg (*args, vlib_node_t *);
  rdma_input_trace_t *trace = va_arg (*args, rdma_input_trace_t *);
  vnet_hw_interface_t *hw =
    vnet_get_hw_interface (vnet_get_main (), trace->hw_if_index);

  return format (s, "rdma: %v (%d) next-node %U",
                 hw->name, trace->hw_if_index,
                 format_vlib_next_node_name, vm, node->index,
                 trace->next_index);
}
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

202
src/plugins/rdma/input.c Normal file
View File

@@ -0,0 +1,202 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <rdma/rdma.h>
/* X-macro listing the rdma-input node errors as (symbol, description)
   pairs; expanded below into an enum and a matching string table. */
#define foreach_rdma_input_error \
_(BUFFER_ALLOC, "buffer alloc error")
/* Error codes RDMA_INPUT_ERROR_<symbol>; RDMA_INPUT_N_ERROR is the count. */
typedef enum
{
#define _(f,s) RDMA_INPUT_ERROR_##f,
foreach_rdma_input_error
#undef _
RDMA_INPUT_N_ERROR,
} rdma_input_error_t;
/* Descriptions in the same order as rdma_input_error_t. */
static __clib_unused char *rdma_input_error_strings[] = {
#define _(n,s) s,
foreach_rdma_input_error
#undef _
};
/*
 * Top up the receive queue with freshly allocated buffers.
 *
 * Allocates up to min (VLIB_FRAME_SIZE, free slots) buffers and posts each
 * one as a single-segment receive work request, with the buffer index as
 * wr_id. A buffer whose post fails is freed again; each successful post
 * increments rxq->n_enq.
 */
static_always_inline void
rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
                          rdma_rxq_t * rxq)
{
  u32 bis[VLIB_FRAME_SIZE];
  struct ibv_sge sge;
  struct ibv_recv_wr recv_wr, *failed_wr;
  u32 n_wanted, n_got, k;

  /* queue already full */
  if (rxq->n_enq >= rxq->size)
    return;

  n_wanted = clib_min (VLIB_FRAME_SIZE, rxq->size - rxq->n_enq);
  n_got = vlib_buffer_alloc (vm, bis, n_wanted);

  /* fields shared by every work request posted below */
  sge.length = vlib_buffer_get_default_data_size (vm);
  sge.lkey = rd->mr->lkey;
  recv_wr.next = NULL;
  recv_wr.sg_list = &sge;
  recv_wr.num_sge = 1;

  for (k = 0; k < n_got; k++)
    {
      vlib_buffer_t *buf = vlib_get_buffer (vm, bis[k]);

      sge.addr = vlib_buffer_get_va (buf);
      recv_wr.wr_id = bis[k];

      if (ibv_post_recv (rxq->qp, &recv_wr, &failed_wr) == 0)
        rxq->n_enq++;
      else
        vlib_buffer_free (vm, bis + k, 1);
    }
}
/*
 * Receive path for one RDMA device queue.
 *
 * Polls the queue's completion queue for up to VLIB_FRAME_SIZE completions,
 * turns each one into a buffer on a new frame for the next node, adds
 * traces if requested, updates the RX counters and refills the queue.
 *
 * Returns the number of packets received (0 when the poll returns nothing
 * or fails).
 */
static_always_inline uword
rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                          vlib_frame_t * frame, rdma_device_t * rd, u16 qid)
{
  vnet_main_t *vnm = vnet_get_main ();
  rdma_rxq_t *rxq = vec_elt_at_index (rd->rxqs, qid);
  u32 n_trace;
  struct ibv_wc wc[VLIB_FRAME_SIZE];
  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
  u32 *bi, *to_next, n_left_to_next;
  u32 i;
  u32 n_rx_packets, n_rx_bytes = 0;
  int n_polled;

  /* ibv_poll_cq returns an int and is negative on failure, so keep the
     result signed until it has been checked. Storing it directly in a u32
     would turn an error into a huge packet count and overrun wc[]. */
  n_polled = ibv_poll_cq (rxq->cq, VLIB_FRAME_SIZE, wc);
  if (n_polled <= 0)
    {
      /* Nothing received: just top up the queue and leave, rather than
         building and enqueueing an empty frame. */
      rdma_device_input_refill (vm, rd, rxq);
      return 0;
    }
  n_rx_packets = n_polled;

  if (PREDICT_FALSE (rd->per_interface_next_index != ~0))
    next_index = rd->per_interface_next_index;

  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);

  for (i = 0; i < n_rx_packets; i++)
    {
      /* wr_id carries the buffer index set when the buffer was posted */
      u32 buffer_index = wc[i].wr_id;
      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
      b->current_length = wc[i].byte_len;
      vnet_buffer (b)->sw_if_index[VLIB_RX] = rd->sw_if_index;
      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
      to_next[i] = buffer_index;
      n_rx_bytes += wc[i].byte_len;
    }

  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
    {
      u32 n_left = n_rx_packets;
      bi = to_next;

      while (n_trace && n_left)
        {
          vlib_buffer_t *b;
          rdma_input_trace_t *tr;
          b = vlib_get_buffer (vm, bi[0]);
          vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 0);
          tr = vlib_add_trace (vm, node, b, sizeof (*tr));
          tr->next_index = next_index;
          tr->hw_if_index = rd->hw_if_index;

          /* next */
          n_trace--;
          n_left--;
          bi++;
        }
      vlib_set_trace_count (vm, node, n_trace);
    }

  if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
    {
      vlib_next_frame_t *nf;
      vlib_frame_t *f;
      ethernet_input_frame_t *ef;
      /* every packet in this frame comes from the same interface, so tag
         the frame with that interface's indices */
      nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
      f = vlib_get_frame (vm, nf->frame_index);
      f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;

      ef = vlib_frame_scalar_args (f);
      ef->sw_if_index = rd->sw_if_index;
      ef->hw_if_index = rd->hw_if_index;
      //f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
    }

  n_left_to_next -= n_rx_packets;
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);

  /* NOTE(review): the combined_sw_if_counters are indexed here with
     hw_if_index, as in the original - confirm this is intended. */
  vlib_increment_combined_counter
    (vnm->interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, vm->thread_index,
     rd->hw_if_index, n_rx_packets, n_rx_bytes);

  /* the received buffers have left the queue; post replacements */
  rxq->n_enq -= n_rx_packets;
  rdma_device_input_refill (vm, rd, rxq);

  return n_rx_packets;
}
/*
 * rdma-input node function: runs the receive path on every device/queue
 * pair assigned to this node, skipping devices that are not admin-up, and
 * returns the total number of packets received.
 */
VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
  vnet_device_and_queue_t *dq;
  u32 total_rx = 0;

  foreach_device_and_queue (dq, rt->devices_and_queues)
  {
    rdma_device_t *dev =
      vec_elt_at_index (rdma_main.devices, dq->dev_instance);

    if (dev->flags & RDMA_DEVICE_F_ADMIN_UP)
      total_rx +=
        rdma_device_input_inline (vm, node, frame, dev, dq->queue_id);
  }

  return total_rx;
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rdma_input_node) = {
.name = "rdma-input",
.sibling_of = "device-input",
.format_trace = format_rdma_input_trace,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_DISABLED,
.n_errors = RDMA_INPUT_N_ERROR,
.error_strings = rdma_input_error_strings,
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

133
src/plugins/rdma/output.c Normal file
View File

@@ -0,0 +1,133 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vppinfra/ring.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <rdma/rdma.h>
/*
 * Post up to n_left buffers on the TX queue, one signaled single-segment
 * send work request per buffer, with the buffer index as wr_id.
 *
 * Stops at the first ibv_post_send failure. Adds the number of posted
 * packets to *n_tx_packets and their lengths to *n_tx_bytes, and returns
 * the number of buffers posted.
 */
static_always_inline u16
rdma_device_output_tx (vlib_main_t * vm, rdma_device_t * rd, rdma_txq_t * txq,
                       u32 * buffers, u16 n_left, u32 * n_tx_packets,
                       u32 * n_tx_bytes)
{
  struct ibv_send_wr send_wr, *failed_wr;
  struct ibv_sge sge;
  u16 n_sent = 0;

  while (n_sent < n_left)
    {
      vlib_buffer_t *buf = vlib_get_buffer (vm, buffers[n_sent]);

      sge.addr = vlib_buffer_get_current_va (buf);
      sge.length = buf->current_length;
      sge.lkey = rd->mr->lkey;

      memset (&send_wr, 0, sizeof (send_wr));
      send_wr.wr_id = buffers[n_sent];
      send_wr.sg_list = &sge;
      send_wr.num_sge = 1;
      send_wr.opcode = IBV_WR_SEND;
      send_wr.send_flags = IBV_SEND_SIGNALED;

      if (ibv_post_send (txq->qp, &send_wr, &failed_wr) != 0)
        break;

      *n_tx_bytes += buf->current_length;
      n_sent++;
    }

  *n_tx_packets += n_sent;
  return n_sent;
}
/*
 * Reclaim transmitted buffers: poll the TX completion queue for up to
 * VLIB_FRAME_SIZE completions and free the buffers whose indices were
 * stored in wr_id. Does nothing when the poll returns zero or an error.
 */
static_always_inline void
rdma_device_output_free (vlib_main_t * vm, rdma_txq_t * txq)
{
  struct ibv_wc completions[VLIB_FRAME_SIZE];
  u32 bis[VLIB_FRAME_SIZE];
  int n_done, k;

  n_done = ibv_poll_cq (txq->cq, VLIB_FRAME_SIZE, completions);

  if (n_done > 0)
    {
      for (k = 0; k < n_done; k++)
        bis[k] = completions[k].wr_id;

      vlib_buffer_free (vm, bis, n_done);
    }
}
/*
 * TX function of the RDMA device class.
 *
 * Picks a TX queue from the thread index, then alternates between
 * reclaiming completed buffers and posting the frame's buffers until the
 * whole frame is posted. If the queue still cannot take the remaining
 * buffers after the initial attempt plus n_retry retries, they are freed
 * and counted as RDMA_TX_ERROR_NO_FREE_SLOTS.
 *
 * Returns the number of buffers posted for transmission.
 */
VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
                                             vlib_node_runtime_t * node,
                                             vlib_frame_t * frame)
{
  vnet_main_t *vnm = vnet_get_main ();
  rdma_main_t *rm = &rdma_main;
  vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
  rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
  u32 thread_index = vm->thread_index;
  u8 qid = thread_index;
  rdma_txq_t *txq = vec_elt_at_index (rd->txqs, qid % vec_len (rd->txqs));
  u32 *buffers = vlib_frame_vector_args (frame);
  u16 n_left;
  u16 n_retry = 5;
  u32 n_tx_packets = 0, n_tx_bytes = 0;

  clib_spinlock_lock_if_init (&txq->lock);

  n_left = frame->n_vectors;

  while (n_left)
    {
      u16 n;

      /* reclaim completed sends first so slots are free for this pass */
      rdma_device_output_free (vm, txq);
      n =
        rdma_device_output_tx (vm, rd, txq, buffers, n_left, &n_tx_packets,
                               &n_tx_bytes);
      n_left -= n;
      buffers += n;

      /* Drop the remainder only once the retry budget is used up. The
         previous test (n_left && n_retry--) was true on the very first
         partial send, so no retry ever took place. */
      if (n_left && n_retry-- == 0)
        {
          vlib_buffer_free (vm, buffers, n_left);
          vlib_error_count (vm, node->node_index,
                            RDMA_TX_ERROR_NO_FREE_SLOTS, n_left);
          break;
        }
    }

  clib_spinlock_unlock_if_init (&txq->lock);

  /* NOTE(review): the combined_sw_if_counters are indexed here with
     hw_if_index, as in the original - confirm this is intended. */
  vlib_increment_combined_counter
    (vnm->interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_TX, thread_index,
     rd->hw_if_index, n_tx_packets, n_tx_bytes);

  return frame->n_vectors - n_left;
}
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

35
src/plugins/rdma/plugin.c Normal file
View File

@@ -0,0 +1,35 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "RDMA (ibverb) Device Plugin",
};
/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

141
src/plugins/rdma/rdma.h Normal file
View File

@@ -0,0 +1,141 @@
/*
*------------------------------------------------------------------
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*------------------------------------------------------------------
*/
#ifndef _RDMA_H_
#define _RDMA_H_
#include <infiniband/verbs.h>
#include <vlib/log.h>
/*
 * X-macro listing the device flags as (bit position, symbol, description).
 * The last entry deliberately has no trailing line continuation, so the
 * macro body can never swallow whatever line follows it.
 */
#define foreach_rdma_device_flags \
  _(0, INITIALIZED, "initialized") \
  _(1, ERROR, "error") \
  _(2, ADMIN_UP, "admin-up") \
  _(3, VA_DMA, "vaddr-dma") \
  _(4, LINK_UP, "link-up") \
  _(5, SHARED_TXQ_LOCK, "shared-txq-lock") \
  _(6, ELOG, "elog")

/* Bit masks RDMA_DEVICE_F_<symbol> = (1 << bit position). */
enum
{
#define _(a, b, c) RDMA_DEVICE_F_##b = (1 << a),
  foreach_rdma_device_flags
#undef _
};
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 size;
u32 n_enq;
struct ibv_cq *cq;
struct ibv_qp *qp;
} rdma_rxq_t;
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 size;
u32 n_enq;
struct ibv_cq *cq;
struct ibv_qp *qp;
clib_spinlock_t lock;
} rdma_txq_t;
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 flags;
u32 per_interface_next_index;
u32 dev_instance;
u32 sw_if_index;
u32 hw_if_index;
u32 async_event_clib_file_index;
rdma_rxq_t *rxqs;
rdma_txq_t *txqs;
u8 hwaddr[6];
vlib_pci_addr_t pci_addr;
struct ibv_context *ctx;
struct ibv_pd *pd;
struct ibv_mr *mr;
struct ibv_flow *flow_ucast;
struct ibv_flow *flow_mcast;
/* error */
clib_error_t *error;
} rdma_device_t;
typedef struct
{
rdma_device_t *devices;
vlib_log_class_t log_class;
} rdma_main_t;
extern rdma_main_t rdma_main;
typedef struct
{
u8 *ifname;
/* return */
int rv;
u32 sw_if_index;
clib_error_t *error;
} rdma_create_if_args_t;
void rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args);
void rdma_delete_if (vlib_main_t * vm, rdma_device_t * rd);
extern vlib_node_registration_t rdma_input_node;
extern vnet_device_class_t rdma_device_class;
/* format.c */
format_function_t format_rdma_device;
format_function_t format_rdma_device_name;
format_function_t format_rdma_input_trace;
typedef struct
{
u32 next_index;
u32 hw_if_index;
} rdma_input_trace_t;
#define foreach_rdma_tx_func_error \
_(NO_FREE_SLOTS, "no free tx slots")
typedef enum
{
#define _(f,s) RDMA_TX_ERROR_##f,
foreach_rdma_tx_func_error
#undef _
RDMA_TX_N_ERROR,
} rdma_tx_func_error_t;
#endif /* _RDMA_H_ */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/

View File

@@ -347,16 +347,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
}
if (!args->mac_addr_set)
{
f64 now = vlib_time_now (vm);
u32 rnd;
rnd = (u32) (now * 1e6);
rnd = random_u32 (&rnd);
ethernet_mac_address_generate (args->mac_addr);
memcpy (args->mac_addr + 2, &rnd, sizeof (rnd));
args->mac_addr[0] = 2;
args->mac_addr[1] = 0xfe;
}
vif->rx_ring_sz = args->rx_ring_sz != 0 ? args->rx_ring_sz : 256;
vif->tx_ring_sz = args->tx_ring_sz != 0 ? args->tx_ring_sz : 256;
clib_memcpy (vif->mac_addr, args->mac_addr, 6);

View File

@@ -70,6 +70,17 @@ ethernet_mac_address_is_zero (const u8 * mac)
return ((*((u32 *) mac) == 0) && (*((u16 *) (mac + 4)) == 0));
}
/*
 * Write a pseudo-random MAC address of the form 02:fe:xx:xx:xx:xx into the
 * 6-byte buffer `mac`. The four random bytes come from random_u32(),
 * seeded with the low 32 bits of the CPU time counter.
 */
static inline void
ethernet_mac_address_generate (u8 * mac)
{
  u32 seed = clib_cpu_time_now ();
  u32 tail = random_u32 (&seed);

  mac[0] = 2;
  mac[1] = 0xfe;
  memcpy (mac + 2, &tail, sizeof (tail));
}
static inline int
ethernet_mac_address_equal (const u8 * a, const u8 * b)
{