DOC-ONLY: document latest multi-arch support scheme
Signed-off-by: Dave Barach <dave@barachs.net> Change-Id: I613415c109f648248ee04dd76d5e652dbf21bc7f
This commit is contained in:
@ -9,3 +9,4 @@ Reference
|
|||||||
vppvagrant/index.rst
|
vppvagrant/index.rst
|
||||||
cmdreference/index.rst
|
cmdreference/index.rst
|
||||||
buildsystem/index.rst
|
buildsystem/index.rst
|
||||||
|
multiarch/index.rst
|
||||||
|
11
docs/reference/multiarch/index.rst
Normal file
11
docs/reference/multiarch/index.rst
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
.. _multiarch:
|
||||||
|
|
||||||
|
Multi-architecture support
|
||||||
|
==========================
|
||||||
|
|
||||||
|
This reference guide describes how to use the vpp muli-architecture support scheme
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
nodefns
|
160
docs/reference/multiarch/nodefns.rst
Normal file
160
docs/reference/multiarch/nodefns.rst
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
Multi-Architecture Graph Node Cookbook
|
||||||
|
======================================
|
||||||
|
|
||||||
|
In the context of graph node dispatch functions, it's easy enough to
|
||||||
|
use the vpp multi-architecture support setup. The point of the scheme
|
||||||
|
is simple: for performance-critical nodes, generate multiple CPU
|
||||||
|
hardware-dependent versions of the node dispatch functions, and pick
|
||||||
|
the best one at runtime.
|
||||||
|
|
||||||
|
The vpp scheme is simple enough to use, but details matter.
|
||||||
|
|
||||||
|
100,000 foot view
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
We compile entire graph node dispatch function implementation files
|
||||||
|
multiple times. These compilations give rise to multiple versions of
|
||||||
|
the graph node dispatch functions. Per-node constructor-functions
|
||||||
|
interrogate CPU hardware, select the node dispatch function variant to
|
||||||
|
use, and set the vlib_node_registration_t ".function" member to the
|
||||||
|
address of the selected variant.
|
||||||
|
|
||||||
|
Details
|
||||||
|
-------
|
||||||
|
|
||||||
|
Declare the node dispatch function as shown, using the VLIB\_NODE\_FN macro. The
|
||||||
|
name of the node function **MUST** match the name of the graph node.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
VLIB_NODE_FN (ip4_sdp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
|
||||||
|
vlib_frame_t * frame)
|
||||||
|
{
|
||||||
|
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
|
||||||
|
return ip46_sdp_inline (vm, node, frame, 1 /* is_ip4 */ ,
|
||||||
|
1 /* is_trace */ );
|
||||||
|
else
|
||||||
|
return ip46_sdp_inline (vm, node, frame, 1 /* is_ip4 */ ,
|
||||||
|
0 /* is_trace */ );
|
||||||
|
}
|
||||||
|
|
||||||
|
We need to generate *precisely one copy* of the
|
||||||
|
vlib_node_registration_t, error strings, and packet trace decode function.
|
||||||
|
|
||||||
|
Simply bracket these items with "#ifndef CLIB_MARCH_VARIANT...#endif":
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
#ifndef CLIB_MARCH_VARIANT
|
||||||
|
static u8 *
|
||||||
|
format_sdp_trace (u8 * s, va_list * args)
|
||||||
|
{
|
||||||
|
<snip>
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
#ifndef CLIB_MARCH_VARIANT
|
||||||
|
static char *sdp_error_strings[] = {
|
||||||
|
#define _(sym,string) string,
|
||||||
|
foreach_sdp_error
|
||||||
|
#undef _
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
#ifndef CLIB_MARCH_VARIANT
|
||||||
|
VLIB_REGISTER_NODE (ip4_sdp_node) =
|
||||||
|
{
|
||||||
|
// DO NOT set the .function structure member.
|
||||||
|
// The multiarch selection __attribute__((constructor)) function
|
||||||
|
// takes care of it at runtime
|
||||||
|
.name = "ip4-sdp",
|
||||||
|
.vector_size = sizeof (u32),
|
||||||
|
.format_trace = format_sdp_trace,
|
||||||
|
.type = VLIB_NODE_TYPE_INTERNAL,
|
||||||
|
|
||||||
|
.n_errors = ARRAY_LEN(sdp_error_strings),
|
||||||
|
.error_strings = sdp_error_strings,
|
||||||
|
|
||||||
|
.n_next_nodes = SDP_N_NEXT,
|
||||||
|
|
||||||
|
/* edit / add dispositions here */
|
||||||
|
.next_nodes =
|
||||||
|
{
|
||||||
|
[SDP_NEXT_DROP] = "ip4-drop",
|
||||||
|
},
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
To belabor the point: *do not* set the ".function" member! That's the job of the multi-arch
|
||||||
|
selection \_\_attribute\_\_((constructor)) function
|
||||||
|
|
||||||
|
Always inline node dispatch functions
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
It's typical for a graph dispatch function to contain one or more
|
||||||
|
calls to an inline function. See above. If your node dispatch function
|
||||||
|
is structured that way, make *ABSOLUTELY CERTAIN* to use the
|
||||||
|
"always_inline" macro:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
always_inline uword
|
||||||
|
ip46_sdp_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
|
||||||
|
vlib_frame_t * frame,
|
||||||
|
int is_ip4, int is_trace)
|
||||||
|
{ ... }
|
||||||
|
|
||||||
|
Otherwise, the compiler is highly likely NOT to build multiple
|
||||||
|
versions of the guts of your dispatch function.
|
||||||
|
|
||||||
|
It's fairly easy to spot this mistake in "perf top." If you see, for
|
||||||
|
example, a bunch of functions with names of the form
|
||||||
|
"xxx_node_fn_avx2" in the profile, *BUT* your brand-new node function
|
||||||
|
shows up with a name of the form "xxx_inline.isra.1", it's quite likely
|
||||||
|
that the inline was declared "static inline" instead of "always_inline".
|
||||||
|
|
||||||
|
Add the required Makefile.am content
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
If the component in question already sets a "multiversioning_sources"
|
||||||
|
variable, simply add the indicated .c file to the list. If not, add
|
||||||
|
the required boilerplate:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
if CPU_X86_64
|
||||||
|
sdp_multiversioning_sources = \
|
||||||
|
sdp/node.c \
|
||||||
|
sdp/sdp_slookup.c
|
||||||
|
|
||||||
|
if CC_SUPPORTS_AVX2
|
||||||
|
###############################################################
|
||||||
|
# AVX2
|
||||||
|
###############################################################
|
||||||
|
libsdp_plugin_avx2_la_SOURCES = $(sdp_multiversioning_sources)
|
||||||
|
libsdp_plugin_avx2_la_CFLAGS = \
|
||||||
|
$(AM_CFLAGS) @CPU_AVX2_FLAGS@ \
|
||||||
|
-DCLIB_MARCH_VARIANT=avx2
|
||||||
|
noinst_LTLIBRARIES += libsdp_plugin_avx2.la
|
||||||
|
sdp_plugin_la_LIBADD += libsdp_plugin_avx2.la
|
||||||
|
endif
|
||||||
|
|
||||||
|
if CC_SUPPORTS_AVX512
|
||||||
|
###############################################################
|
||||||
|
# AVX512
|
||||||
|
###############################################################
|
||||||
|
libsdp_plugin_avx512_la_SOURCES = $(sdp_multiversioning_sources)
|
||||||
|
libsdp_plugin_avx512_la_CFLAGS = \
|
||||||
|
$(AM_CFLAGS) @CPU_AVX512_FLAGS@ \
|
||||||
|
-DCLIB_MARCH_VARIANT=avx512
|
||||||
|
noinst_LTLIBRARIES += libsdp_plugin_avx512.la
|
||||||
|
sdp_plugin_la_LIBADD += libsdp_plugin_avx512.la
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
A certain amount of cut-paste-modify is currently required. Hopefully
|
||||||
|
we'll manage to improve the scheme in the future.
|
Reference in New Issue
Block a user