Add support for multiple microarchitectures in single binary

* compiler -march= parameter is changed from native to corei7
   so code is always generated with instructions which are available
   on the Nehalem microarchitecture (up to SSE4.2)

 * compiler -mtune= parameter is added so code is optimized for
   corei7-avx, which corresponds to the Sandy Bridge microarchitecture

 * set of macros is added which allows run-time detection of available
   cpu instructions (e.g. clib_cpu_supports_avx())

 * set of macros is added which allows us to clone graph node functions
   where cloned function is optimized for different microarchitecture.
   Those macros are using following attributes:
     __attribute__((flatten))
     __attribute__((target("arch=core-avx2")))

   E.g. when applied to foo_node_fn(), the macro will generate cloned
   functions foo_node_fn_avx2() and foo_node_fn_avx512() (future)
   It will also generate function void * foo_node_fn_multiarch_select()
   which detects available instruction set and returns pointer to the
   best matching function clone.

Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2016-05-11 23:07:18 +02:00
parent 82e29c4558
commit 1c80e831b7
72 changed files with 439 additions and 34 deletions

View File

@@ -40,6 +40,7 @@
#ifndef included_vlib_node_h
#define included_vlib_node_h
#include <vppinfra/cpu.h>
#include <vppinfra/longjmp.h>
#include <vppinfra/timing_wheel.h>
#include <vlib/trace.h> /* for vlib_trace_filter_t */
@@ -149,6 +150,32 @@ static void __vlib_add_node_registration_##x (void) \
} \
__VA_ARGS__ vlib_node_registration_t x
/*
 * Multi-architecture node function support.
 *
 * VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt)
 *   Defines a function named fn_<arch> with the node-function signature
 *   (vm, node, frame) that simply returns fn (vm, node, frame).  The clone
 *   is marked flatten and target (tgt), so the compiler may inline fn into
 *   it and generate code for the instruction set named by tgt.
 *
 * VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)
 *   Applies the clone template to fn through foreach_march_variant.
 *   NOTE(review): foreach_march_variant is defined elsewhere (presumably
 *   vppinfra/cpu.h) and is expected to invoke the template once per
 *   variant with (arch, fn, tgt) arguments -- confirm against that header.
 *
 * VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
 *   Emits the clones, the fn_multiarch_select() helper produced by
 *   CLIB_MULTIARCH_SELECT_FN, and a constructor function that stores the
 *   pointer returned by fn_multiarch_select() in node.function.
 */
#if CLIB_DEBUG > 0
/*
 * Debug builds: all three macros expand to nothing, so no clones and no
 * constructor are emitted and node.function is not modified here.
 * The clone-template stub takes the same (arch, fn, tgt) parameter list as
 * the optimized definition below, so any invocation that compiles in a
 * non-debug build also compiles in a debug build.
 */
#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt)
#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)
#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
#else
/* Thin per-architecture wrapper around fn; see the header comment above. */
#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt)                \
  uword                                                                 \
  __attribute__ ((flatten))                                             \
  __attribute__ ((target (tgt)))                                        \
  CLIB_CPU_OPTIMIZED                                                    \
  fn ## _ ## arch ( struct vlib_main_t * vm,                            \
                    struct vlib_node_runtime_t * node,                  \
                    struct vlib_frame_t * frame)                        \
  { return fn (vm, node, frame); }

/* Instantiate one clone of fn for every entry of foreach_march_variant. */
#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)                          \
  foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn)

/*
 * Generate the clones and the selector, then install the selected function
 * into the node registration from a constructor (i.e. before main () runs).
 */
#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)                          \
  VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)                                \
  CLIB_MULTIARCH_SELECT_FN(fn, static inline)                           \
  static void __attribute__((__constructor__))                          \
  __vlib_node_function_multiarch_select_##node (void)                   \
  { node.function = fn ## _multiarch_select(); }
#endif
always_inline vlib_node_registration_t *
vlib_node_next_registered (vlib_node_registration_t * c)
{