Add support for multiple microarchitectures in single binary

* compiler -march= parameter is changed from native to corei7
   so code is always genereted with instructions which are available
   on the Nehalem microarchitecture (up to SSE4.2)

 * compiler -mtune= parameter is added so code is optimized for
   corei7-avx which equals to Sandy Bridge microarchitecture

 * set of macros is added which allows run-time detection of available
   cpu instructions (e.g. clib_cpu_supports_avx())

 * set of macros is added which allows us to clone graph node funcitons
   where cloned function is optmized for different microarchitecture
   Those macros are using following attributes:
     __attribute__((flatten))
     __attribute__((target("arch=core-avx2)))

   I.e. If applied to foo_node_fn() macro will generate cloned
   functions foo_node_fn_avx2() and foo_node_fn_avx512() (future)
   It will also generate function void * foo_node_fn_multiarch_select()
   which detects available instruction set and returns pointer to the
   best matching function clone.

Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0
Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
Damjan Marion
2016-05-11 23:07:18 +02:00
parent 82e29c4558
commit 1c80e831b7
72 changed files with 439 additions and 34 deletions
+6
View File
@@ -4,6 +4,11 @@ ifeq ($(DPDK_MARCH),)
DPDK_MARCH="native"
endif
DPDK_TUNE = $(strip $($(PLATFORM)_mtune))
ifeq ($(DPDK_TUNE),)
DPDK_MARCH="generic"
endif
ifneq (,$(findstring debug,$(TAG)))
DPDK_DEBUG=y
else
@@ -14,6 +19,7 @@ DPDK_MAKE_ARGS = -C $(call find_source_fn,$(PACKAGE_SOURCE)) \
DPDK_BUILD_DIR=$(PACKAGE_BUILD_DIR) \
DPDK_INSTALL_DIR=$(PACKAGE_INSTALL_DIR) \
DPDK_MARCH=$(DPDK_MARCH) \
DPDK_TUNE=$(DPDK_TUNE) \
DPDK_DEBUG=$(DPDK_DEBUG)
+5 -2
View File
@@ -13,6 +13,9 @@
# vector packet processor
vpp_arch = native
vpp_march = corei7 # Nehalem Instruction set
vpp_mtune = corei7-avx # Optimize for Sandy Bridge
vpp_dpdk_arch = corei7
vpp_native_tools = vppapigen
vpp_uses_dpdk = yes
@@ -40,9 +43,9 @@ vpp_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \
vpp_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \
-fstack-protector-all -fPIC -Werror
vpp_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \
vpp_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \
-fstack-protector -fPIC -Werror
vpp_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \
vpp_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \
-fstack-protector -fPIC -Werror
vpp_gcov_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -march=$(MARCH) \
+4 -2
View File
@@ -13,6 +13,8 @@
# vector packet processor
vpp_lite_arch = native
vpp_lite_march = corei7 # Nehalem Instruction set
vpp_lite_mtune = corei7-avx # Optimize for Sandy Bridge
vpp_lite_native_tools = vppapigen
vpp_lite_uses_dpdk = no
@@ -30,7 +32,7 @@ vpp_lite_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARC
vpp_lite_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \
-fstack-protector-all -fPIC -Werror
vpp_lite_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \
vpp_lite_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \
-fstack-protector -fPIC -Werror
vpp_lite_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \
vpp_lite_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \
-fstack-protector -fPIC -Werror