vppinfra: revert clib_memcpy optimization
Looks like some compiler versions are producing wrong code when we are copying 9-16 bytes so reverting back to the original code. Change-Id: I74b5fa54a3b01f6288648f1cb0926030edd3b26f Signed-off-by: Damjan Marion <damarion@cisco.com>
This commit is contained in:
@ -51,19 +51,22 @@
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
typedef u8 u8x16u __attribute__ ((vector_size (16), aligned (1)));
|
||||
typedef u8 u8x32u __attribute__ ((vector_size (32), aligned (1)));
|
||||
|
||||
static inline void
|
||||
clib_mov16 (u8 * dst, const u8 * src)
|
||||
{
|
||||
*(u8x16u *) dst = *(u8x16u *) src;
|
||||
__m128i xmm0;
|
||||
|
||||
xmm0 = _mm_loadu_si128 ((const __m128i *) src);
|
||||
_mm_storeu_si128 ((__m128i *) dst, xmm0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
clib_mov32 (u8 * dst, const u8 * src)
|
||||
{
|
||||
*(u8x32u *) dst = *(u8x32u *) src;
|
||||
__m256i ymm0;
|
||||
|
||||
ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
|
||||
_mm256_storeu_si256 ((__m256i *) dst, ymm0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -51,19 +51,20 @@
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
typedef u8 u8x16u __attribute__ ((vector_size (16), aligned (1)));
|
||||
typedef u8 u8x32u __attribute__ ((vector_size (32), aligned (1)));
|
||||
|
||||
static inline void
|
||||
clib_mov16 (u8 * dst, const u8 * src)
|
||||
{
|
||||
*(u8x16u *) dst = *(u8x16u *) src;
|
||||
__m128i xmm0;
|
||||
|
||||
xmm0 = _mm_loadu_si128 ((const __m128i *) src);
|
||||
_mm_storeu_si128 ((__m128i *) dst, xmm0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
clib_mov32 (u8 * dst, const u8 * src)
|
||||
{
|
||||
*(u8x32u *) dst = *(u8x32u *) src;
|
||||
clib_mov16 ((u8 *) dst + 0 * 16, (const u8 *) src + 0 * 16);
|
||||
clib_mov16 ((u8 *) dst + 1 * 16, (const u8 *) src + 1 * 16);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
Reference in New Issue
Block a user