Change StorageBasic to use an aligned allocator.
The storage used will now be aligned to `VTKM_CACHE_LINE_SIZE` bytes, resulting in slightly better cache usage and load/store performance. This define is set in `StorageBasic.h`. We also now detect if POSIX is available in `Configure.h` and will define `VTKM_POSIX` with `_POSIX_VERSION` if it's available. The AlignedAllocator used by StorageBasic is also STL compatible and can be used in STL containers, so users can use it in their std::vector and pass aligned user memory to the storage.
This commit is contained in:
parent
aeabacfc77
commit
046cd2d2b9
@ -45,7 +45,6 @@ VTKM_BOOST_POST_INCLUDE
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -83,7 +82,6 @@ enum BenchmarkName {
|
||||
/// device adapter
|
||||
template<class DeviceAdapterTag>
|
||||
class BenchmarkDeviceAdapter {
|
||||
typedef vtkm::cont::StorageTagBasic StorageTagBasic;
|
||||
typedef vtkm::cont::StorageTagBasic StorageTag;
|
||||
|
||||
typedef vtkm::cont::ArrayHandle<vtkm::Id, StorageTag> IdArrayHandle;
|
||||
@ -331,18 +329,17 @@ private:
|
||||
struct BenchSort {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
std::vector<Value> Values;
|
||||
ValueArrayHandle ValueHandle;
|
||||
boost::mt19937 Rng;
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
BenchSort() : Values(ARRAY_SIZE, Value()) {
|
||||
ValueHandle = vtkm::cont::make_ArrayHandle(Values);
|
||||
BenchSort(){
|
||||
ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
|
||||
}
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
vtkm::Float64 operator()(){
|
||||
for (size_t i = 0; i < Values.size(); ++i){
|
||||
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i){
|
||||
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
|
||||
}
|
||||
Timer timer;
|
||||
@ -365,20 +362,17 @@ private:
|
||||
|
||||
boost::mt19937 Rng;
|
||||
vtkm::Id N_KEYS;
|
||||
std::vector<Value> Values;
|
||||
ValueArrayHandle ValueHandle;
|
||||
IdArrayHandle KeyHandle;
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
BenchSortByKey(vtkm::Id percent_key) : N_KEYS((ARRAY_SIZE * percent_key) / 100),
|
||||
Values(ARRAY_SIZE, Value())
|
||||
{
|
||||
ValueHandle = vtkm::cont::make_ArrayHandle(Values);
|
||||
BenchSortByKey(vtkm::Id percent_key) : N_KEYS((ARRAY_SIZE * percent_key) / 100){
|
||||
ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
|
||||
}
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
vtkm::Float64 operator()(){
|
||||
for (size_t i = 0; i < Values.size(); ++i){
|
||||
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i){
|
||||
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
|
||||
}
|
||||
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(N_KEYS,
|
||||
|
@ -28,6 +28,29 @@
|
||||
|
||||
#include <vtkm/cont/internal/ArrayPortalFromIterators.h>
|
||||
|
||||
// Select the aligned-allocation backend. Exactly one VTKM_MEMALIGN_* macro is
// defined by this chain, in order of preference: POSIX (posix_memalign),
// Windows (_aligned_malloc), SSE (_mm_malloc) and finally plain malloc, which
// provides no alignment guarantee beyond what malloc itself gives.
#if defined(VTKM_POSIX)
#define VTKM_MEMALIGN_POSIX
#elif defined(_WIN32)
#define VTKM_MEMALIGN_WIN
#elif defined(__SSE__)
#define VTKM_MEMALIGN_SSE
#else
#define VTKM_MEMALIGN_NONE
#endif

// Pull in the header that declares the functions of the selected backend.
#if defined(VTKM_MEMALIGN_POSIX)
#include <stdlib.h>
#elif defined(VTKM_MEMALIGN_WIN)
#include <malloc.h>
#elif defined(VTKM_MEMALIGN_SSE)
#include <xmmintrin.h>
#else
// The fallback only needs malloc/free, which standard C declares in
// <stdlib.h>. <malloc.h> is non-standard and is not shipped on every platform.
#include <stdlib.h>
#endif

// Defines the cache line size in bytes to align allocations to
#define VTKM_CACHE_LINE_SIZE 64
|
||||
|
||||
namespace vtkm {
|
||||
namespace cont {
|
||||
|
||||
@ -36,6 +59,93 @@ struct StorageTagBasic { };
|
||||
|
||||
namespace internal {
|
||||
|
||||
/// Allocates \c size bytes using the backend selected by the VTKM_MEMALIGN_*
/// macros and returns the resulting pointer.
///
/// \param size  Number of bytes requested. A request for 0 bytes is served as
///              a 1 byte allocation so that a valid, freeable pointer is
///              always returned.
/// \param align Requested alignment in bytes. It is forwarded to the POSIX,
///              Windows and SSE backends; the plain malloc fallback ignores it.
/// \return A non-NULL pointer that must be released with free_aligned.
/// \throws std::bad_alloc if the backend fails to provide memory.
inline void* alloc_aligned(size_t size, size_t align){
  // The underlying allocators are allowed to answer a zero-byte request with
  // NULL, which the check below would misreport as an out-of-memory error.
  if (size == 0){
    size = 1;
  }
#if defined(VTKM_MEMALIGN_POSIX)
  // posix_memalign fails with EINVAL for alignments that are not a multiple
  // of sizeof(void*); rounding a smaller power of two up still satisfies it.
  if (align < sizeof(void*)){
    align = sizeof(void*);
  }
  void *mem = NULL;
  if (posix_memalign(&mem, align, size) != 0){
    mem = NULL;
  }
#elif defined(VTKM_MEMALIGN_WIN)
  void *mem = _aligned_malloc(size, align);
#elif defined(VTKM_MEMALIGN_SSE)
  void *mem = _mm_malloc(size, align);
#else
  // No aligned backend is available: the alignment request cannot be honoured.
  (void)align;
  void *mem = malloc(size);
#endif
  if (mem == NULL){
    throw std::bad_alloc();
  }
  return mem;
}
|
||||
/// Releases a pointer using the deallocation routine that matches the
/// VTKM_MEMALIGN_* backend selected at compile time.
///
/// \param mem Pointer to release.
inline void free_aligned(void *mem){
  // Both the POSIX backend and the unaligned fallback release with plain free.
#if defined(VTKM_MEMALIGN_POSIX) || \
    (!defined(VTKM_MEMALIGN_WIN) && !defined(VTKM_MEMALIGN_SSE))
  free(mem);
#elif defined(VTKM_MEMALIGN_WIN)
  _aligned_free(mem);
#else
  _mm_free(mem);
#endif
}
|
||||
|
||||
/// A simple aligned allocator type that will align allocations to `Alignment` bytes
|
||||
/// TODO: Once C++11 std::allocator_traits is better used by STL and we want to drop
|
||||
/// support for pre-C++11 we can drop a lot of the typedefs and functions here.
|
||||
template<typename T, size_t Alignment>
|
||||
struct AlignedAllocator {
|
||||
typedef T value_type;
|
||||
typedef T& reference;
|
||||
typedef const T& const_reference;
|
||||
typedef T* pointer;
|
||||
typedef const T* const_pointer;
|
||||
typedef void* void_pointer;
|
||||
typedef const void* const_void_pointer;
|
||||
typedef ptrdiff_t difference_type;
|
||||
typedef size_t size_type;
|
||||
|
||||
template<typename U>
|
||||
struct rebind {
|
||||
typedef AlignedAllocator<U, Alignment> other;
|
||||
};
|
||||
|
||||
AlignedAllocator(){}
|
||||
|
||||
template<typename Tb>
|
||||
AlignedAllocator(const AlignedAllocator<Tb, Alignment>&){}
|
||||
|
||||
pointer allocate(size_t n){
|
||||
return static_cast<pointer>(alloc_aligned(n * sizeof(T), Alignment));
|
||||
}
|
||||
void deallocate(pointer p, size_t){
|
||||
free_aligned(static_cast<void*>(p));
|
||||
}
|
||||
pointer address(reference r){
|
||||
return &r;
|
||||
}
|
||||
const_pointer address(const_reference r){
|
||||
return &r;
|
||||
}
|
||||
size_type max_size() const {
|
||||
return std::numeric_limits<size_type>::max() / sizeof(T);
|
||||
}
|
||||
void construct(pointer p, const T &t){
|
||||
new(p) T(t);
|
||||
}
|
||||
void destroy(pointer p){
|
||||
p->~T();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename U, size_t AlignA, size_t AlignB>
|
||||
bool operator==(const AlignedAllocator<T, AlignA>&, const AlignedAllocator<U, AlignB>&){
|
||||
return AlignA == AlignB;
|
||||
}
|
||||
template<typename T, typename U, size_t AlignA, size_t AlignB>
|
||||
bool operator!=(const AlignedAllocator<T, AlignA>&, const AlignedAllocator<U, AlignB>&){
|
||||
return AlignA != AlignB;
|
||||
}
|
||||
|
||||
/// A basic implementation of an Storage object.
|
||||
///
|
||||
/// \todo This storage does \em not construct the values within the array.
|
||||
@ -59,7 +169,7 @@ public:
|
||||
/// whether that would ever be useful. So, instead of jumping through hoops
|
||||
/// implementing them, just fix the allocator for now.
|
||||
///
|
||||
typedef std::allocator<ValueType> AllocatorType;
|
||||
typedef AlignedAllocator<ValueType, VTKM_CACHE_LINE_SIZE> AllocatorType;
|
||||
|
||||
public:
|
||||
|
||||
@ -71,7 +181,6 @@ public:
|
||||
DeallocateOnRelease(false),
|
||||
UserProvidedMemory( array == NULL ? false : true)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
@ -159,8 +268,7 @@ public:
|
||||
if (numberOfValues > 0)
|
||||
{
|
||||
AllocatorType allocator;
|
||||
this->Array = allocator.allocate(
|
||||
static_cast<std::size_t>(numberOfValues) );
|
||||
this->Array = allocator.allocate(static_cast<std::size_t>(numberOfValues));
|
||||
this->AllocatedSize = numberOfValues;
|
||||
this->NumberOfValues = numberOfValues;
|
||||
}
|
||||
|
@ -25,6 +25,16 @@
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
#include <vtkm/VecTraits.h>
|
||||
|
||||
// We use these to check if the aligned allocator provided by
|
||||
// StorageBasic can be used with all STL containers
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
|
||||
namespace {
|
||||
|
||||
const vtkm::Id ARRAY_SIZE = 10;
|
||||
@ -61,6 +71,49 @@ struct TemplatedTests
|
||||
return 29;
|
||||
}
|
||||
|
||||
void TestAlignedAllocatorSTL(){
|
||||
typedef typename StorageType::AllocatorType Allocator;
|
||||
std::vector<ValueType, Allocator> vec(ARRAY_SIZE, ValueType());
|
||||
StorageType store(&vec[0], ARRAY_SIZE);
|
||||
}
|
||||
|
||||
// This test checks that we can compile and use the allocator with all
|
||||
// STL containers
|
||||
void CompileSTLAllocator(){
|
||||
typedef typename StorageType::AllocatorType Allocator;
|
||||
typedef typename StorageType::AllocatorType::
|
||||
template rebind<std::pair<ValueType, ValueType> >::other PairAllocator;
|
||||
std::vector<ValueType, Allocator> v;
|
||||
v.push_back(ValueType());
|
||||
|
||||
std::deque<ValueType, Allocator> d;
|
||||
d.push_front(ValueType());
|
||||
|
||||
std::list<ValueType, Allocator> l;
|
||||
l.push_front(ValueType());
|
||||
|
||||
std::set<ValueType, std::less<ValueType>, Allocator> set;
|
||||
set.insert(ValueType());
|
||||
|
||||
std::map<ValueType, ValueType, std::less<ValueType>, PairAllocator> m;
|
||||
m[ValueType()] = ValueType();
|
||||
|
||||
std::multiset<ValueType, std::less<ValueType>, Allocator> ms;
|
||||
ms.insert(ValueType());
|
||||
|
||||
std::multimap<ValueType, ValueType, std::less<ValueType>, PairAllocator> mm;
|
||||
mm.insert(std::pair<ValueType, ValueType>(ValueType(), ValueType()));
|
||||
|
||||
std::stack<ValueType, std::deque<ValueType, Allocator> > stack;
|
||||
stack.push(ValueType());
|
||||
|
||||
std::queue<ValueType, std::deque<ValueType, Allocator> > queue;
|
||||
queue.push(ValueType());
|
||||
|
||||
std::priority_queue<ValueType, std::vector<ValueType, Allocator> > pqueue;
|
||||
pqueue.push(ValueType());
|
||||
}
|
||||
|
||||
/// Returned value should later be passed to StealArray2. It is best to
|
||||
/// put as much between the two test parts to maximize the chance of a
|
||||
/// deallocated array being overridden (and thus detected).
|
||||
@ -139,6 +192,9 @@ struct TemplatedTests
|
||||
BasicAllocation();
|
||||
|
||||
StealArray2(stolenArray);
|
||||
|
||||
TestAlignedAllocatorSTL();
|
||||
CompileSTLAllocator();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -46,6 +46,13 @@
|
||||
#define VTKM_GCC
|
||||
#endif
|
||||
|
||||
// On Unix-like systems (including macOS) <unistd.h> is available and defines
// _POSIX_VERSION when the platform is POSIX compliant. Expose that version as
// VTKM_POSIX so other headers can test for POSIX support.
#if (defined(__APPLE__) && defined(__MACH__)) || \
    defined(__unix__) || defined(__unix) || defined(unix)
#  include <unistd.h>
#  if defined(_POSIX_VERSION)
#    define VTKM_POSIX _POSIX_VERSION
#  endif
#endif
|
||||
|
||||
#if !defined(VTKM_USE_DOUBLE_PRECISION) && !defined(VTKM_NO_DOUBLE_PRECISION)
|
||||
#cmakedefine VTKM_USE_DOUBLE_PRECISION
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user