Change StorageBasic to use an aligned allocator.

The storage used will now be aligned to `VTKM_CACHE_LINE_SIZE` bytes,
resulting in slightly better cache usage and load/store performance.
This define is set in `StorageBasic.h`. We also now detect if POSIX is
available in Configure.h and will define VTKM_POSIX with _POSIX_VERSION
if it's available.

The AlignedAllocator used by StorageBasic is also STL compatible
and can be used in STL containers, so users can use it in their
std::vector and pass aligned user memory to the storage.
This commit is contained in:
Will Usher 2015-08-04 15:56:33 -06:00
parent aeabacfc77
commit 046cd2d2b9
4 changed files with 181 additions and 16 deletions

@ -45,7 +45,6 @@ VTKM_BOOST_POST_INCLUDE
#include <cmath>
#include <ctime>
#include <utility>
#include <vector>
#include <string>
#ifdef _WIN32
@ -83,7 +82,6 @@ enum BenchmarkName {
/// device adapter
template<class DeviceAdapterTag>
class BenchmarkDeviceAdapter {
typedef vtkm::cont::StorageTagBasic StorageTagBasic;
typedef vtkm::cont::StorageTagBasic StorageTag;
typedef vtkm::cont::ArrayHandle<vtkm::Id, StorageTag> IdArrayHandle;
@ -331,18 +329,17 @@ private:
struct BenchSort {
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
std::vector<Value> Values;
ValueArrayHandle ValueHandle;
boost::mt19937 Rng;
VTKM_CONT_EXPORT
BenchSort() : Values(ARRAY_SIZE, Value()) {
ValueHandle = vtkm::cont::make_ArrayHandle(Values);
BenchSort(){
ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
}
VTKM_CONT_EXPORT
vtkm::Float64 operator()(){
for (size_t i = 0; i < Values.size(); ++i){
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i){
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
Timer timer;
@ -365,20 +362,17 @@ private:
boost::mt19937 Rng;
vtkm::Id N_KEYS;
std::vector<Value> Values;
ValueArrayHandle ValueHandle;
IdArrayHandle KeyHandle;
VTKM_CONT_EXPORT
BenchSortByKey(vtkm::Id percent_key) : N_KEYS((ARRAY_SIZE * percent_key) / 100),
Values(ARRAY_SIZE, Value())
{
ValueHandle = vtkm::cont::make_ArrayHandle(Values);
BenchSortByKey(vtkm::Id percent_key) : N_KEYS((ARRAY_SIZE * percent_key) / 100){
ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
}
VTKM_CONT_EXPORT
vtkm::Float64 operator()(){
for (size_t i = 0; i < Values.size(); ++i){
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i){
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(N_KEYS,

@ -28,6 +28,29 @@
#include <vtkm/cont/internal/ArrayPortalFromIterators.h>
// Select exactly one aligned-allocation backend for alloc_aligned/free_aligned.
// The checks are ordered by preference: POSIX (posix_memalign), then Windows
// (_aligned_malloc), then SSE (_mm_malloc). If none is available,
// VTKM_MEMALIGN_NONE is defined and plain malloc is used instead.
#if defined(VTKM_POSIX)
#define VTKM_MEMALIGN_POSIX
#elif defined(_WIN32)
#define VTKM_MEMALIGN_WIN
#elif defined(__SSE__)
#define VTKM_MEMALIGN_SSE
#else
#define VTKM_MEMALIGN_NONE
#endif
#if defined(VTKM_MEMALIGN_POSIX)
#include <stdlib.h>
#elif defined(VTKM_MEMALIGN_WIN)
#include <malloc.h>
#elif defined(VTKM_MEMALIGN_SSE)
#include <xmmintrin.h>
#else
#include <malloc.h>
#endif
// Defines the cache line size in bytes to align allocations to
#define VTKM_CACHE_LINE_SIZE 64
namespace vtkm {
namespace cont {
@ -36,6 +59,93 @@ struct StorageTagBasic { };
namespace internal {
/// Allocates `size` bytes using the allocation routine selected by the
/// VTKM_MEMALIGN_* macros, requesting `align`-byte alignment from it.
///
/// \param size  number of bytes to allocate
/// \param align requested alignment in bytes (ignored by the plain malloc
///              fallback, which has no alignment parameter)
/// \return a non-NULL pointer that must be released with free_aligned
/// \throws std::bad_alloc if the underlying routine fails or returns NULL
inline void* alloc_aligned(size_t size, size_t align){
  void *mem = NULL;
#if defined(VTKM_MEMALIGN_POSIX)
  // posix_memalign signals failure through its return code; normalize that
  // to a NULL pointer so every backend shares the check below.
  const int status = posix_memalign(&mem, align, size);
  if (status != 0){
    mem = NULL;
  }
#elif defined(VTKM_MEMALIGN_WIN)
  mem = _aligned_malloc(size, align);
#elif defined(VTKM_MEMALIGN_SSE)
  mem = _mm_malloc(size, align);
#else
  // No aligned allocation routine was selected: fall back to plain malloc.
  mem = malloc(size);
#endif
  if (mem != NULL){
    return mem;
  }
  throw std::bad_alloc();
}
/// Releases memory previously obtained from alloc_aligned, using the
/// deallocation routine that matches the backend selected by the
/// VTKM_MEMALIGN_* macros.
///
/// \param mem pointer to release
inline void free_aligned(void *mem){
  // Both the POSIX backend and the plain malloc fallback release with free();
  // the Windows and SSE backends each have their own matching routine.
#if defined(VTKM_MEMALIGN_POSIX) || \
    !(defined(VTKM_MEMALIGN_WIN) || defined(VTKM_MEMALIGN_SSE))
  free(mem);
#elif defined(VTKM_MEMALIGN_WIN)
  _aligned_free(mem);
#else
  _mm_free(mem);
#endif
}
/// A simple aligned allocator type that will align allocations to `Alignment` bytes
/// TODO: Once C++11 std::allocator_traits is better used by STL and we want to drop
/// support for pre-C++11 we can drop a lot of the typedefs and functions here.
///
/// The allocator is stateless: memory is obtained from alloc_aligned and
/// returned through free_aligned, so any two instances with the same
/// `Alignment` are interchangeable.
///
/// \tparam T         type of the objects being allocated
/// \tparam Alignment alignment, in bytes, requested for every allocation
template<typename T, size_t Alignment>
struct AlignedAllocator {
  typedef T value_type;
  typedef T& reference;
  typedef const T& const_reference;
  typedef T* pointer;
  typedef const T* const_pointer;
  typedef void* void_pointer;
  typedef const void* const_void_pointer;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;

  /// Gives the equivalent allocator for another value type, keeping the alignment.
  template<typename U>
  struct rebind {
    typedef AlignedAllocator<U, Alignment> other;
  };

  AlignedAllocator(){}

  /// Converting constructor from an allocator of another value type with the
  /// same alignment; there is no state to copy.
  template<typename Tb>
  AlignedAllocator(const AlignedAllocator<Tb, Alignment>&){}

  /// Allocates uninitialized storage for `n` objects of type T.
  /// \throws std::bad_alloc if `n` exceeds max_size() or the allocation fails
  pointer allocate(size_t n){
    // Reject counts whose byte size would overflow size_t; otherwise the
    // multiplication below wraps and a too-small buffer would be returned.
    if (n > this->max_size()){
      throw std::bad_alloc();
    }
    return static_cast<pointer>(alloc_aligned(n * sizeof(T), Alignment));
  }

  /// Releases storage obtained from allocate(); the element count is unused.
  void deallocate(pointer p, size_t){
    free_aligned(static_cast<void*>(p));
  }

  // address() is const, as in std::allocator, so it can be called on a
  // const allocator instance.
  pointer address(reference r) const {
    return &r;
  }
  const_pointer address(const_reference r) const {
    return &r;
  }

  /// Largest element count whose size in bytes still fits in size_type.
  size_type max_size() const {
    return std::numeric_limits<size_type>::max() / sizeof(T);
  }

  /// Copy-constructs a T from `t` in the storage pointed to by `p`.
  void construct(pointer p, const T &t){
    new(p) T(t);
  }

  /// Runs the destructor of the object at `p` without releasing its storage.
  void destroy(pointer p){
    p->~T();
  }
};
/// Two AlignedAllocators compare equal exactly when their alignments match,
/// regardless of the value types they allocate.
template<typename T, typename U, size_t AlignA, size_t AlignB>
bool operator==(const AlignedAllocator<T, AlignA>&, const AlignedAllocator<U, AlignB>&){
  const bool sameAlignment = (AlignA == AlignB);
  return sameAlignment;
}
/// Two AlignedAllocators compare unequal exactly when their alignments differ,
/// regardless of the value types they allocate.
template<typename T, typename U, size_t AlignA, size_t AlignB>
bool operator!=(const AlignedAllocator<T, AlignA>&, const AlignedAllocator<U, AlignB>&){
  const bool sameAlignment = (AlignA == AlignB);
  return !sameAlignment;
}
/// A basic implementation of an Storage object.
///
/// \todo This storage does \em not construct the values within the array.
@ -59,7 +169,7 @@ public:
/// whether that would ever be useful. So, instead of jumping through hoops
/// implementing them, just fix the allocator for now.
///
typedef std::allocator<ValueType> AllocatorType;
typedef AlignedAllocator<ValueType, VTKM_CACHE_LINE_SIZE> AllocatorType;
public:
@ -71,7 +181,6 @@ public:
DeallocateOnRelease(false),
UserProvidedMemory( array == NULL ? false : true)
{
}
VTKM_CONT_EXPORT
@ -159,8 +268,7 @@ public:
if (numberOfValues > 0)
{
AllocatorType allocator;
this->Array = allocator.allocate(
static_cast<std::size_t>(numberOfValues) );
this->Array = allocator.allocate(static_cast<std::size_t>(numberOfValues));
this->AllocatedSize = numberOfValues;
this->NumberOfValues = numberOfValues;
}

@ -25,6 +25,16 @@
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/VecTraits.h>
// We use these to check if the aligned allocator provided by
// StorageBasic can be used with all STL containers
#include <vector>
#include <deque>
#include <list>
#include <set>
#include <map>
#include <stack>
#include <queue>
namespace {
const vtkm::Id ARRAY_SIZE = 10;
@ -61,6 +71,49 @@ struct TemplatedTests
return 29;
}
// Builds a std::vector that uses the storage's allocator and hands the
// vector's memory to the storage as a user-provided array.
void TestAlignedAllocatorSTL(){
  typedef typename StorageType::AllocatorType AlignedAlloc;
  typedef std::vector<ValueType, AlignedAlloc> AlignedVector;

  AlignedVector alignedValues(ARRAY_SIZE, ValueType());
  StorageType userStorage(&alignedValues[0], ARRAY_SIZE);
}
// This test checks that we can compile and use the allocator with all
// STL containers
void CompileSTLAllocator(){
typedef typename StorageType::AllocatorType Allocator;
typedef typename StorageType::AllocatorType::
template rebind<std::pair<ValueType, ValueType> >::other PairAllocator;
std::vector<ValueType, Allocator> v;
v.push_back(ValueType());
std::deque<ValueType, Allocator> d;
d.push_front(ValueType());
std::list<ValueType, Allocator> l;
l.push_front(ValueType());
std::set<ValueType, std::less<ValueType>, Allocator> set;
set.insert(ValueType());
std::map<ValueType, ValueType, std::less<ValueType>, PairAllocator> m;
m[ValueType()] = ValueType();
std::multiset<ValueType, std::less<ValueType>, Allocator> ms;
ms.insert(ValueType());
std::multimap<ValueType, ValueType, std::less<ValueType>, PairAllocator> mm;
mm.insert(std::pair<ValueType, ValueType>(ValueType(), ValueType()));
std::stack<ValueType, std::deque<ValueType, Allocator> > stack;
stack.push(ValueType());
std::queue<ValueType, std::deque<ValueType, Allocator> > queue;
queue.push(ValueType());
std::priority_queue<ValueType, std::vector<ValueType, Allocator> > pqueue;
pqueue.push(ValueType());
}
/// Returned value should later be passed to StealArray2. It is best to
/// put as much between the two test parts to maximize the chance of a
/// deallocated array being overridden (and thus detected).
@ -139,6 +192,9 @@ struct TemplatedTests
BasicAllocation();
StealArray2(stolenArray);
TestAlignedAllocatorSTL();
CompileSTLAllocator();
}
};

@ -46,6 +46,13 @@
#define VTKM_GCC
#endif
// Detect POSIX support. On unix-like platforms (including macOS) include
// <unistd.h> and, if it provides _POSIX_VERSION, expose that value as
// VTKM_POSIX. VTKM_POSIX is left undefined otherwise.
#if defined(unix) || defined(__unix) || defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
# ifdef _POSIX_VERSION
# define VTKM_POSIX _POSIX_VERSION
# endif
#endif
#if !defined(VTKM_USE_DOUBLE_PRECISION) && !defined(VTKM_NO_DOUBLE_PRECISION)
#cmakedefine VTKM_USE_DOUBLE_PRECISION
#endif