mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
Merge topic 'bitfields'
661fb64de AtomicInterfaceControl functions are marked with VTKM_SUPPRESS_EXEC_WARNINGS 0c70f9b9a Add BitFieldIn/Out/InOut worklet signature tags. a66510e81 Add ArrayHandleBitField, a boolean-valued AH backed by a BitField. 56cc5c3d3 Add support for BitFields. d01b97382 Allow VTKM_SUPPRESS_EXEC_WARNINGS to be used inside macros. 2f2ca9370 Add bit operations FindFirstSetBit and CountSetBits to Math.h. Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !1629
This commit is contained in:
commit
a5dbe1ece3
@ -26,6 +26,7 @@
|
||||
#include <vtkm/cont/ArrayHandlePermutation.h>
|
||||
#include <vtkm/cont/ArrayHandleZip.h>
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/BitField.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/ErrorExecution.h>
|
||||
#include <vtkm/cont/StorageBasic.h>
|
||||
@ -63,20 +64,24 @@ namespace benchmarking
|
||||
|
||||
enum BenchmarkName
|
||||
{
|
||||
COPY = 1,
|
||||
COPY_IF = 1 << 1,
|
||||
LOWER_BOUNDS = 1 << 2,
|
||||
REDUCE = 1 << 3,
|
||||
REDUCE_BY_KEY = 1 << 4,
|
||||
SCAN_INCLUSIVE = 1 << 5,
|
||||
SCAN_EXCLUSIVE = 1 << 6,
|
||||
SORT = 1 << 7,
|
||||
SORT_BY_KEY = 1 << 8,
|
||||
STABLE_SORT_INDICES = 1 << 9,
|
||||
STABLE_SORT_INDICES_UNIQUE = 1 << 10,
|
||||
UNIQUE = 1 << 11,
|
||||
UPPER_BOUNDS = 1 << 12,
|
||||
ALL = COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE | SCAN_EXCLUSIVE |
|
||||
BITFIELD_TO_UNORDERED_SET = 1 << 0,
|
||||
COPY = 1 << 1,
|
||||
COPY_IF = 1 << 2,
|
||||
LOWER_BOUNDS = 1 << 3,
|
||||
REDUCE = 1 << 4,
|
||||
REDUCE_BY_KEY = 1 << 5,
|
||||
SCAN_INCLUSIVE = 1 << 6,
|
||||
SCAN_EXCLUSIVE = 1 << 7,
|
||||
SORT = 1 << 8,
|
||||
SORT_BY_KEY = 1 << 9,
|
||||
STABLE_SORT_INDICES = 1 << 10,
|
||||
STABLE_SORT_INDICES_UNIQUE = 1 << 11,
|
||||
UNIQUE = 1 << 12,
|
||||
UPPER_BOUNDS = 1 << 13,
|
||||
|
||||
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY |
|
||||
SCAN_INCLUSIVE |
|
||||
SCAN_EXCLUSIVE |
|
||||
SORT |
|
||||
SORT_BY_KEY |
|
||||
STABLE_SORT_INDICES |
|
||||
@ -132,6 +137,20 @@ struct BenchDevAlgoConfig
|
||||
? static_cast<vtkm::Id>(this->ArraySizeBytes / static_cast<vtkm::UInt64>(sizeof(T)))
|
||||
: static_cast<vtkm::Id>(this->ArraySizeValues);
|
||||
}
|
||||
|
||||
// Compute the number of words in a bit field with the given type.
|
||||
// If DoByteSizes is true, the specified buffer is rounded down to the nearest
|
||||
// number of words that fit into the byte limit. Otherwise, ArraySizeValues
|
||||
// is used to indicate the number of bits.
|
||||
template <typename WordType>
|
||||
VTKM_CONT vtkm::Id ComputeNumberOfWords()
|
||||
{
|
||||
static constexpr vtkm::UInt64 BytesPerWord = static_cast<vtkm::UInt64>(sizeof(WordType));
|
||||
static constexpr vtkm::UInt64 BitsPerWord = BytesPerWord * 8;
|
||||
|
||||
return this->DoByteSizes ? static_cast<vtkm::Id>(this->ArraySizeBytes / BytesPerWord)
|
||||
: static_cast<vtkm::Id>(this->ArraySizeValues / BitsPerWord);
|
||||
}
|
||||
};
|
||||
|
||||
// Share a global instance of the config (only way to get it into the benchmark
|
||||
@ -255,7 +274,170 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename WordType, typename BitFieldPortal>
|
||||
struct GenerateBitFieldFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
WordType Exemplar;
|
||||
vtkm::Id Stride;
|
||||
vtkm::Id MaxMaskedWord;
|
||||
BitFieldPortal Portal;
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
GenerateBitFieldFunctor(WordType exemplar,
|
||||
vtkm::Id stride,
|
||||
vtkm::Id maxMaskedWord,
|
||||
const BitFieldPortal& portal)
|
||||
: Exemplar(exemplar)
|
||||
, Stride(stride)
|
||||
, MaxMaskedWord(maxMaskedWord)
|
||||
, Portal(portal)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC
|
||||
void operator()(vtkm::Id wordIdx) const
|
||||
{
|
||||
if (wordIdx <= this->MaxMaskedWord && (wordIdx % this->Stride) == 0)
|
||||
{
|
||||
this->Portal.SetWord(wordIdx, this->Exemplar);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->Portal.SetWord(wordIdx, static_cast<WordType>(0));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Create a bit field for testing. The bit array will contain numWords words.
|
||||
// The exemplar word is used to set bits in the array. Stride indicates how
|
||||
// many words will be set to 0 between words initialized to the exemplar.
|
||||
// Words with indices higher than maxMaskedWord will be set to 0.
|
||||
// Stride and maxMaskedWord may be used to test different types of imbalanced
|
||||
// loads.
|
||||
template <typename WordType, typename DeviceAdapterTag>
|
||||
static VTKM_CONT vtkm::cont::BitField GenerateBitField(WordType exemplar,
|
||||
vtkm::Id stride,
|
||||
vtkm::Id maxMaskedWord,
|
||||
vtkm::Id numWords)
|
||||
{
|
||||
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;
|
||||
|
||||
if (stride == 0)
|
||||
{
|
||||
stride = 1;
|
||||
}
|
||||
|
||||
vtkm::cont::BitField bits;
|
||||
auto portal = bits.PrepareForOutput(numWords, DeviceAdapterTag{});
|
||||
|
||||
using Functor = GenerateBitFieldFunctor<WordType, decltype(portal)>;
|
||||
|
||||
Algo::Schedule(Functor{ exemplar, stride, maxMaskedWord, portal }, numWords);
|
||||
Algo::Synchronize();
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename WordType, typename DeviceAdapter>
|
||||
struct BenchBitFieldToUnorderedSet
|
||||
{
|
||||
using IndicesArray = vtkm::cont::ArrayHandle<vtkm::Id>;
|
||||
|
||||
vtkm::Id NumWords;
|
||||
vtkm::Id NumBits;
|
||||
WordType Exemplar;
|
||||
vtkm::Id Stride;
|
||||
vtkm::Float32 FillRatio;
|
||||
vtkm::Id MaxMaskedIndex;
|
||||
std::string Name;
|
||||
|
||||
vtkm::cont::BitField Bits;
|
||||
IndicesArray Indices;
|
||||
|
||||
// See GenerateBitField for details. fillRatio is used to compute
|
||||
// maxMaskedWord.
|
||||
VTKM_CONT
|
||||
BenchBitFieldToUnorderedSet(WordType exemplar,
|
||||
vtkm::Id stride,
|
||||
vtkm::Float32 fillRatio,
|
||||
const std::string& name)
|
||||
: NumWords(Config.ComputeNumberOfWords<WordType>())
|
||||
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
|
||||
, Exemplar(exemplar)
|
||||
, Stride(stride)
|
||||
, FillRatio(fillRatio)
|
||||
, MaxMaskedIndex(this->NumWords / static_cast<vtkm::Id>(1. / this->FillRatio))
|
||||
, Name(name)
|
||||
, Bits(GenerateBitField<WordType, DeviceAdapter>(this->Exemplar,
|
||||
this->Stride,
|
||||
this->MaxMaskedIndex,
|
||||
this->NumWords))
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
Timer timer(DeviceAdapter{});
|
||||
timer.Start();
|
||||
Algorithm::BitFieldToUnorderedSet(DeviceAdapter{}, this->Bits, this->Indices);
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
const vtkm::Id numFilledWords = this->MaxMaskedIndex / this->Stride;
|
||||
const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar);
|
||||
|
||||
std::stringstream description;
|
||||
description << "BitFieldToUnorderedSet" << this->Name << " ( "
|
||||
<< "NumWords: " << this->NumWords << " "
|
||||
<< "Exemplar: " << std::hex << this->Exemplar << std::dec << " "
|
||||
<< "FillRatio: " << this->FillRatio << " "
|
||||
<< "Stride: " << this->Stride << " "
|
||||
<< "NumSetBits: " << numSetBits << " )";
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
// Benchmark configurations for BenchBitFieldToUnorderedSet. The arguments
// after the benchmark struct are forwarded to its constructor in the order
// (exemplar, stride, fillRatio, name).

// No bits set: zero exemplar, zero fill ratio.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetNull,
                    BenchBitFieldToUnorderedSet,
                    0x00000000,
                    1,
                    0.f,
                    "Null");

// Every bit of every word set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetFull,
                    BenchBitFieldToUnorderedSet,
                    0xffffffff,
                    1,
                    1.f,
                    "Full");

// Upper half of every word set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfWord,
                    BenchBitFieldToUnorderedSet,
                    0xffff0000,
                    1,
                    1.f,
                    "HalfWord");

// Fully set words in the first half of the field only.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfField,
                    BenchBitFieldToUnorderedSet,
                    0xffffffff,
                    1,
                    0.5f,
                    "HalfField");

// Every second word fully set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateWords,
                    BenchBitFieldToUnorderedSet,
                    0xffffffff,
                    2,
                    1.f,
                    "AlternateWords");

// Every second bit of every word set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateBits,
                    BenchBitFieldToUnorderedSet,
                    0x55555555,
                    1,
                    1.f,
                    "AlternateBits");
|
||||
|
||||
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchCopy
|
||||
{
|
||||
@ -982,6 +1164,19 @@ public:
|
||||
template <typename ValueTypes>
|
||||
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
|
||||
{
|
||||
using BitFieldWordTypes = vtkm::ListTagBase<vtkm::UInt32>;
|
||||
|
||||
if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n";
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id);
|
||||
}
|
||||
|
||||
if (Config.BenchmarkFlags & COPY)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking Copy\n";
|
||||
@ -1434,7 +1629,11 @@ int main(int argc, char* argv[])
|
||||
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
|
||||
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
});
|
||||
if (arg == "copy")
|
||||
if (arg == "bitfieldtounorderedset")
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::BITFIELD_TO_UNORDERED_SET;
|
||||
}
|
||||
else if (arg == "copy")
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::COPY;
|
||||
}
|
||||
|
51
docs/changelog/bitfields.md
Normal file
51
docs/changelog/bitfields.md
Normal file
@ -0,0 +1,51 @@
|
||||
# Add support for BitFields.
|
||||
|
||||
BitFields are:
|
||||
- Stored in memory using a contiguous buffer of bits.
|
||||
- Accessible via portals, a la ArrayHandle.
|
||||
- Portals operate on individual bits or words.
|
||||
- Operations may be atomic for safe use from concurrent kernels.
|
||||
|
||||
The new BitFieldToUnorderedSet device algorithm produces an
|
||||
ArrayHandle containing the indices of all set bits, in no particular
|
||||
order.
|
||||
|
||||
The new AtomicInterface classes provide an abstraction over bitwise
|
||||
atomic operations across control and execution environments and are
|
||||
used to implement the BitPortals.
|
||||
|
||||
BitFields may be used as boolean-typed ArrayHandles using the
|
||||
ArrayHandleBitField adapter. ArrayHandleBitField uses atomic operations to read
|
||||
and write bits in the BitField, and is safe to use in concurrent code.
|
||||
|
||||
For example, a simple worklet that merges two arrays based on a boolean
|
||||
condition is tested in TestingBitField:
|
||||
|
||||
```
|
||||
class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
|
||||
{
|
||||
public:
|
||||
using ControlSignature = void(FieldIn cond,
|
||||
FieldIn trueVals,
|
||||
FieldIn falseVals,
|
||||
FieldOut result);
|
||||
using ExecutionSignature = _4(_1, _2, _3);
|
||||
|
||||
template <typename T>
|
||||
VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
|
||||
{
|
||||
return cond ? trueVal : falseVal;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
BitField bits = ...;
|
||||
auto condArray = vtkm::cont::make_ArrayHandleBitField(bits);
|
||||
auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
|
||||
auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
|
||||
vtkm::cont::ArrayHandle<vtkm::Id> output;
|
||||
|
||||
vtkm::worklet::DispatcherMapField<ConditionalMergeWorklet> dispatcher;
|
||||
dispatcher.Invoke(condArray, trueArray, falseArray, output);
|
||||
|
||||
```
|
193
vtkm/Math.h
193
vtkm/Math.h
@ -41,9 +41,13 @@
|
||||
#include <algorithm>
|
||||
#endif
|
||||
|
||||
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
|
||||
#ifdef VTKM_MSVC
|
||||
#include <intrin.h> // For bitwise intrinsics (__popcnt, etc)
|
||||
#include <vtkm/internal/Windows.h> // for types used by MSVC intrinsics.
|
||||
#ifndef VTKM_CUDA
|
||||
#include <math.h>
|
||||
#endif
|
||||
#endif // VTKM_CUDA
|
||||
#endif // VTKM_MSVC
|
||||
|
||||
#define VTKM_CUDA_MATH_FUNCTION_32(func) func##f
|
||||
#define VTKM_CUDA_MATH_FUNCTION_64(func) func
|
||||
@ -2592,6 +2596,191 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Bitwise operations
|
||||
///
|
||||
|
||||
/// Find the first set bit in @a word, and return its position (1-32). If no
|
||||
/// bits are set, returns 0.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __ffs is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
|
||||
{
|
||||
// Output is [0,32], with ffs(0) == 0
|
||||
return __ffs(static_cast<int>(word));
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
// Output is [0,32], with ffs(0) == 0
|
||||
return __builtin_ffs(static_cast<int>(word));
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
// Output is [0, 31], check return code to see if bits are set:
|
||||
vtkm::UInt32 firstSet;
|
||||
return _BitScanForward(reinterpret_cast<DWORD*>(&firstSet), word) != 0
|
||||
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
// Output is [0, 31], undefined if word is 0.
|
||||
return word != 0 ? _bit_scan_forward(word) + 1 : 0;
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
if (word == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
vtkm::Int32 bit = 1;
|
||||
while ((word & 0x1) == 0)
|
||||
{
|
||||
word >>= 1;
|
||||
++bit;
|
||||
}
|
||||
return bit;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Find the first set bit in @a word, and return its position (1-64). If no
|
||||
/// bits are set, returns 0.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __ffsll is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
|
||||
{
|
||||
|
||||
// Output is [0,64], with ffs(0) == 0
|
||||
return __ffsll(static_cast<long long int>(word));
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
// Output is [0,64], with ffs(0) == 0
|
||||
return __builtin_ffsll(static_cast<long long int>(word));
|
||||
|
||||
# elif defined(VTKM_MSVC) || defined(VTKM_ICC)
|
||||
|
||||
// Output is [0, 63], check return code to see if bits are set:
|
||||
vtkm::UInt32 firstSet;
|
||||
return _BitScanForward64(reinterpret_cast<DWORD*>(&firstSet), word) != 0
|
||||
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
if (word == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
vtkm::Int32 bit = 1;
|
||||
while ((word & 0x1) == 0)
|
||||
{
|
||||
word >>= 1;
|
||||
++bit;
|
||||
}
|
||||
return bit;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Count the total number of bits set in @a word.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __popc is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
|
||||
{
|
||||
return __popc(word);
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
return __builtin_popcount(word);
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
return static_cast<vtkm::Int32>(__popcnt(word));
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
return _popcnt32(static_cast<int>(word));
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
vtkm::Int32 bits = 0;
|
||||
while (word)
|
||||
{
|
||||
if (word & 0x1)
|
||||
{
|
||||
++bits;
|
||||
}
|
||||
word >>= 1;
|
||||
}
|
||||
return bits;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Count the total number of bits set in @a word.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __popcll is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
|
||||
{
|
||||
return __popcll(word);
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
return __builtin_popcountll(word);
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
return static_cast<vtkm::Int32>(__popcnt64(word));
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
return _popcnt64(static_cast<vtkm::Int64>(word));
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
vtkm::Int32 bits = 0;
|
||||
while (word)
|
||||
{
|
||||
if (word & 0x1)
|
||||
{
|
||||
++bits;
|
||||
}
|
||||
word >>= 1;
|
||||
}
|
||||
return bits;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
} // namespace vtkm
|
||||
// clang-format on
|
||||
|
||||
|
194
vtkm/Math.h.in
194
vtkm/Math.h.in
@ -53,9 +53,14 @@ $# Ignore the following comment. It is meant for the generated file.
|
||||
#include <algorithm>
|
||||
#endif
|
||||
|
||||
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
|
||||
#ifdef VTKM_MSVC
|
||||
#include <intrin.h> // For bitwise intrinsics (__popcnt, etc)
|
||||
#include <vtkm/internal/Windows.h> // for types used by MSVC intrinsics.
|
||||
#ifndef VTKM_CUDA
|
||||
#include <math.h>
|
||||
#endif
|
||||
#endif // VTKM_CUDA
|
||||
#endif // VTKM_MSVC
|
||||
|
||||
|
||||
#define VTKM_CUDA_MATH_FUNCTION_32(func) func##f
|
||||
#define VTKM_CUDA_MATH_FUNCTION_64(func) func
|
||||
@ -1194,6 +1199,191 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Bitwise operations
|
||||
///
|
||||
|
||||
/// Find the first set bit in @a word, and return its position (1-32). If no
|
||||
/// bits are set, returns 0.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __ffs is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
|
||||
{
|
||||
// Output is [0,32], with ffs(0) == 0
|
||||
return __ffs(static_cast<int>(word));
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
// Output is [0,32], with ffs(0) == 0
|
||||
return __builtin_ffs(static_cast<int>(word));
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
// Output is [0, 31], check return code to see if bits are set:
|
||||
vtkm::UInt32 firstSet;
|
||||
return _BitScanForward(reinterpret_cast<DWORD*>(&firstSet), word) != 0
|
||||
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
// Output is [0, 31], undefined if word is 0.
|
||||
return word != 0 ? _bit_scan_forward(word) + 1 : 0;
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
if (word == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
vtkm::Int32 bit = 1;
|
||||
while ((word & 0x1) == 0)
|
||||
{
|
||||
word >>= 1;
|
||||
++bit;
|
||||
}
|
||||
return bit;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Find the first set bit in @a word, and return its position (1-64). If no
|
||||
/// bits are set, returns 0.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __ffsll is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
|
||||
{
|
||||
|
||||
// Output is [0,64], with ffs(0) == 0
|
||||
return __ffsll(static_cast<long long int>(word));
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
// Output is [0,64], with ffs(0) == 0
|
||||
return __builtin_ffsll(static_cast<long long int>(word));
|
||||
|
||||
# elif defined(VTKM_MSVC) || defined(VTKM_ICC)
|
||||
|
||||
// Output is [0, 63], check return code to see if bits are set:
|
||||
vtkm::UInt32 firstSet;
|
||||
return _BitScanForward64(reinterpret_cast<DWORD*>(&firstSet), word) != 0
|
||||
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
if (word == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
vtkm::Int32 bit = 1;
|
||||
while ((word & 0x1) == 0)
|
||||
{
|
||||
word >>= 1;
|
||||
++bit;
|
||||
}
|
||||
return bit;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Count the total number of bits set in @a word.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __popc is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
|
||||
{
|
||||
return __popc(word);
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
return __builtin_popcount(word);
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
return static_cast<vtkm::Int32>(__popcnt(word));
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
return _popcnt32(static_cast<int>(word));
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
vtkm::Int32 bits = 0;
|
||||
while (word)
|
||||
{
|
||||
if (word & 0x1)
|
||||
{
|
||||
++bits;
|
||||
}
|
||||
word >>= 1;
|
||||
}
|
||||
return bits;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
/// Count the total number of bits set in @a word.
|
||||
#ifdef VTKM_CUDA_DEVICE_PASS
|
||||
// Need to explicitly mark this as __device__ since __popcll is device only.
|
||||
inline __device__
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
|
||||
{
|
||||
return __popcll(word);
|
||||
}
|
||||
#else // CUDA_DEVICE_PASS
|
||||
inline VTKM_EXEC_CONT
|
||||
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
|
||||
{
|
||||
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
|
||||
|
||||
return __builtin_popcountll(word);
|
||||
|
||||
# elif defined(VTKM_MSVC)
|
||||
|
||||
return static_cast<vtkm::Int32>(__popcnt64(word));
|
||||
|
||||
# elif defined(VTKM_ICC)
|
||||
|
||||
return _popcnt64(static_cast<vtkm::Int64>(word));
|
||||
|
||||
# else
|
||||
|
||||
// Naive implementation:
|
||||
vtkm::Int32 bits = 0;
|
||||
while (word)
|
||||
{
|
||||
if (word & 0x1)
|
||||
{
|
||||
++bits;
|
||||
}
|
||||
word >>= 1;
|
||||
}
|
||||
return bits;
|
||||
|
||||
# endif
|
||||
}
|
||||
#endif // CUDA_DEVICE_PASS
|
||||
|
||||
} // namespace vtkm
|
||||
// clang-format on
|
||||
|
||||
|
@ -159,6 +159,10 @@ using UInt32 = unsigned int;
|
||||
/// than smaller widths.
|
||||
using IdComponent = vtkm::Int32;
|
||||
|
||||
/// The default word type used for atomic bitwise operations. Universally
|
||||
/// supported on all devices.
|
||||
using WordTypeDefault = vtkm::UInt32;
|
||||
|
||||
//In this order so that we exactly match the logic that exists in VTK
|
||||
#if VTKM_SIZE_LONG_LONG == 8
|
||||
using Int64 = long long;
|
||||
|
@ -60,6 +60,20 @@ auto PrepareArgForExec(T&& object)
|
||||
vtkm::cont::internal::IsExecutionObjectBase<T>{});
|
||||
}
|
||||
|
||||
struct BitFieldToUnorderedSetFunctor
|
||||
{
|
||||
vtkm::Id Result{ 0 };
|
||||
|
||||
template <typename Device, typename... Args>
|
||||
VTKM_CONT bool operator()(Device, Args&&... args)
|
||||
{
|
||||
VTKM_IS_DEVICE_ADAPTER_TAG(Device);
|
||||
this->Result = vtkm::cont::DeviceAdapterAlgorithm<Device>::BitFieldToUnorderedSet(
|
||||
PrepareArgForExec<Device>(std::forward<Args>(args))...);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct CopyFunctor
|
||||
{
|
||||
template <typename Device, typename... Args>
|
||||
@ -374,6 +388,27 @@ struct UpperBoundsFunctor
|
||||
struct Algorithm
|
||||
{
|
||||
|
||||
template <typename IndicesStorage>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
|
||||
vtkm::cont::DeviceAdapterId devId,
|
||||
const vtkm::cont::BitField& bits,
|
||||
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
|
||||
{
|
||||
detail::BitFieldToUnorderedSetFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, indices);
|
||||
return functor.Result;
|
||||
}
|
||||
|
||||
template <typename IndicesStorage>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
|
||||
const vtkm::cont::BitField& bits,
|
||||
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
|
||||
{
|
||||
detail::BitFieldToUnorderedSetFunctor functor;
|
||||
vtkm::cont::TryExecute(functor, bits, indices);
|
||||
return functor.Result;
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
VTKM_CONT static bool Copy(vtkm::cont::DeviceAdapterId devId,
|
||||
const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
|
220
vtkm/cont/ArrayHandleBitField.h
Normal file
220
vtkm/cont/ArrayHandleBitField.h
Normal file
@ -0,0 +1,220 @@
|
||||
//=============================================================================
|
||||
//
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
//
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//
|
||||
//=============================================================================
|
||||
#ifndef vtk_m_cont_ArrayHandleBitField_h
|
||||
#define vtk_m_cont_ArrayHandleBitField_h
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/BitField.h>
|
||||
#include <vtkm/cont/Storage.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template <typename BitPortalType>
|
||||
class ArrayPortalBitField
|
||||
{
|
||||
public:
|
||||
using ValueType = bool;
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
explicit ArrayPortalBitField(const BitPortalType& portal) noexcept : BitPortal{ portal } {}
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
explicit ArrayPortalBitField(BitPortalType&& portal) noexcept : BitPortal{ std::move(portal) } {}
|
||||
|
||||
ArrayPortalBitField() noexcept = default;
|
||||
ArrayPortalBitField(const ArrayPortalBitField&) noexcept = default;
|
||||
ArrayPortalBitField(ArrayPortalBitField&&) noexcept = default;
|
||||
ArrayPortalBitField& operator=(const ArrayPortalBitField&) noexcept = default;
|
||||
ArrayPortalBitField& operator=(ArrayPortalBitField&&) noexcept = default;
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
vtkm::Id GetNumberOfValues() const noexcept { return this->BitPortal.GetNumberOfBits(); }
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
ValueType Get(vtkm::Id index) const noexcept { return this->BitPortal.GetBit(index); }
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
void Set(vtkm::Id index, ValueType value) const
|
||||
{
|
||||
// Use an atomic set so we don't clash with other threads writing nearby
|
||||
// bits.
|
||||
this->BitPortal.SetBitAtomic(index, value);
|
||||
}
|
||||
|
||||
private:
|
||||
BitPortalType BitPortal;
|
||||
};
|
||||
|
||||
struct VTKM_ALWAYS_EXPORT StorageTagBitField
|
||||
{
|
||||
};
|
||||
|
||||
template <>
|
||||
class Storage<bool, StorageTagBitField>
|
||||
{
|
||||
using BitPortalType = vtkm::cont::detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
|
||||
using BitPortalConstType =
|
||||
vtkm::cont::detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
|
||||
|
||||
public:
|
||||
using ValueType = bool;
|
||||
using PortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;
|
||||
using PortalConstType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;
|
||||
|
||||
explicit VTKM_CONT Storage(const vtkm::cont::BitField& data)
|
||||
: Data{ data }
|
||||
{
|
||||
}
|
||||
|
||||
explicit VTKM_CONT Storage(vtkm::cont::BitField&& data) noexcept : Data{ std::move(data) } {}
|
||||
|
||||
VTKM_CONT Storage() = default;
|
||||
VTKM_CONT Storage(const Storage& src) = default;
|
||||
VTKM_CONT Storage(Storage&& src) noexcept = default;
|
||||
VTKM_CONT Storage& operator=(const Storage& src) = default;
|
||||
VTKM_CONT Storage& operator=(Storage&& src) noexcept = default;
|
||||
|
||||
VTKM_CONT
|
||||
PortalType GetPortal() { return PortalType{ this->Data.GetPortalControl() }; }
|
||||
|
||||
VTKM_CONT
|
||||
PortalConstType GetPortalConst() { return PortalConstType{ this->Data.GetPortalConstControl() }; }
|
||||
|
||||
VTKM_CONT vtkm::Id GetNumberOfValues() const { return this->Data.GetNumberOfBits(); }
|
||||
VTKM_CONT void Allocate(vtkm::Id numberOfValues) { this->Data.Allocate(numberOfValues); }
|
||||
VTKM_CONT void Shrink(vtkm::Id numberOfValues) { this->Data.Shrink(numberOfValues); }
|
||||
VTKM_CONT void ReleaseResources() { this->Data.ReleaseResources(); }
|
||||
|
||||
VTKM_CONT vtkm::cont::BitField GetBitField() const { return this->Data; }
|
||||
|
||||
private:
|
||||
vtkm::cont::BitField Data;
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
class ArrayTransfer<bool, StorageTagBitField, Device>
|
||||
{
|
||||
using AtomicInterface = AtomicInterfaceExecution<Device>;
|
||||
using StorageType = Storage<bool, StorageTagBitField>;
|
||||
using BitPortalExecution = vtkm::cont::detail::BitPortal<AtomicInterface>;
|
||||
using BitPortalConstExecution = vtkm::cont::detail::BitPortalConst<AtomicInterface>;
|
||||
|
||||
public:
|
||||
using ValueType = bool;
|
||||
using PortalControl = typename StorageType::PortalType;
|
||||
using PortalConstControl = typename StorageType::PortalConstType;
|
||||
using PortalExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalExecution>;
|
||||
using PortalConstExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstExecution>;
|
||||
|
||||
VTKM_CONT
|
||||
explicit ArrayTransfer(StorageType* storage)
|
||||
: Data{ storage->GetBitField() }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Id GetNumberOfValues() const { return this->Data.GetNumberOfBits(); }
|
||||
|
||||
VTKM_CONT
|
||||
PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData))
|
||||
{
|
||||
return PortalConstExecution{ this->Data.PrepareForInput(Device{}) };
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData))
|
||||
{
|
||||
return PortalExecution{ this->Data.PrepareForInPlace(Device{}) };
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
PortalExecution PrepareForOutput(vtkm::Id numberOfValues)
|
||||
{
|
||||
return PortalExecution{ this->Data.PrepareForOutput(numberOfValues, Device{}) };
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
|
||||
{
|
||||
// Implementation of this method should be unnecessary. The internal
|
||||
// bitfield should automatically retrieve the output data as necessary.
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
void Shrink(vtkm::Id numberOfValues) { this->Data.Shrink(numberOfValues); }
|
||||
|
||||
VTKM_CONT
|
||||
void ReleaseResources() { this->Data.ReleaseResources(); }
|
||||
|
||||
private:
|
||||
vtkm::cont::BitField Data;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
/// The ArrayHandleBitField class is a boolean-valued ArrayHandle that is backed
|
||||
/// by a BitField.
|
||||
///
|
||||
class ArrayHandleBitField : public ArrayHandle<bool, internal::StorageTagBitField>
|
||||
{
|
||||
public:
|
||||
VTKM_ARRAY_HANDLE_SUBCLASS_NT(ArrayHandleBitField,
|
||||
(ArrayHandle<bool, internal::StorageTagBitField>));
|
||||
|
||||
VTKM_CONT
|
||||
explicit ArrayHandleBitField(const vtkm::cont::BitField& bitField)
|
||||
: Superclass{ StorageType{ bitField } }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
explicit ArrayHandleBitField(vtkm::cont::BitField&& bitField) noexcept
|
||||
: Superclass{ StorageType{ std::move(bitField) } }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::cont::BitField GetBitField() const { return this->GetStorage().GetBitField(); }
|
||||
};
|
||||
|
||||
/// Convenience factory: build an ArrayHandleBitField from a copy of
/// @a bitField.
VTKM_CONT inline vtkm::cont::ArrayHandleBitField make_ArrayHandleBitField(
  const vtkm::cont::BitField& bitField)
{
  vtkm::cont::ArrayHandleBitField handle{ bitField };
  return handle;
}
|
||||
|
||||
/// Convenience factory: build an ArrayHandleBitField by moving from
/// @a bitField.
VTKM_CONT inline vtkm::cont::ArrayHandleBitField make_ArrayHandleBitField(
  vtkm::cont::BitField&& bitField) noexcept
{
  vtkm::cont::ArrayHandleBitField handle{ std::move(bitField) };
  return handle;
}
|
||||
}
|
||||
} // end namespace vtkm::cont
|
||||
|
||||
#endif // vtk_m_cont_ArrayHandleBitField_h
|
719
vtkm/cont/BitField.h
Normal file
719
vtkm/cont/BitField.h
Normal file
@ -0,0 +1,719 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_BitField_h
|
||||
#define vtk_m_cont_BitField_h
|
||||
|
||||
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/Logging.h>
|
||||
|
||||
#include <vtkm/ListTag.h>
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
class BitField;
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
struct BitFieldTraits
|
||||
{
|
||||
// Allocations will occur in blocks of BlockSize bytes. This ensures that
|
||||
// power-of-two word sizes up to BlockSize will not access invalid data
|
||||
// during word-based access, and that atomic values will be properly aligned.
|
||||
// We use the default StorageBasic alignment for this.
|
||||
constexpr static vtkm::Id BlockSize = VTKM_ALLOCATION_ALIGNMENT;
|
||||
|
||||
// Make sure the blocksize is at least 64. Eventually we may implement SIMD
|
||||
// bit operations, and the current largest vector width is 512 bits.
|
||||
VTKM_STATIC_ASSERT(BlockSize >= 64);
|
||||
|
||||
/// Require an unsigned integral type that is <= BlockSize bytes.
|
||||
template <typename WordType>
|
||||
using IsValidWordType =
|
||||
std::integral_constant<bool,
|
||||
/* is unsigned */
|
||||
std::is_unsigned<WordType>::value &&
|
||||
/* doesn't exceed blocksize */
|
||||
sizeof(WordType) <= static_cast<size_t>(BlockSize) &&
|
||||
/* BlockSize is a multiple of WordType */
|
||||
static_cast<size_t>(BlockSize) % sizeof(WordType) == 0>;
|
||||
|
||||
/// Require an unsigned integral type that is <= BlockSize bytes, and is
|
||||
/// is supported by the specified AtomicInterface.
|
||||
template <typename WordType, typename AtomicInterface>
|
||||
using IsValidWordTypeAtomic = std::integral_constant<
|
||||
bool,
|
||||
/* is unsigned */
|
||||
std::is_unsigned<WordType>::value &&
|
||||
/* doesn't exceed blocksize */
|
||||
sizeof(WordType) <= static_cast<size_t>(BlockSize) &&
|
||||
/* BlockSize is a multiple of WordType */
|
||||
static_cast<size_t>(BlockSize) % sizeof(WordType) == 0 &&
|
||||
/* Supported by atomic interface */
|
||||
vtkm::ListContains<typename AtomicInterface::WordTypes, WordType>::value>;
|
||||
};
|
||||
|
||||
/// Identifies a bit in a BitField by Word and BitOffset. Note that these
|
||||
/// values are dependent on the type of word used to generate the coordinate.
|
||||
struct BitCoordinate
|
||||
{
|
||||
/// The word containing the specified bit.
|
||||
vtkm::Id WordIndex;
|
||||
|
||||
/// The zero-indexed bit in the word.
|
||||
vtkm::Int32 BitOffset; // [0, bitsInWord)
|
||||
};
|
||||
|
||||
/// Portal for performing bit or word operations on a BitField.
|
||||
///
|
||||
/// This is the implementation used by BitPortal and BitPortalConst.
|
||||
template <typename AtomicInterface_, bool IsConst>
|
||||
class BitPortalBase
|
||||
{
|
||||
// Checks if PortalType has a GetIteratorBegin() method that returns a
|
||||
// pointer.
|
||||
template <typename PortalType,
|
||||
typename PointerType = decltype(std::declval<PortalType>().GetIteratorBegin())>
|
||||
struct HasPointerAccess : public std::is_pointer<PointerType>
|
||||
{
|
||||
};
|
||||
|
||||
// Determine whether we should store a const vs. mutable pointer:
|
||||
template <typename T>
|
||||
using MaybeConstPointer = typename std::conditional<IsConst, T const*, T*>::type;
|
||||
using BufferType = MaybeConstPointer<void>; // void* or void const*, as appropriate
|
||||
|
||||
public:
|
||||
/// The atomic interface used to carry out atomic operations. See
|
||||
/// AtomicInterfaceExecution<Device> and AtomicInterfaceControl
|
||||
using AtomicInterface = AtomicInterface_;
|
||||
|
||||
/// The fastest word type for performing bitwise operations through AtomicInterface.
|
||||
using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
|
||||
|
||||
/// MPL check for whether a WordType may be used for non-atomic operations.
|
||||
template <typename WordType>
|
||||
using IsValidWordType = BitFieldTraits::IsValidWordType<WordType>;
|
||||
|
||||
/// MPL check for whether a WordType may be used for atomic operations.
|
||||
template <typename WordType>
|
||||
using IsValidWordTypeAtomic = BitFieldTraits::IsValidWordTypeAtomic<WordType, AtomicInterface>;
|
||||
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypeDefault>::value,
|
||||
"Internal error: Default word type is invalid.");
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypePreferred>::value,
|
||||
"Device-specific fast word type is invalid.");
|
||||
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypeDefault>::value,
|
||||
"Internal error: Default word type is invalid.");
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypePreferred>::value,
|
||||
"Device-specific fast word type is invalid for atomic operations.");
|
||||
|
||||
protected:
|
||||
friend class vtkm::cont::BitField;
|
||||
|
||||
/// Construct a BitPortal from an ArrayHandle with basic storage's portal.
|
||||
template <typename PortalType>
|
||||
VTKM_EXEC_CONT BitPortalBase(const PortalType& portal, vtkm::Id numberOfBits)
|
||||
: Data{ portal.GetIteratorBegin() }
|
||||
, NumberOfBits{ numberOfBits }
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(HasPointerAccess<PortalType>::value,
|
||||
"Source portal must return a pointer from "
|
||||
"GetIteratorBegin().");
|
||||
}
|
||||
|
||||
public:
|
||||
BitPortalBase() noexcept = default;
|
||||
BitPortalBase(const BitPortalBase& src) noexcept = default;
|
||||
BitPortalBase(BitPortalBase&& src) noexcept = default;
|
||||
BitPortalBase& operator=(const BitPortalBase& src) noexcept = default;
|
||||
BitPortalBase& operator=(BitPortalBase&& src) noexcept = default;
|
||||
|
||||
/// Returns the number of bits in the BitField.
|
||||
VTKM_EXEC_CONT
|
||||
vtkm::Id GetNumberOfBits() const noexcept { return this->NumberOfBits; }
|
||||
|
||||
/// Returns how many words of type @a WordTypePreferred exist in the dataset.
|
||||
/// Note that this is rounded up and may contain partial words. See
|
||||
/// also GetFinalWordMask to handle the trailing partial word.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT vtkm::Id GetNumberOfWords() const noexcept
|
||||
{
|
||||
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
|
||||
static constexpr vtkm::Id WordSize = static_cast<vtkm::Id>(sizeof(WordType));
|
||||
static constexpr vtkm::Id WordBits = WordSize * CHAR_BIT;
|
||||
return (this->NumberOfBits + WordBits - 1) / WordBits;
|
||||
}
|
||||
|
||||
/// Return a mask in which the valid bits in the final word (of type @a
|
||||
/// WordType) are set to 1.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType GetFinalWordMask() const noexcept
|
||||
{
|
||||
if (this->NumberOfBits == 0)
|
||||
{
|
||||
return WordType{ 0 };
|
||||
}
|
||||
|
||||
static constexpr vtkm::Int32 BitsPerWord =
|
||||
static_cast<vtkm::Int32>(sizeof(WordType) * CHAR_BIT);
|
||||
|
||||
const auto maxBit = this->NumberOfBits - 1;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(maxBit);
|
||||
const vtkm::Int32 shift = BitsPerWord - coord.BitOffset - 1;
|
||||
return (~WordType{ 0 }) >> shift;
|
||||
}
|
||||
|
||||
/// Given a bit index, compute a @a BitCoordinate that identifies the
|
||||
/// corresponding word index and bit offset.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT static BitCoordinate GetBitCoordinateFromIndex(vtkm::Id bitIdx) noexcept
|
||||
{
|
||||
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
|
||||
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
|
||||
return { static_cast<vtkm::Id>(bitIdx / BitsPerWord),
|
||||
static_cast<vtkm::Int32>(bitIdx % BitsPerWord) };
|
||||
}
|
||||
|
||||
/// Set the bit at @a bitIdx to @a val. This method is not thread-safe --
|
||||
/// threads modifying bits nearby may interfere with this operation.
|
||||
/// Additionally, this should not be used for synchronization, as there are
|
||||
/// no memory ordering requirements. See SetBitAtomic for those usecases.
|
||||
VTKM_EXEC_CONT
|
||||
void SetBit(vtkm::Id bitIdx, bool val) const noexcept
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto mask = WordType(1) << coord.BitOffset;
|
||||
auto* const wordAddr = this->GetWordAddress<WordType>(coord.WordIndex);
|
||||
if (val)
|
||||
{
|
||||
*wordAddr |= mask;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wordAddr &= ~mask;
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the bit at @a bitIdx to @a val using atomic operations. This method
|
||||
/// is thread-safe and guarantees, at minimum, "release" memory ordering.
|
||||
VTKM_EXEC_CONT
|
||||
void SetBitAtomic(vtkm::Id bitIdx, bool val) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto mask = WordType(1) << coord.BitOffset;
|
||||
if (val)
|
||||
{
|
||||
this->OrWordAtomic(coord.WordIndex, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->AndWordAtomic(coord.WordIndex, ~mask);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return whether or not the bit at @a bitIdx is set. Note that this uses
|
||||
/// non-atomic loads and thus should not be used for synchronization.
|
||||
VTKM_EXEC_CONT
|
||||
bool GetBit(vtkm::Id bitIdx) const noexcept
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto word = this->GetWord<WordType>(coord.WordIndex);
|
||||
const auto mask = WordType(1) << coord.BitOffset;
|
||||
return (word & mask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Return whether or not the bit at @a bitIdx is set using atomic loads.
|
||||
/// This method is thread safe and guarantees, at minimum, "acquire" memory
|
||||
/// ordering.
|
||||
VTKM_EXEC_CONT
|
||||
bool GetBitAtomic(vtkm::Id bitIdx) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto word = this->GetWordAtomic<WordType>(coord.WordIndex);
|
||||
const auto mask = WordType(1) << coord.BitOffset;
|
||||
return (word & mask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Set the word (of type @a WordType) at @a wordIdx to @a word using
|
||||
/// non-atomic operations.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT void SetWord(vtkm::Id wordIdx, WordType word) const noexcept
|
||||
{
|
||||
*this->GetWordAddress<WordType>(wordIdx) = word;
|
||||
}
|
||||
|
||||
/// Set the word (of type @a WordType) at @a wordIdx to @a word using atomic
|
||||
/// operations. The store guarantees, at minimum, "release" memory ordering.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT void SetWordAtomic(vtkm::Id wordIdx, WordType word) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
AtomicInterface::Store(this->GetWordAddress<WordType>(wordIdx), word);
|
||||
}
|
||||
|
||||
/// Get the word (of type @a WordType) at @a wordIdx using non-atomic
|
||||
/// operations.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType GetWord(vtkm::Id wordIdx) const noexcept
|
||||
{
|
||||
return *this->GetWordAddress<WordType>(wordIdx);
|
||||
}
|
||||
|
||||
/// Get the word (of type @a WordType) at @ wordIdx using an atomic read with,
|
||||
/// at minimum, "acquire" memory ordering.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType GetWordAtomic(vtkm::Id wordIdx) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
return AtomicInterface::Load(this->GetWordAddress<WordType>(wordIdx));
|
||||
}
|
||||
|
||||
/// Toggle the bit at @a bitIdx, returning the original value. This method
|
||||
/// uses atomic operations and a full memory barrier.
|
||||
VTKM_EXEC_CONT
|
||||
bool NotBitAtomic(vtkm::Id bitIdx) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto mask = WordType(1) << coord.BitOffset;
|
||||
const auto oldWord = this->XorWordAtomic(coord.WordIndex, mask);
|
||||
return (oldWord & mask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Perform a bitwise "not" operation on the word at @a wordIdx, returning the
|
||||
/// original word. This uses atomic operations and a full memory barrier.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType NotWordAtomic(vtkm::Id wordIdx) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
|
||||
return AtomicInterface::Not(addr);
|
||||
}
|
||||
|
||||
/// Perform an "and" operation between the bit at @a bitIdx and @a val,
|
||||
/// returning the original value at @a bitIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
VTKM_EXEC_CONT
|
||||
bool AndBitAtomic(vtkm::Id bitIdx, bool val) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto bitmask = WordType(1) << coord.BitOffset;
|
||||
// wordmask is all 1's, except for BitOffset which is (val ? 1 : 0)
|
||||
const auto wordmask = val ? ~WordType(0) : ~bitmask;
|
||||
const auto oldWord = this->AndWordAtomic(coord.WordIndex, wordmask);
|
||||
return (oldWord & bitmask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Perform an "and" operation between the word at @a wordIdx and @a wordMask,
|
||||
/// returning the original word at @a wordIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType AndWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
|
||||
return AtomicInterface::And(addr, wordmask);
|
||||
}
|
||||
|
||||
/// Perform an "of" operation between the bit at @a bitIdx and @a val,
|
||||
/// returning the original value at @a bitIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
VTKM_EXEC_CONT
|
||||
bool OrBitAtomic(vtkm::Id bitIdx, bool val) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto bitmask = WordType(1) << coord.BitOffset;
|
||||
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
|
||||
const auto wordmask = val ? bitmask : WordType(0);
|
||||
const auto oldWord = this->OrWordAtomic(coord.WordIndex, wordmask);
|
||||
return (oldWord & bitmask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Perform an "or" operation between the word at @a wordIdx and @a wordMask,
|
||||
/// returning the original word at @a wordIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType OrWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
|
||||
return AtomicInterface::Or(addr, wordmask);
|
||||
}
|
||||
|
||||
/// Perform an "xor" operation between the bit at @a bitIdx and @a val,
|
||||
/// returning the original value at @a bitIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
VTKM_EXEC_CONT
|
||||
bool XorBitAtomic(vtkm::Id bitIdx, bool val) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto bitmask = WordType(1) << coord.BitOffset;
|
||||
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
|
||||
const auto wordmask = val ? bitmask : WordType(0);
|
||||
const auto oldWord = this->XorWordAtomic(coord.WordIndex, wordmask);
|
||||
return (oldWord & bitmask) != WordType(0);
|
||||
}
|
||||
|
||||
/// Perform an "xor" operation between the word at @a wordIdx and @a wordMask,
|
||||
/// returning the original word at @a wordIdx. This method uses atomic
|
||||
/// operations and a full memory barrier.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType XorWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
|
||||
return AtomicInterface::Xor(addr, wordmask);
|
||||
}
|
||||
|
||||
/// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
|
||||
/// If the value in memory is equal to @a expectedBit, it is replaced with
|
||||
/// the value of @a newBit and the original value of the bit is returned as a
|
||||
/// boolean. This method implements a full memory barrier around the atomic
|
||||
/// operation.
|
||||
VTKM_EXEC_CONT
|
||||
bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
|
||||
{
|
||||
using WordType = WordTypePreferred;
|
||||
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
|
||||
const auto bitmask = WordType(1) << coord.BitOffset;
|
||||
|
||||
WordType oldWord;
|
||||
WordType newWord;
|
||||
do
|
||||
{
|
||||
oldWord = this->GetWord<WordType>(coord.WordIndex);
|
||||
bool oldBitSet = (oldWord & bitmask) != WordType(0);
|
||||
if (oldBitSet != expectedBit)
|
||||
{ // The bit-of-interest does not match what we expected.
|
||||
return oldBitSet;
|
||||
}
|
||||
else if (oldBitSet == newBit)
|
||||
{ // The bit hasn't changed, but also already matches newVal. We're done.
|
||||
return expectedBit;
|
||||
}
|
||||
|
||||
// Compute the new word
|
||||
newWord = oldWord ^ bitmask;
|
||||
} // CAS loop to resolve any conflicting changes to other bits in the word.
|
||||
while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord);
|
||||
|
||||
return expectedBit;
|
||||
}
|
||||
|
||||
/// Perform an atomic compare-and-swap operation on the word at @a wordIdx.
|
||||
/// If the word in memory is equal to @a expectedWord, it is replaced with
|
||||
/// the value of @a newWord and the original word is returned. This method
|
||||
/// implements a full memory barrier around the atomic operation.
|
||||
template <typename WordType = WordTypePreferred>
|
||||
VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx,
|
||||
WordType newWord,
|
||||
WordType expected) const
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
|
||||
"Requested WordType does not support atomic"
|
||||
" operations on target execution platform.");
|
||||
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
|
||||
return AtomicInterface::CompareAndSwap(addr, newWord, expected);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename WordType>
|
||||
VTKM_EXEC_CONT MaybeConstPointer<WordType> GetWordAddress(vtkm::Id wordId) const noexcept
|
||||
{
|
||||
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
|
||||
return reinterpret_cast<MaybeConstPointer<WordType>>(this->Data) + wordId;
|
||||
}
|
||||
|
||||
BufferType Data{ nullptr };
|
||||
vtkm::Id NumberOfBits{ 0 };
|
||||
};
|
||||
|
||||
template <typename AtomicOps>
|
||||
using BitPortal = BitPortalBase<AtomicOps, false>;
|
||||
|
||||
template <typename AtomicOps>
|
||||
using BitPortalConst = BitPortalBase<AtomicOps, true>;
|
||||
|
||||
} // end namespace detail
|
||||
|
||||
class BitField
|
||||
{
|
||||
static constexpr vtkm::Id BlockSize = detail::BitFieldTraits::BlockSize;
|
||||
|
||||
public:
|
||||
/// The type array handle used to store the bit data internally:
|
||||
using ArrayHandleType = ArrayHandle<WordTypeDefault, StorageTagBasic>;
|
||||
|
||||
/// The BitPortal used in the control environment.
|
||||
using PortalControl = detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
|
||||
|
||||
/// A read-only BitPortal used in the control environment.
|
||||
using PortalConstControl = detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
|
||||
|
||||
template <typename Device>
|
||||
struct ExecutionTypes
|
||||
{
|
||||
/// The AtomicInterfaceExecution implementation used by the specified device.
|
||||
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<Device>;
|
||||
|
||||
/// The preferred word type used by the specified device.
|
||||
using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
|
||||
|
||||
/// A BitPortal that is usable on the specified device.
|
||||
using Portal = detail::BitPortal<AtomicInterface>;
|
||||
|
||||
/// A read-only BitPortal that is usable on the specified device.
|
||||
using PortalConst = detail::BitPortalConst<AtomicInterface>;
|
||||
};
|
||||
|
||||
/// Check whether a word type is valid for non-atomic operations.
|
||||
template <typename WordType>
|
||||
using IsValidWordType = detail::BitFieldTraits::IsValidWordType<WordType>;
|
||||
|
||||
/// Check whether a word type is valid for atomic operations on a specific
|
||||
/// device.
|
||||
template <typename WordType, typename Device>
|
||||
using IsValidWordTypeAtomic = detail::BitFieldTraits::
|
||||
IsValidWordTypeAtomic<WordType, vtkm::cont::internal::AtomicInterfaceExecution<Device>>;
|
||||
|
||||
/// Check whether a word type is valid for atomic operations from the control
|
||||
/// environment.
|
||||
template <typename WordType>
|
||||
using IsValidWordTypeAtomicControl =
|
||||
detail::BitFieldTraits::IsValidWordTypeAtomic<WordType,
|
||||
vtkm::cont::internal::AtomicInterfaceControl>;
|
||||
|
||||
VTKM_CONT BitField()
|
||||
: Internals{ std::make_shared<InternalStruct>() }
|
||||
{
|
||||
}
|
||||
VTKM_CONT BitField(const BitField& src) = default;
|
||||
VTKM_CONT BitField(BitField&& src) noexcept = default;
|
||||
VTKM_CONT ~BitField() = default;
|
||||
VTKM_CONT BitField& operator=(const BitField& src) = default;
|
||||
VTKM_CONT BitField& operator=(BitField&& src) noexcept = default;
|
||||
|
||||
VTKM_CONT
|
||||
bool operator==(const BitField& rhs) const { return this->Internals == rhs.Internals; }
|
||||
|
||||
VTKM_CONT
|
||||
bool operator!=(const BitField& rhs) const { return this->Internals != rhs.Internals; }
|
||||
|
||||
/// Return the internal ArrayHandle used to store the BitField.
|
||||
VTKM_CONT
|
||||
ArrayHandleType& GetData() { return this->Internals->Data; }
|
||||
|
||||
/// Return the internal ArrayHandle used to store the BitField.
|
||||
VTKM_CONT
|
||||
const ArrayHandleType& GetData() const { return this->Internals->Data; }
|
||||
|
||||
/// Return the number of bits stored by this BitField.
|
||||
VTKM_CONT
|
||||
vtkm::Id GetNumberOfBits() const { return this->Internals->NumberOfBits; }
|
||||
|
||||
/// Return the number of words (of @a WordType) stored in this bit fields.
|
||||
///
|
||||
template <typename WordType>
|
||||
VTKM_CONT vtkm::Id GetNumberOfWords() const
|
||||
{
|
||||
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
|
||||
static constexpr vtkm::Id WordBits = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
|
||||
return (this->Internals->NumberOfBits + WordBits - 1) / WordBits;
|
||||
}
|
||||
|
||||
/// Allocate the requested number of bits.
|
||||
VTKM_CONT
|
||||
void Allocate(vtkm::Id numberOfBits)
|
||||
{
|
||||
const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numberOfBits);
|
||||
|
||||
VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
|
||||
"BitField Allocation: %llu bits, blocked up to %s.",
|
||||
static_cast<unsigned long long>(numberOfBits),
|
||||
vtkm::cont::GetSizeString(
|
||||
static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
|
||||
.c_str());
|
||||
|
||||
this->Internals->Data.Allocate(numWords);
|
||||
this->Internals->NumberOfBits = numberOfBits;
|
||||
}
|
||||
|
||||
/// Shrink the bit field to the requested number of bits.
|
||||
VTKM_CONT
|
||||
void Shrink(vtkm::Id numberOfBits)
|
||||
{
|
||||
const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numberOfBits);
|
||||
this->Internals->Data.Shrink(numWords);
|
||||
this->Internals->NumberOfBits = numberOfBits;
|
||||
}
|
||||
|
||||
/// Release all execution-side resources held by this BitField.
|
||||
VTKM_CONT
|
||||
void ReleaseResourcesExecution() { this->Internals->Data.ReleaseResourcesExecution(); }
|
||||
|
||||
/// Release all resources held by this BitField and reset to empty.
|
||||
VTKM_CONT
|
||||
void ReleaseResources()
|
||||
{
|
||||
this->Internals->Data.ReleaseResources();
|
||||
this->Internals->NumberOfBits = 0;
|
||||
}
|
||||
|
||||
/// Force the control array to sync with the last-used device.
|
||||
VTKM_CONT
|
||||
void SyncControlArray() const { this->Internals->Data.SyncControlArray(); }
|
||||
|
||||
/// The id of the device where the most up-to-date copy of the data is
|
||||
/// currently resident. If the data is on the host, DeviceAdapterTagUndefined
|
||||
/// is returned.
|
||||
VTKM_CONT
|
||||
DeviceAdapterId GetDeviceAdapterId() const { return this->Internals->Data.GetDeviceAdapterId(); }
|
||||
|
||||
/// Get a portal to the data that is usable from the control environment.
|
||||
VTKM_CONT
|
||||
PortalControl GetPortalControl()
|
||||
{
|
||||
return PortalControl{ this->Internals->Data.GetPortalControl(), this->Internals->NumberOfBits };
|
||||
}
|
||||
|
||||
/// Get a read-only portal to the data that is usable from the control
|
||||
/// environment.
|
||||
VTKM_CONT
|
||||
PortalConstControl GetPortalConstControl() const
|
||||
{
|
||||
return PortalConstControl{ this->Internals->Data.GetPortalConstControl(),
|
||||
this->Internals->NumberOfBits };
|
||||
}
|
||||
|
||||
/// Prepares this BitField to be used as an input to an operation in the
|
||||
/// execution environment. If necessary, copies data to the execution
|
||||
/// environment. Can throw an exception if this BitField does not yet contain
|
||||
/// any data. Returns a portal that can be used in code running in the
|
||||
/// execution environment.
|
||||
template <typename DeviceAdapterTag>
|
||||
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(
|
||||
DeviceAdapterTag device) const
|
||||
{
|
||||
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::PortalConst;
|
||||
return PortalType{ this->Internals->Data.PrepareForInput(device),
|
||||
this->Internals->NumberOfBits };
|
||||
}
|
||||
|
||||
/// Prepares (allocates) this BitField to be used as an output from an
|
||||
/// operation in the execution environment. The internal state of this class
|
||||
/// is set to have valid data in the execution BitField with the assumption
|
||||
/// that the array will be filled soon (i.e. before any other methods of this
|
||||
/// object are called). Returns a portal that can be used in code running in
|
||||
/// the execution environment.
|
||||
template <typename DeviceAdapterTag>
|
||||
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForOutput(
|
||||
vtkm::Id numBits,
|
||||
DeviceAdapterTag device) const
|
||||
{
|
||||
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
|
||||
const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numBits);
|
||||
|
||||
VTKM_LOG_F(vtkm::cont::LogLevel::MemExec,
|
||||
"BitField Allocation: %llu bits, blocked up to %s.",
|
||||
static_cast<unsigned long long>(numBits),
|
||||
vtkm::cont::GetSizeString(
|
||||
static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
|
||||
.c_str());
|
||||
|
||||
auto portal = this->Internals->Data.PrepareForOutput(numWords, device);
|
||||
this->Internals->NumberOfBits = numBits;
|
||||
return PortalType{ portal, numBits };
|
||||
}
|
||||
|
||||
/// Prepares this BitField to be used in an in-place operation (both as input
|
||||
/// and output) in the execution environment. If necessary, copies data to
|
||||
/// the execution environment. Can throw an exception if this BitField does
|
||||
/// not yet contain any data. Returns a portal that can be used in code
|
||||
/// running in the execution environment.
|
||||
template <typename DeviceAdapterTag>
|
||||
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(
|
||||
DeviceAdapterTag device) const
|
||||
{
|
||||
using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
|
||||
return PortalType{ this->Internals->Data.PrepareForInPlace(device),
|
||||
this->Internals->NumberOfBits };
|
||||
}
|
||||
|
||||
private:
|
||||
/// Returns the number of words, padded out to respect BlockSize.
|
||||
VTKM_CONT
|
||||
static vtkm::Id BitsToAllocatedStorageWords(vtkm::Id numBits)
|
||||
{
|
||||
static constexpr vtkm::Id InternalWordSize = static_cast<vtkm::Id>(sizeof(WordTypeDefault));
|
||||
|
||||
// Round up to BlockSize bytes:
|
||||
const vtkm::Id bytesNeeded = (numBits + CHAR_BIT - 1) / CHAR_BIT;
|
||||
const vtkm::Id blocksNeeded = (bytesNeeded + BlockSize - 1) / BlockSize;
|
||||
const vtkm::Id numBytes = blocksNeeded * BlockSize;
|
||||
const vtkm::Id numWords = numBytes / InternalWordSize;
|
||||
return numWords;
|
||||
}
|
||||
|
||||
struct VTKM_ALWAYS_EXPORT InternalStruct
|
||||
{
|
||||
ArrayHandleType Data;
|
||||
vtkm::Id NumberOfBits;
|
||||
};
|
||||
|
||||
std::shared_ptr<InternalStruct> Internals;
|
||||
};
|
||||
}
|
||||
} // end namespace vtkm::cont
|
||||
|
||||
#endif // vtk_m_cont_BitField_h
|
@ -22,6 +22,7 @@ set(headers
|
||||
Algorithm.h
|
||||
ArrayCopy.h
|
||||
ArrayHandle.h
|
||||
ArrayHandleBitField.h
|
||||
ArrayHandleCartesianProduct.h
|
||||
ArrayHandleCast.h
|
||||
ArrayHandleCompositeVector.h
|
||||
@ -49,6 +50,7 @@ set(headers
|
||||
ArrayRangeCompute.h
|
||||
AssignerMultiBlock.h
|
||||
AtomicArray.h
|
||||
BitField.h
|
||||
BoundsCompute.h
|
||||
BoundsGlobalCompute.h
|
||||
CastAndCall.h
|
||||
|
@ -52,6 +52,15 @@ template <class DeviceAdapterTag>
|
||||
struct DeviceAdapterAlgorithm
|
||||
#ifdef VTKM_DOXYGEN_ONLY
|
||||
{
|
||||
/// \brief Create a unique, unsorted list of indices denoting which bits are
|
||||
/// set in a bitfield.
|
||||
///
|
||||
/// Returns the total number of set bits.
|
||||
template <typename IndicesStorage>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
|
||||
const vtkm::cont::BitField& bits,
|
||||
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices);
|
||||
|
||||
/// \brief Copy the contents of one ArrayHandle to another
|
||||
///
|
||||
/// Copies the contents of \c input to \c output. The array \c output will be
|
||||
@ -660,9 +669,18 @@ public:
|
||||
/// The class provide the actual implementation used by
|
||||
/// vtkm::cont::DeviceAdapterAtomicArrayImplementation.
|
||||
///
|
||||
/// TODO combine this with AtomicInterfaceExecution.
|
||||
template <typename T, typename DeviceTag>
|
||||
class DeviceAdapterAtomicArrayImplementation;
|
||||
|
||||
/// \brief Class providing device-specific support for atomic operations.
|
||||
///
|
||||
/// AtomicInterfaceControl provides atomic operations for the control
|
||||
/// environment, and may be subclassed to implement the device interface when
|
||||
/// appropriate for a CPU-based device.
|
||||
template <typename DeviceTag>
|
||||
class AtomicInterfaceExecution;
|
||||
|
||||
/// \brief Class providing a device-specific support for selecting the optimal
|
||||
/// Task type for a given worklet.
|
||||
///
|
||||
|
@ -25,6 +25,7 @@ set(headers
|
||||
TransportTagArrayInOut.h
|
||||
TransportTagArrayOut.h
|
||||
TransportTagAtomicArray.h
|
||||
TransportTagBitField.h
|
||||
TransportTagCellSetIn.h
|
||||
TransportTagExecObject.h
|
||||
TransportTagKeyedValuesIn.h
|
||||
@ -38,6 +39,7 @@ set(headers
|
||||
TypeCheck.h
|
||||
TypeCheckTagArray.h
|
||||
TypeCheckTagAtomicArray.h
|
||||
TypeCheckTagBitField.h
|
||||
TypeCheckTagCellSet.h
|
||||
TypeCheckTagCellSetStructured.h
|
||||
TypeCheckTagExecObject.h
|
||||
|
89
vtkm/cont/arg/TransportTagBitField.h
Normal file
89
vtkm/cont/arg/TransportTagBitField.h
Normal file
@ -0,0 +1,89 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2015 UT-Battelle, LLC.
|
||||
// Copyright 2015 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_arg_TransportTagBitField_h
|
||||
#define vtk_m_cont_arg_TransportTagBitField_h
|
||||
|
||||
#include <vtkm/cont/arg/Transport.h>
|
||||
|
||||
#include <vtkm/cont/BitField.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace arg
|
||||
{
|
||||
|
||||
/// Transport tag for a vtkm::cont::BitField that is only read. The matching
/// Transport specialization calls BitField::PrepareForInput.
struct TransportTagBitFieldIn
{
};

/// Transport tag for a vtkm::cont::BitField that is written. The matching
/// Transport specialization calls BitField::PrepareForInPlace, so the field
/// is not reallocated.
struct TransportTagBitFieldOut
{
};

/// Transport tag for a vtkm::cont::BitField that is both read and written.
/// The matching Transport specialization calls BitField::PrepareForInPlace.
struct TransportTagBitFieldInOut
{
};
|
||||
|
||||
template <typename Device>
|
||||
struct Transport<vtkm::cont::arg::TransportTagBitFieldIn, vtkm::cont::BitField, Device>
|
||||
{
|
||||
using ExecObjectType =
|
||||
typename vtkm::cont::BitField::template ExecutionTypes<Device>::PortalConst;
|
||||
|
||||
template <typename InputDomainType>
|
||||
VTKM_CONT ExecObjectType
|
||||
operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
|
||||
{
|
||||
return field.PrepareForInput(Device{});
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
struct Transport<vtkm::cont::arg::TransportTagBitFieldOut, vtkm::cont::BitField, Device>
|
||||
{
|
||||
using ExecObjectType = typename vtkm::cont::BitField::template ExecutionTypes<Device>::Portal;
|
||||
|
||||
template <typename InputDomainType>
|
||||
VTKM_CONT ExecObjectType
|
||||
operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
|
||||
{
|
||||
// This behaves similarly to WholeArray tags, where "Out" maps to InPlace
|
||||
// since we don't want to reallocate or enforce size restrictions.
|
||||
return field.PrepareForInPlace(Device{});
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
struct Transport<vtkm::cont::arg::TransportTagBitFieldInOut, vtkm::cont::BitField, Device>
|
||||
{
|
||||
using ExecObjectType = typename vtkm::cont::BitField::template ExecutionTypes<Device>::Portal;
|
||||
|
||||
template <typename InputDomainType>
|
||||
VTKM_CONT ExecObjectType
|
||||
operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
|
||||
{
|
||||
return field.PrepareForInPlace(Device{});
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont::arg
|
||||
|
||||
#endif //vtk_m_cont_arg_TransportTagBitField_h
|
48
vtkm/cont/arg/TypeCheckTagBitField.h
Normal file
48
vtkm/cont/arg/TypeCheckTagBitField.h
Normal file
@ -0,0 +1,48 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2016 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2016 UT-Battelle, LLC.
|
||||
// Copyright 2016 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_arg_TypeCheckTagBitField_h
|
||||
#define vtk_m_cont_arg_TypeCheckTagBitField_h
|
||||
|
||||
#include <vtkm/cont/arg/TypeCheck.h>
|
||||
|
||||
#include <vtkm/cont/BitField.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace arg
|
||||
{
|
||||
|
||||
struct TypeCheckTagBitField
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct TypeCheck<TypeCheckTagBitField, T> : public std::is_base_of<vtkm::cont::BitField, T>
|
||||
{
|
||||
};
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont::arg
|
||||
|
||||
#endif //vtk_m_cont_arg_TypeCheckTagBitField_h
|
105
vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h
Normal file
105
vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h
Normal file
@ -0,0 +1,105 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
|
||||
#define vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
|
||||
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
|
||||
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/ListTag.h>
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template <>
|
||||
class AtomicInterfaceExecution<DeviceAdapterTagCuda>
|
||||
{
|
||||
|
||||
public:
|
||||
// Note: There are 64-bit atomics available, but not on all devices. Stick
|
||||
// with 32-bit only until we require compute capability 3.5+
|
||||
using WordTypes = vtkm::ListTagBase<vtkm::UInt32>;
|
||||
using WordTypePreferred = vtkm::UInt32;
|
||||
|
||||
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Load(const type* addr) \
|
||||
{ \
|
||||
const volatile type* vaddr = addr; /* volatile to bypass cache*/ \
|
||||
const type value = *vaddr; \
|
||||
/* fence to ensure that dependent reads are correctly ordered */ \
|
||||
__threadfence(); \
|
||||
return value; \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static void Store(type* addr, type value) \
|
||||
{ \
|
||||
volatile type* vaddr = addr; /* volatile to bypass cache */ \
|
||||
/* fence to ensure that previous non-atomic stores are visible to other threads */ \
|
||||
__threadfence(); \
|
||||
*vaddr = value; \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Not(type* addr) \
|
||||
{ \
|
||||
return AtomicInterfaceExecution::Xor(addr, static_cast<type>(~type{ 0u })); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type And(type* addr, type mask) \
|
||||
{ \
|
||||
__threadfence(); \
|
||||
auto result = atomicAnd(addr, mask); \
|
||||
__threadfence(); \
|
||||
return result; \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Or(type* addr, type mask) \
|
||||
{ \
|
||||
__threadfence(); \
|
||||
auto result = atomicOr(addr, mask); \
|
||||
__threadfence(); \
|
||||
return result; \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Xor(type* addr, type mask) \
|
||||
{ \
|
||||
__threadfence(); \
|
||||
auto result = atomicXor(addr, mask); \
|
||||
__threadfence(); \
|
||||
return result; \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type CompareAndSwap( \
|
||||
type* addr, type newWord, type expected) \
|
||||
{ \
|
||||
__threadfence(); \
|
||||
auto result = atomicCAS(addr, expected, newWord); \
|
||||
__threadfence(); \
|
||||
return result; \
|
||||
}
|
||||
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
|
||||
|
||||
#undef VTKM_ATOMIC_OPS_FOR_TYPE
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
|
@ -20,6 +20,7 @@
|
||||
|
||||
set(headers
|
||||
ArrayManagerExecutionCuda.h
|
||||
AtomicInterfaceExecutionCuda.h
|
||||
CudaAllocator.h
|
||||
DeviceAdapterAlgorithmCuda.h
|
||||
DeviceAdapterAtomicArrayImplementationCuda.h
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <vtkm/UnaryPredicates.h>
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/BitField.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/ErrorExecution.h>
|
||||
#include <vtkm/cont/Logging.h>
|
||||
@ -35,6 +36,7 @@
|
||||
|
||||
#include <vtkm/cont/cuda/ErrorCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
|
||||
@ -54,8 +56,7 @@
|
||||
|
||||
// Disable warnings we check vtkm for but Thrust does not.
|
||||
VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
//This is required to be first so that we get patches for thrust included
|
||||
//in the correct order
|
||||
#include <cooperative_groups.h>
|
||||
#include <cuda.h>
|
||||
#include <thrust/advance.h>
|
||||
#include <thrust/binary_search.h>
|
||||
@ -71,6 +72,9 @@ VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <vtkm/exec/cuda/internal/ThrustPatches.h>
|
||||
VTKM_THIRDPARTY_POST_INCLUDE
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
@ -148,6 +152,22 @@ struct CastPortal
|
||||
VTKM_EXEC
|
||||
ValueType Get(vtkm::Id index) const { return this->Functor(this->Portal.Get(index)); }
|
||||
};
|
||||
|
||||
struct CudaFreeFunctor
|
||||
{
|
||||
void operator()(void* ptr) const { VTKM_CUDA_CALL(cudaFree(ptr)); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using CudaUniquePtr = std::unique_ptr<T, CudaFreeFunctor>;
|
||||
|
||||
template <typename T>
|
||||
CudaUniquePtr<T> make_CudaUniquePtr(std::size_t numElements)
|
||||
{
|
||||
T* ptr;
|
||||
VTKM_CUDA_CALL(cudaMalloc(&ptr, sizeof(T) * numElements));
|
||||
return CudaUniquePtr<T>(ptr);
|
||||
}
|
||||
}
|
||||
} // end namespace cuda::internal
|
||||
|
||||
@ -162,6 +182,132 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
|
||||
#ifndef VTKM_CUDA
|
||||
private:
|
||||
#endif
|
||||
|
||||
template <typename BitsPortal, typename IndicesPortal, typename GlobalPopCountType>
|
||||
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same<GlobalPopCountType, vtkm::Int32>::value ||
|
||||
std::is_same<GlobalPopCountType, vtkm::UInt32>::value ||
|
||||
std::is_same<GlobalPopCountType, vtkm::UInt64>::value),
|
||||
"Unsupported GlobalPopCountType. Must support CUDA atomicAdd.");
|
||||
|
||||
using Word = typename BitsPortal::WordTypePreferred;
|
||||
|
||||
VTKM_STATIC_ASSERT(
|
||||
VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));
|
||||
|
||||
VTKM_CONT
|
||||
BitFieldToUnorderedSetFunctor(const BitsPortal& input,
|
||||
const IndicesPortal& output,
|
||||
GlobalPopCountType* globalPopCount)
|
||||
: Input{ input }
|
||||
, Output{ output }
|
||||
, GlobalPopCount{ globalPopCount }
|
||||
, FinalWordIndex{ input.GetNumberOfWords() - 1 }
|
||||
, FinalWordMask(input.GetFinalWordMask())
|
||||
{
|
||||
}
|
||||
|
||||
~BitFieldToUnorderedSetFunctor() {}
|
||||
|
||||
VTKM_CONT void Initialize()
|
||||
{
|
||||
assert(this->GlobalPopCount != nullptr);
|
||||
VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType)));
|
||||
}
|
||||
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||
__device__ void operator()(vtkm::Id wordIdx) const
|
||||
{
|
||||
Word word = this->Input.GetWord(wordIdx);
|
||||
|
||||
// The last word may be partial -- mask out trailing bits if needed.
|
||||
const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 };
|
||||
|
||||
word &= mask;
|
||||
|
||||
if (word != 0)
|
||||
{
|
||||
this->LocalPopCount = vtkm::CountSetBits(word);
|
||||
this->ReduceAllocate();
|
||||
|
||||
vtkm::Id firstBitIdx = wordIdx * sizeof(Word) * CHAR_BIT;
|
||||
do
|
||||
{
|
||||
// Find next bit. FindFirstSetBit's result is indexed starting at 1.
|
||||
vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
|
||||
vtkm::Id outIdx = this->GetNextOutputIndex();
|
||||
// Write index of bit
|
||||
this->Output.Set(outIdx, firstBitIdx + bit);
|
||||
word ^= (1 << bit); // clear bit
|
||||
} while (word != 0); // have bits
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::Id Finalize() const
|
||||
{
|
||||
assert(this->GlobalPopCount != nullptr);
|
||||
GlobalPopCountType result;
|
||||
VTKM_CUDA_CALL(cudaMemcpy(
|
||||
&result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost));
|
||||
return static_cast<vtkm::Id>(result);
|
||||
}
|
||||
|
||||
private:
|
||||
// Every thread with a non-zero local popcount calls this function, which
|
||||
// computes the total popcount for the coalesced threads and allocates
|
||||
// a contiguous block in the output by atomically increasing the global
|
||||
// popcount.
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||
__device__ void ReduceAllocate() const
|
||||
{
|
||||
const auto activeLanes = cooperative_groups::coalesced_threads();
|
||||
const int activeRank = activeLanes.thread_rank();
|
||||
const int activeSize = activeLanes.size();
|
||||
|
||||
// Reduction value:
|
||||
vtkm::Int32 rVal = this->LocalPopCount;
|
||||
for (int delta = 1; delta < activeSize; delta *= 2)
|
||||
{
|
||||
rVal += activeLanes.shfl_down(rVal, delta);
|
||||
}
|
||||
|
||||
if (activeRank == 0)
|
||||
{
|
||||
this->AllocationHead =
|
||||
atomicAdd(this->GlobalPopCount, static_cast<GlobalPopCountType>(rVal));
|
||||
}
|
||||
|
||||
this->AllocationHead = activeLanes.shfl(this->AllocationHead, 0);
|
||||
}
|
||||
|
||||
// The global output allocation is written to by striding the writes across
|
||||
// the warp lanes, allowing the writes to global memory to be coalesced.
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||
__device__ vtkm::Id GetNextOutputIndex() const
|
||||
{
|
||||
// Only lanes with unwritten output indices left will call this method,
|
||||
// so just check the coalesced threads:
|
||||
const auto activeLanes = cooperative_groups::coalesced_threads();
|
||||
const int activeRank = activeLanes.thread_rank();
|
||||
const int activeSize = activeLanes.size();
|
||||
|
||||
vtkm::Id nextIdx = static_cast<vtkm::Id>(this->AllocationHead + activeRank);
|
||||
this->AllocationHead += activeSize;
|
||||
|
||||
return nextIdx;
|
||||
}
|
||||
|
||||
const BitsPortal Input;
|
||||
const IndicesPortal Output;
|
||||
GlobalPopCountType* GlobalPopCount;
|
||||
mutable vtkm::UInt64 AllocationHead{ 0 };
|
||||
mutable vtkm::Int32 LocalPopCount{ 0 };
|
||||
// Used to mask trailing bits the in last word.
|
||||
vtkm::Id FinalWordIndex{ 0 };
|
||||
Word FinalWordMask{ 0 };
|
||||
};
|
||||
|
||||
template <class InputPortal, class OutputPortal>
|
||||
VTKM_CONT static void CopyPortal(const InputPortal& input, const OutputPortal& output)
|
||||
{
|
||||
@ -746,9 +892,43 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GlobalPopCountType, typename BitsPortal, typename IndicesPortal>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSetPortal(const BitsPortal& bits,
|
||||
const IndicesPortal& indices)
|
||||
{
|
||||
using Functor = BitFieldToUnorderedSetFunctor<BitsPortal, IndicesPortal, GlobalPopCountType>;
|
||||
|
||||
// RAII for the global atomic counter.
|
||||
auto globalCount = cuda::internal::make_CudaUniquePtr<GlobalPopCountType>(1);
|
||||
Functor functor{ bits, indices, globalCount.get() };
|
||||
|
||||
functor.Initialize();
|
||||
Schedule(functor, bits.GetNumberOfWords());
|
||||
Synchronize(); // Ensure kernel is done before checking final atomic count
|
||||
return functor.Finalize();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
public:
|
||||
template <typename IndicesStorage>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
|
||||
const vtkm::cont::BitField& bits,
|
||||
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTagCuda{});
|
||||
auto indicesPortal = indices.PrepareForOutput(numBits, DeviceAdapterTagCuda{});
|
||||
|
||||
// Use a uint64 for accumulator, as atomicAdd does not support signed int64.
|
||||
numBits = BitFieldToUnorderedSetPortal<vtkm::UInt64>(bitsPortal, indicesPortal);
|
||||
|
||||
indices.Shrink(numBits);
|
||||
return numBits;
|
||||
}
|
||||
|
||||
template <typename T, typename U, class SIn, class SOut>
|
||||
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, SIn>& input,
|
||||
vtkm::cont::ArrayHandle<U, SOut>& output)
|
||||
|
@ -22,6 +22,7 @@ set(unit_tests
|
||||
UnitTestCudaArrayHandle.cu
|
||||
UnitTestCudaArrayHandleFancy.cu
|
||||
UnitTestCudaArrayHandleVirtualCoordinates.cu
|
||||
UnitTestCudaBitField.cu
|
||||
UnitTestCudaCellLocatorRectilinearGrid.cu
|
||||
UnitTestCudaCellLocatorUniformBins.cu
|
||||
UnitTestCudaCellLocatorUniformGrid.cu
|
||||
|
34
vtkm/cont/cuda/testing/UnitTestCudaBitField.cu
Normal file
34
vtkm/cont/cuda/testing/UnitTestCudaBitField.cu
Normal file
@ -0,0 +1,34 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
// Make sure that the tested code is using the device adapter specified. This
|
||||
// is important in the long run so we don't, for example, use the CUDA device
|
||||
// for a part of an operation where the TBB device was specified.
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
|
||||
#include <vtkm/cont/testing/TestingBitField.h>
|
||||
|
||||
int UnitTestCudaBitField(int argc, char* argv[])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
|
||||
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagCuda>::Run(argc, argv);
|
||||
}
|
223
vtkm/cont/internal/AtomicInterfaceControl.h
Normal file
223
vtkm/cont/internal/AtomicInterfaceControl.h
Normal file
@ -0,0 +1,223 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_internal_AtomicInterfaceControl_h
|
||||
#define vtk_m_cont_internal_AtomicInterfaceControl_h
|
||||
|
||||
#include <vtkm/internal/Configure.h>
|
||||
#include <vtkm/internal/Windows.h>
|
||||
|
||||
#include <vtkm/ListTag.h>
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
|
||||
#include <intrin.h> // For MSVC atomics
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/**
|
||||
* Implementation of AtomicInterfaceDevice that uses control-side atomics.
|
||||
*/
|
||||
class AtomicInterfaceControl
|
||||
{
|
||||
public:
|
||||
using WordTypes = vtkm::ListTagBase<vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64>;
|
||||
|
||||
// TODO These support UInt64, too. This should be benchmarked to see which
|
||||
// is faster.
|
||||
using WordTypePreferred = vtkm::UInt32;
|
||||
|
||||
#ifdef VTKM_MSVC
|
||||
private:
|
||||
template <typename To, typename From>
|
||||
VTKM_EXEC_CONT static To BitCast(const From& src)
|
||||
{
|
||||
// The memcpy should be removed by the compiler when possible, but this
|
||||
// works around a host of issues with bitcasting using reinterpret_cast.
|
||||
VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
|
||||
To dst;
|
||||
std::memcpy(&dst, &src, sizeof(From));
|
||||
return dst;
|
||||
}
|
||||
|
||||
public:
|
||||
// Note about Load and Store implementations:
|
||||
//
|
||||
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
|
||||
// operations"
|
||||
//
|
||||
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
|
||||
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
|
||||
// atomic on 32-bit Windows."
|
||||
//
|
||||
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
|
||||
// guaranteed to be atomic on any platform."
|
||||
//
|
||||
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
|
||||
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt8 Load(const vtkm::UInt8* addr)
|
||||
{
|
||||
// This assumes that the memory interface is smart enough to load a 32-bit
|
||||
// word atomically and a properly aligned 8-bit word from it.
|
||||
// We could build address masks and do shifts to perform this manually if
|
||||
// this assumption is incorrect.
|
||||
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
return result;
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt16 Load(const vtkm::UInt16* addr)
|
||||
{
|
||||
// This assumes that the memory interface is smart enough to load a 32-bit
|
||||
// word atomically and a properly aligned 16-bit word from it.
|
||||
// We could build address masks and do shifts to perform this manually if
|
||||
// this assumption is incorrect.
|
||||
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
return result;
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt32 Load(const vtkm::UInt32* addr)
|
||||
{
|
||||
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
return result;
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt64 Load(const vtkm::UInt64* addr)
|
||||
{
|
||||
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
|
||||
std::atomic_thread_fence(std::memory_order_acquire);
|
||||
return result;
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt8* addr, vtkm::UInt8 val)
|
||||
{
|
||||
// There doesn't seem to be an atomic store instruction in the windows
|
||||
// API, so just exchange and discard the result.
|
||||
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt16* addr, vtkm::UInt16 val)
|
||||
{
|
||||
// There doesn't seem to be an atomic store instruction in the windows
|
||||
// API, so just exchange and discard the result.
|
||||
_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt32* addr, vtkm::UInt32 val)
|
||||
{
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
*addr = val;
|
||||
}
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt64* addr, vtkm::UInt64 val)
|
||||
{
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
*addr = val;
|
||||
}
|
||||
|
||||
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Not(vtkmType* addr) \
|
||||
{ \
|
||||
return Xor(addr, static_cast<vtkmType>(~vtkmType{ 0u })); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType And(vtkmType* addr, vtkmType mask) \
|
||||
{ \
|
||||
return BitCast<vtkmType>( \
|
||||
_InterlockedAnd##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Or(vtkmType* addr, vtkmType mask) \
|
||||
{ \
|
||||
return BitCast<vtkmType>( \
|
||||
_InterlockedOr##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Xor(vtkmType* addr, vtkmType mask) \
|
||||
{ \
|
||||
return BitCast<vtkmType>( \
|
||||
_InterlockedXor##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType CompareAndSwap( \
|
||||
vtkmType* addr, vtkmType newWord, vtkmType expected) \
|
||||
{ \
|
||||
return BitCast<vtkmType>( \
|
||||
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
|
||||
BitCast<winType>(newWord), \
|
||||
BitCast<winType>(expected))); \
|
||||
}
|
||||
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
|
||||
|
||||
#undef VTKM_ATOMIC_OPS_FOR_TYPE
|
||||
|
||||
#else // gcc/clang
|
||||
|
||||
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Load(const type* addr) \
|
||||
{ \
|
||||
return __atomic_load_n(addr, __ATOMIC_ACQUIRE); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(type* addr, type value) \
|
||||
{ \
|
||||
return __atomic_store_n(addr, value, __ATOMIC_RELEASE); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Not(type* addr) \
|
||||
{ \
|
||||
return Xor(addr, static_cast<type>(~type{ 0u })); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type And(type* addr, type mask) \
|
||||
{ \
|
||||
return __atomic_fetch_and(addr, mask, __ATOMIC_SEQ_CST); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Or(type* addr, type mask) \
|
||||
{ \
|
||||
return __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Xor(type* addr, type mask) \
|
||||
{ \
|
||||
return __atomic_fetch_xor(addr, mask, __ATOMIC_SEQ_CST); \
|
||||
} \
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type CompareAndSwap( \
|
||||
type* addr, type newWord, type expected) \
|
||||
{ \
|
||||
__atomic_compare_exchange_n( \
|
||||
addr, &expected, newWord, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
|
||||
return expected; \
|
||||
}
|
||||
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8)
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16)
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
|
||||
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64)
|
||||
|
||||
#undef VTKM_ATOMIC_OPS_FOR_TYPE
|
||||
|
||||
#endif
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_internal_AtomicInterfaceControl_h
|
113
vtkm/cont/internal/AtomicInterfaceExecution.h
Normal file
113
vtkm/cont/internal/AtomicInterfaceExecution.h
Normal file
@ -0,0 +1,113 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_internal_AtomicInterfaceExecution_h
|
||||
#define vtk_m_cont_internal_AtomicInterfaceExecution_h
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/// Class template that provides a collection of static methods that perform
|
||||
/// atomic operations on raw addresses. It is the responsibility of the caller
|
||||
/// to ensure that the addresses are properly aligned.
|
||||
///
|
||||
/// The class defines a WordTypePreferred member that is the fastest available
|
||||
/// for bitwise operations of the given device. At minimum, the interface must
|
||||
/// support operations on WordTypePreferred and vtkm::WordTypeDefault, which may
|
||||
/// be the same. A full list of supported word types is advertised in the type
|
||||
/// list @a WordTypes.
|
||||
///
|
||||
/// To implement this on devices that share the control environment, subclass
|
||||
/// vtkm::cont::internal::AtomicInterfaceControl, which may also be used
|
||||
/// directly from control-side code.
|
||||
template <typename DeviceTag>
class AtomicInterfaceExecution
#ifdef VTKM_DOXYGEN_ONLY
{
public:
  /// The preferred word type for the target device for bitwise atomic
  /// operations.
  using WordTypePreferred = FastestWordTypeForDevice;

  /// The list of word types that this interface supports.
  using WordTypes = vtkm::ListTagBase<vtkm::WordTypeDefault, WordTypePreferred>;

  /// Atomically load a value from memory while enforcing, at minimum, "acquire"
  /// memory ordering. The address is taken as a pointer-to-const, matching
  /// the Load implementation in AtomicInterfaceControl.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Load(const vtkm::WordTypeDefault* addr);
  VTKM_EXEC static WordTypePreferred Load(const WordTypePreferred* addr);
  /// @}

  /// Atomically write a value to memory while enforcing, at minimum, "release"
  /// memory ordering.
  /// @{
  VTKM_EXEC static void Store(vtkm::WordTypeDefault* addr, vtkm::WordTypeDefault value);
  VTKM_EXEC static void Store(WordTypePreferred* addr, WordTypePreferred value);
  /// @}

  /// Perform a bitwise atomic not operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Not(vtkm::WordTypeDefault* addr);
  VTKM_EXEC static WordTypePreferred Not(WordTypePreferred* addr);
  /// @}

  /// Perform a bitwise atomic and operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault And(vtkm::WordTypeDefault* addr,
                                             vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred And(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform a bitwise atomic or operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Or(vtkm::WordTypeDefault* addr,
                                            vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred Or(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform a bitwise atomic xor operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Xor(vtkm::WordTypeDefault* addr,
                                             vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred Xor(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform an atomic CAS operation on the word at @a addr: @a newWord is
  /// the replacement value and @a expected is the value compared against.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault CompareAndSwap(vtkm::WordTypeDefault* addr,
                                                        vtkm::WordTypeDefault newWord,
                                                        vtkm::WordTypeDefault expected);
  VTKM_EXEC static WordTypePreferred CompareAndSwap(WordTypePreferred* addr,
                                                    WordTypePreferred newWord,
                                                    WordTypePreferred expected);
  /// @}
}
#endif // VTKM_DOXYGEN_ONLY
  ;
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_internal_AtomicInterfaceExecution_h
|
@ -28,6 +28,8 @@ set(headers
|
||||
ArrayPortalFromIterators.h
|
||||
ArrayPortalShrink.h
|
||||
ArrayTransfer.h
|
||||
AtomicInterfaceControl.h
|
||||
AtomicInterfaceExecution.h
|
||||
ConnectivityExplicitInternals.h
|
||||
DeviceAdapterAlgorithmGeneral.h
|
||||
DeviceAdapterAtomicArrayImplementation.h
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <vtkm/cont/ArrayHandleIndex.h>
|
||||
#include <vtkm/cont/ArrayHandleStreaming.h>
|
||||
#include <vtkm/cont/ArrayHandleZip.h>
|
||||
#include <vtkm/cont/BitField.h>
|
||||
#include <vtkm/cont/Logging.h>
|
||||
#include <vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h>
|
||||
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||
@ -123,6 +124,35 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
//--------------------------------------------------------------------------
|
||||
// BitFieldToUnorderedSet
|
||||
template <typename IndicesStorage>
|
||||
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
|
||||
const vtkm::cont::BitField& bits,
|
||||
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
|
||||
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{});
|
||||
auto indicesPortal = indices.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
std::atomic<vtkm::UInt64> popCount;
|
||||
popCount.store(0, std::memory_order_seq_cst);
|
||||
|
||||
using Functor = BitFieldToUnorderedSetFunctor<decltype(bitsPortal), decltype(indicesPortal)>;
|
||||
Functor functor{ bitsPortal, indicesPortal, popCount };
|
||||
|
||||
DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances());
|
||||
DerivedAlgorithm::Synchronize();
|
||||
|
||||
numBits = static_cast<vtkm::Id>(popCount.load(std::memory_order_seq_cst));
|
||||
|
||||
indices.Shrink(numBits);
|
||||
return numBits;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Copy
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
|
@ -24,10 +24,12 @@
|
||||
#include <vtkm/TypeTraits.h>
|
||||
#include <vtkm/UnaryPredicates.h>
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/exec/FunctorBase.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
@ -332,6 +334,142 @@ struct ShiftCopyAndInit : vtkm::exec::FunctorBase
|
||||
}
|
||||
};
|
||||
|
||||
template <class BitsPortal, class IndicesPortal>
|
||||
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
using WordType = typename BitsPortal::WordTypePreferred;
|
||||
|
||||
// This functor executes a number of instances, where each instance handles
|
||||
// two cachelines worth of data. Figure out how many words that is:
|
||||
static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT;
|
||||
static constexpr vtkm::Id WordsPerCacheLine =
|
||||
CacheLineSize / static_cast<vtkm::Id>(sizeof(WordType));
|
||||
static constexpr vtkm::Id CacheLinesPerInstance = 2;
|
||||
static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine;
|
||||
|
||||
VTKM_STATIC_ASSERT(
|
||||
VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));
|
||||
|
||||
VTKM_CONT
|
||||
BitFieldToUnorderedSetFunctor(const BitsPortal& input,
|
||||
IndicesPortal& output,
|
||||
std::atomic<vtkm::UInt64>& popCount)
|
||||
: Input{ input }
|
||||
, Output{ output }
|
||||
, PopCount(popCount)
|
||||
, FinalWordIndex{ input.GetNumberOfWords() - 1 }
|
||||
, FinalWordMask(input.GetFinalWordMask())
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::Id GetNumberOfInstances() const
|
||||
{
|
||||
const auto numWords = this->Input.GetNumberOfWords();
|
||||
return (numWords + WordsPerInstance - 1) / WordsPerInstance;
|
||||
}
|
||||
|
||||
VTKM_EXEC void operator()(vtkm::Id instanceIdx) const
|
||||
{
|
||||
const vtkm::Id numWords = this->Input.GetNumberOfWords();
|
||||
const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords);
|
||||
const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords);
|
||||
|
||||
if (wordStart != wordEnd) // range is valid
|
||||
{
|
||||
this->ExecuteRange(wordStart, wordEnd);
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const
|
||||
{
|
||||
#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function:
|
||||
// Count bits and allocate space for output:
|
||||
vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd);
|
||||
if (chunkBits > 0)
|
||||
{
|
||||
vtkm::UInt64 outIdx = this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed);
|
||||
|
||||
this->ProcessWords(wordStart, wordEnd, static_cast<vtkm::Id>(outIdx));
|
||||
}
|
||||
#else
|
||||
(void)wordStart;
|
||||
(void)wordEnd;
|
||||
#endif
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); }
|
||||
|
||||
private:
|
||||
VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const
|
||||
{
|
||||
// Need to mask out trailing bits from the final word:
|
||||
const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
wordEnd = this->FinalWordIndex;
|
||||
}
|
||||
|
||||
vtkm::Int32 tmp = 0;
|
||||
for (vtkm::Id i = wordStart; i < wordEnd; ++i)
|
||||
{
|
||||
tmp += vtkm::CountSetBits(this->Input.GetWord(i));
|
||||
}
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask);
|
||||
}
|
||||
|
||||
return static_cast<vtkm::UInt64>(tmp);
|
||||
}
|
||||
|
||||
VTKM_EXEC void ProcessWords(vtkm::Id wordStart, vtkm::Id wordEnd, vtkm::Id outputStartIdx) const
|
||||
{
|
||||
// Need to mask out trailing bits from the final word:
|
||||
const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
wordEnd = this->FinalWordIndex;
|
||||
}
|
||||
|
||||
for (vtkm::Id i = wordStart; i < wordEnd; ++i)
|
||||
{
|
||||
const vtkm::Id firstBitIdx = i * static_cast<vtkm::Id>(sizeof(WordType)) * CHAR_BIT;
|
||||
WordType word = this->Input.GetWord(i);
|
||||
while (word != 0) // have bits
|
||||
{
|
||||
// Find next bit. FindFirstSetBit starts counting at 1.
|
||||
vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
|
||||
this->Output.Set(outputStartIdx++, firstBitIdx + bit); // Write index of bit
|
||||
word ^= (1 << bit); // clear bit
|
||||
}
|
||||
}
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
const vtkm::Id i = this->FinalWordIndex;
|
||||
const vtkm::Id firstBitIdx = i * static_cast<vtkm::Id>(sizeof(WordType)) * CHAR_BIT;
|
||||
WordType word = this->Input.GetWord(i) & this->FinalWordMask;
|
||||
while (word != 0) // have bits
|
||||
{
|
||||
// Find next bit. FindFirstSetBit starts counting at 1.
|
||||
vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
|
||||
this->Output.Set(outputStartIdx++, firstBitIdx + bit); // Write index of bit
|
||||
word ^= (1 << bit); // clear bit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BitsPortal Input;
|
||||
IndicesPortal Output;
|
||||
std::atomic<vtkm::UInt64>& PopCount;
|
||||
// Used to mask trailing bits the in last word.
|
||||
vtkm::Id FinalWordIndex{ 0 };
|
||||
WordType FinalWordMask{ 0 };
|
||||
};
|
||||
|
||||
template <class InputPortalType, class OutputPortalType>
|
||||
struct CopyKernel
|
||||
{
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#ifdef VTKM_ENABLE_OPENMP
|
||||
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
|
||||
#endif
|
||||
|
45
vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h
Normal file
45
vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h
Normal file
@ -0,0 +1,45 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/// Atomic interface for the OpenMP device adapter. It adds nothing of its
/// own: every operation is inherited from AtomicInterfaceControl.
template <>
|
||||
class AtomicInterfaceExecution<DeviceAdapterTagOpenMP> : public AtomicInterfaceControl
|
||||
{
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
|
@ -23,6 +23,7 @@ set(headers
|
||||
DeviceAdapterAlgorithmOpenMP.h
|
||||
DeviceAdapterRuntimeDetectorOpenMP.h
|
||||
DeviceAdapterTagOpenMP.h
|
||||
AtomicInterfaceExecutionOpenMP.h
|
||||
ExecutionArrayInterfaceBasicOpenMP.h
|
||||
FunctorsOpenMP.h
|
||||
ParallelQuickSortOpenMP.h
|
||||
|
@ -22,6 +22,7 @@ set(unit_tests
|
||||
UnitTestOpenMPArrayHandle.cxx
|
||||
UnitTestOpenMPArrayHandleFancy.cxx
|
||||
UnitTestOpenMPArrayHandleVirtualCoordinates.cxx
|
||||
UnitTestOpenMPBitField.cxx
|
||||
UnitTestOpenMPCellLocatorRectilinearGrid.cxx
|
||||
UnitTestOpenMPCellLocatorUniformBins.cxx
|
||||
UnitTestOpenMPCellLocatorUniformGrid.cxx
|
||||
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPBitField.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPBitField.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingBitField.h>
|
||||
|
||||
int UnitTestOpenMPBitField(int argc, char* argv[])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagOpenMP>::Run(argc, argv);
|
||||
}
|
@ -24,6 +24,7 @@
|
||||
// clang-format off
|
||||
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
|
||||
#include <vtkm/cont/serial/internal/DeviceAdapterRuntimeDetectorSerial.h>
|
||||
#include <vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h>
|
||||
#include <vtkm/cont/serial/internal/ArrayManagerExecutionSerial.h>
|
||||
#include <vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h>
|
||||
#include <vtkm/cont/serial/internal/VirtualObjectTransferSerial.h>
|
||||
|
45
vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h
Normal file
45
vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h
Normal file
@ -0,0 +1,45 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
|
||||
#define vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
|
||||
|
||||
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
|
||||
|
||||
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/// Atomic interface for the serial device adapter. It adds nothing of its
/// own: every operation is inherited from AtomicInterfaceControl.
template <>
|
||||
class AtomicInterfaceExecution<DeviceAdapterTagSerial> : public AtomicInterfaceControl
|
||||
{
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
|
@ -20,6 +20,7 @@
|
||||
|
||||
set(headers
|
||||
ArrayManagerExecutionSerial.h
|
||||
AtomicInterfaceExecutionSerial.h
|
||||
DeviceAdapterAlgorithmSerial.h
|
||||
DeviceAdapterRuntimeDetectorSerial.h
|
||||
DeviceAdapterTagSerial.h
|
||||
|
@ -22,6 +22,7 @@ set(unit_tests
|
||||
UnitTestSerialArrayHandle.cxx
|
||||
UnitTestSerialArrayHandleFancy.cxx
|
||||
UnitTestSerialArrayHandleVirtualCoordinates.cxx
|
||||
UnitTestSerialBitField.cxx
|
||||
UnitTestSerialCellLocatorRectilinearGrid.cxx
|
||||
UnitTestSerialCellLocatorUniformBins.cxx
|
||||
UnitTestSerialCellLocatorUniformGrid.cxx
|
||||
|
34
vtkm/cont/serial/testing/UnitTestSerialBitField.cxx
Normal file
34
vtkm/cont/serial/testing/UnitTestSerialBitField.cxx
Normal file
@ -0,0 +1,34 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
// Make sure that the tested code is using the device adapter specified. This
|
||||
// is important in the long run so we don't, for example, use the CUDA device
|
||||
// for a part of an operation where the TBB device was specified.
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||
#include <vtkm/cont/testing/TestingBitField.h>
|
||||
|
||||
int UnitTestSerialBitField(int argc, char* argv[])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagSerial{});
|
||||
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagSerial>::Run(argc, argv);
|
||||
}
|
@ -25,6 +25,7 @@
|
||||
|
||||
#ifdef VTKM_ENABLE_TBB
|
||||
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
|
||||
#include <vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h>
|
||||
#include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
|
||||
#include <vtkm/cont/tbb/internal/VirtualObjectTransferTBB.h>
|
||||
#endif
|
||||
|
45
vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h
Normal file
45
vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h
Normal file
@ -0,0 +1,45 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
|
||||
#define vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
|
||||
|
||||
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
|
||||
|
||||
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/// Atomic interface for the TBB device adapter. It adds nothing of its
/// own: every operation is inherited from AtomicInterfaceControl.
template <>
|
||||
class AtomicInterfaceExecution<DeviceAdapterTagTBB> : public AtomicInterfaceControl
|
||||
{
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::internal
|
||||
|
||||
#endif // vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
|
@ -20,6 +20,7 @@
|
||||
|
||||
set(headers
|
||||
ArrayManagerExecutionTBB.h
|
||||
AtomicInterfaceExecutionTBB.h
|
||||
DeviceAdapterAlgorithmTBB.h
|
||||
DeviceAdapterRuntimeDetectorTBB.h
|
||||
DeviceAdapterTagTBB.h
|
||||
|
@ -22,6 +22,7 @@ set(unit_tests
|
||||
UnitTestTBBArrayHandle.cxx
|
||||
UnitTestTBBArrayHandleFancy.cxx
|
||||
UnitTestTBBArrayHandleVirtualCoordinates.cxx
|
||||
UnitTestTBBBitField.cxx
|
||||
UnitTestTBBCellLocatorRectilinearGrid.cxx
|
||||
UnitTestTBBCellLocatorUniformBins.cxx
|
||||
UnitTestTBBCellLocatorUniformGrid.cxx
|
||||
|
34
vtkm/cont/tbb/testing/UnitTestTBBBitField.cxx
Normal file
34
vtkm/cont/tbb/testing/UnitTestTBBBitField.cxx
Normal file
@ -0,0 +1,34 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
// Make sure that the tested code is using the device adapter specified. This
|
||||
// is important in the long run so we don't, for example, use the CUDA device
|
||||
// for a part of an operation where the TBB device was specified.
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
|
||||
#include <vtkm/cont/testing/TestingBitField.h>
|
||||
|
||||
int UnitTestTBBBitField(int argc, char* argv[])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});
|
||||
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagTBB>::Run(argc, argv);
|
||||
}
|
685
vtkm/cont/testing/TestingBitField.h
Normal file
685
vtkm/cont/testing/TestingBitField.h
Normal file
@ -0,0 +1,685 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2019 UT-Battelle, LLC.
|
||||
// Copyright 2019 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_testing_TestingBitFields_h
|
||||
#define vtk_m_cont_testing_TestingBitFields_h
|
||||
|
||||
#include <vtkm/cont/ArrayHandleBitField.h>
|
||||
#include <vtkm/cont/ArrayHandleCounting.h>
|
||||
#include <vtkm/cont/BitField.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
|
||||
#include <vtkm/exec/FunctorBase.h>
|
||||
|
||||
#include <vtkm/worklet/Invoker.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
// Test assertion usable inside functions that return bool. When `cond` is
// false it prints the file, line, the condition text and the `message`
// subtest label with printf, then makes the enclosing function return false.
// The do/while(false) wrapper lets the macro be used like a single statement.
#define DEVICE_ASSERT_MSG(cond, message) \
  do \
  { \
    if (!(cond)) \
    { \
      printf("Testing assert failed at %s:%d\n\t- Condition: %s\n\t- Subtest: %s\n", \
             __FILE__, __LINE__, #cond, message); \
      return false; \
    } \
  } while (false)
|
||||
|
||||
// Test assertion usable inside functions that return bool. When `cond` is
// false it prints the file, line and the condition text with printf, then
// makes the enclosing function return false.
#define DEVICE_ASSERT(cond) \
  do \
  { \
    if (!(cond)) \
    { \
      printf("Testing assert failed at %s:%d\n\t- Condition: %s\n", \
             __FILE__, __LINE__, #cond); \
      return false; \
    } \
  } while (false)
|
||||
|
||||
// Test with some trailing bits in partial last word:
|
||||
// Bit count used by the tests. 7681 = 7680 + 1 and 7680 is a multiple of 8,
// 16, 32 and 64, so each of those word sizes ends with a single bit in a
// partial final word.
#define NUM_BITS vtkm::Id{ 7681 }
|
||||
|
||||
using vtkm::cont::BitField;
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace testing
|
||||
{
|
||||
|
||||
// Takes an ArrayHandleBitField as the boolean condition field
|
||||
class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
|
||||
{
|
||||
public:
|
||||
using ControlSignature = void(FieldIn cond, FieldIn trueVals, FieldIn falseVals, FieldOut result);
|
||||
using ExecutionSignature = _4(_1, _2, _3);
|
||||
|
||||
template <typename T>
|
||||
VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
|
||||
{
|
||||
return cond ? trueVal : falseVal;
|
||||
}
|
||||
};
|
||||
|
||||
// Takes a BitFieldInOut as the condition information, and reverses
|
||||
// the bits in place after performing the merge.
|
||||
class ConditionalMergeWorklet2 : public vtkm::worklet::WorkletMapField
|
||||
{
|
||||
public:
|
||||
using ControlSignature = void(BitFieldInOut bits,
|
||||
FieldIn trueVals,
|
||||
FieldIn falseVal,
|
||||
FieldOut result);
|
||||
using ExecutionSignature = _4(InputIndex, _1, _2, _3);
|
||||
using InputDomain = _2;
|
||||
|
||||
template <typename BitPortal, typename T>
|
||||
VTKM_EXEC T
|
||||
operator()(const vtkm::Id i, BitPortal& bits, const T& trueVal, const T& falseVal) const
|
||||
{
|
||||
return bits.XorBitAtomic(i, true) ? trueVal : falseVal;
|
||||
}
|
||||
};
|
||||
|
||||
/// This class has a single static member, Run, that runs all tests with the
|
||||
/// given DeviceAdapter.
|
||||
template <class DeviceAdapterTag>
|
||||
struct TestingBitField
|
||||
{
|
||||
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;
|
||||
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<DeviceAdapterTag>;
|
||||
using Traits = vtkm::cont::detail::BitFieldTraits;
|
||||
using WordTypes = typename AtomicInterface::WordTypes;
|
||||
using WordTypesControl = vtkm::cont::internal::AtomicInterfaceControl::WordTypes;
|
||||
|
||||
/// Deterministically maps an index to a pseudorandom boolean.
/// The same index always yields the same bit.
VTKM_EXEC_CONT
static bool RandomBitFromIndex(vtkm::Id idx) noexcept
{
  // Accumulate a handful of integer terms of idx; only the parity of the sum
  // is used.
  vtkm::Id mixed = idx;
  mixed += idx * 2;
  mixed -= idx / 3;
  mixed += idx * 5 / 7;
  mixed -= idx * 11 / 13;
  return (mixed % 2) == 1;
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_EXEC_CONT static WordType RandomWordFromIndex(vtkm::Id idx) noexcept
|
||||
{
|
||||
vtkm::UInt64 m = static_cast<vtkm::UInt64>(idx * (NUM_BITS - 1) + (idx + 1) * NUM_BITS);
|
||||
m ^= m << 3;
|
||||
m ^= m << 7;
|
||||
m ^= m << 15;
|
||||
m ^= m << 31;
|
||||
m = (m << 32) | (m >> 32);
|
||||
|
||||
const size_t mBits = 64;
|
||||
const size_t wordBits = sizeof(WordType) * CHAR_BIT;
|
||||
|
||||
const WordType highWord = static_cast<WordType>(m >> (mBits - wordBits));
|
||||
return highWord;
|
||||
}
|
||||
|
||||
/// Builds a BitField of \a numBits bits where bit i holds RandomBitFromIndex(i).
VTKM_CONT
static BitField RandomBitField(vtkm::Id numBits = NUM_BITS)
{
  BitField result;
  result.Allocate(numBits);

  auto writer = result.GetPortalControl();
  vtkm::Id bitIdx = 0;
  while (bitIdx < numBits)
  {
    writer.SetBit(bitIdx, RandomBitFromIndex(bitIdx));
    ++bitIdx;
  }

  return result;
}
|
||||
|
||||
/// Checks that allocating NUM_BITS bits produces backing storage whose byte
/// size equals NUM_BITS rounded up to whole bytes and then to whole blocks of
/// BitFieldTraits::BlockSize bytes.
VTKM_CONT
static void TestBlockAllocation()
{
  BitField field;
  field.Allocate(NUM_BITS);

  // Expected size: bits -> whole bytes -> whole blocks.
  const vtkm::Id blockSize = vtkm::cont::detail::BitFieldTraits::BlockSize;
  const vtkm::Id wholeBytes = (NUM_BITS + CHAR_BIT - 1) / CHAR_BIT;
  const vtkm::Id wholeBlocks = (wholeBytes + blockSize - 1) / blockSize;
  const vtkm::Id expectedBytes = wholeBlocks * blockSize;

  // Actual size: number of stored values times sizeof(WordTypeDefault).
  const vtkm::Id bytesPerValue = static_cast<vtkm::Id>(sizeof(vtkm::WordTypeDefault));
  const vtkm::Id bytesInFieldData = field.GetData().GetNumberOfValues() * bytesPerValue;

  VTKM_TEST_ASSERT(bytesInFieldData == expectedBytes,
                   "The BitField allocation does not round up to the nearest "
                   "block. This can cause access-by-word to read/write invalid "
                   "memory.");
}
|
||||
|
||||
template <typename PortalType, typename PortalConstType>
|
||||
VTKM_EXEC_CONT static bool HelpTestBit(vtkm::Id i, PortalType portal, PortalConstType portalConst)
|
||||
{
|
||||
const auto origBit = RandomBitFromIndex(i);
|
||||
auto bit = origBit;
|
||||
|
||||
const auto mod = RandomBitFromIndex(i + NUM_BITS);
|
||||
|
||||
auto testValues = [&](const char* op) -> bool {
|
||||
auto expected = bit;
|
||||
auto result = portal.GetBitAtomic(i);
|
||||
auto resultConst = portalConst.GetBitAtomic(i);
|
||||
DEVICE_ASSERT_MSG(result == expected, op);
|
||||
DEVICE_ASSERT_MSG(resultConst == expected, op);
|
||||
|
||||
// Reset:
|
||||
bit = origBit;
|
||||
portal.SetBitAtomic(i, bit);
|
||||
return true;
|
||||
};
|
||||
|
||||
portal.SetBit(i, bit);
|
||||
DEVICE_ASSERT(testValues("SetBit"));
|
||||
|
||||
bit = mod;
|
||||
portal.SetBitAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("SetBitAtomic"));
|
||||
|
||||
bit = !bit;
|
||||
portal.NotBitAtomic(i);
|
||||
DEVICE_ASSERT(testValues("NotBitAtomic"));
|
||||
|
||||
bit = bit && mod;
|
||||
portal.AndBitAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("AndBitAtomic"));
|
||||
|
||||
bit = bit || mod;
|
||||
portal.OrBitAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("OrBitAtomic"));
|
||||
|
||||
bit = bit != mod;
|
||||
portal.XorBitAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("XorBitAtomic"));
|
||||
|
||||
const auto notBit = !bit;
|
||||
bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit);
|
||||
DEVICE_ASSERT(casResult == bit);
|
||||
DEVICE_ASSERT(portal.GetBit(i) == bit);
|
||||
DEVICE_ASSERT(portalConst.GetBit(i) == bit);
|
||||
casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit);
|
||||
DEVICE_ASSERT(casResult == bit);
|
||||
DEVICE_ASSERT(portal.GetBit(i) == notBit);
|
||||
DEVICE_ASSERT(portalConst.GetBit(i) == notBit);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename WordType, typename PortalType, typename PortalConstType>
|
||||
VTKM_EXEC_CONT static bool HelpTestWord(vtkm::Id i,
|
||||
PortalType portal,
|
||||
PortalConstType portalConst)
|
||||
{
|
||||
const auto origWord = RandomWordFromIndex<WordType>(i);
|
||||
auto word = origWord;
|
||||
|
||||
const auto mod = RandomWordFromIndex<WordType>(i + NUM_BITS);
|
||||
|
||||
auto testValues = [&](const char* op) -> bool {
|
||||
auto expected = word;
|
||||
auto result = portal.template GetWordAtomic<WordType>(i);
|
||||
auto resultConst = portalConst.template GetWordAtomic<WordType>(i);
|
||||
DEVICE_ASSERT_MSG(result == expected, op);
|
||||
DEVICE_ASSERT_MSG(resultConst == expected, op);
|
||||
|
||||
// Reset:
|
||||
word = origWord;
|
||||
portal.SetWordAtomic(i, word);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
portal.SetWord(i, word);
|
||||
DEVICE_ASSERT(testValues("SetWord"));
|
||||
|
||||
word = mod;
|
||||
portal.SetWordAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("SetWordAtomic"));
|
||||
|
||||
// C++ promotes e.g. uint8 to int32 when performing bitwise not. Silence
|
||||
// conversion warning and mask unimportant bits:
|
||||
word = static_cast<WordType>(~word);
|
||||
portal.template NotWordAtomic<WordType>(i);
|
||||
DEVICE_ASSERT(testValues("NotWordAtomic"));
|
||||
|
||||
word = word & mod;
|
||||
portal.AndWordAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("AndWordAtomic"));
|
||||
|
||||
word = word | mod;
|
||||
portal.OrWordAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("OrWordAtomic"));
|
||||
|
||||
word = word ^ mod;
|
||||
portal.XorWordAtomic(i, mod);
|
||||
DEVICE_ASSERT(testValues("XorWordAtomic"));
|
||||
|
||||
const WordType notWord = static_cast<WordType>(~word);
|
||||
auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord);
|
||||
DEVICE_ASSERT(casResult == word);
|
||||
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == word);
|
||||
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == word);
|
||||
casResult = portal.CompareAndSwapWordAtomic(i, notWord, word);
|
||||
DEVICE_ASSERT(casResult == word);
|
||||
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == notWord);
|
||||
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == notWord);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename PortalType, typename PortalConstType>
|
||||
struct HelpTestWordOpsControl
|
||||
{
|
||||
PortalType Portal;
|
||||
PortalConstType PortalConst;
|
||||
|
||||
VTKM_CONT
|
||||
HelpTestWordOpsControl(PortalType portal, PortalConstType portalConst)
|
||||
: Portal(portal)
|
||||
, PortalConst(portalConst)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT void operator()(WordType)
|
||||
{
|
||||
const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
|
||||
VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());
|
||||
for (vtkm::Id i = 0; i < numWords; ++i)
|
||||
{
|
||||
VTKM_TEST_ASSERT(HelpTestWord<WordType>(i, this->Portal, this->PortalConst));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Portal, typename PortalConst>
|
||||
VTKM_CONT static void HelpTestPortalsControl(Portal portal, PortalConst portalConst)
|
||||
{
|
||||
const auto numWords8 = (NUM_BITS + 7) / 8;
|
||||
const auto numWords16 = (NUM_BITS + 15) / 16;
|
||||
const auto numWords32 = (NUM_BITS + 31) / 32;
|
||||
const auto numWords64 = (NUM_BITS + 63) / 64;
|
||||
|
||||
VTKM_TEST_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
|
||||
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
|
||||
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
|
||||
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
|
||||
VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
|
||||
VTKM_TEST_ASSERT(portalConst.GetNumberOfBits() == NUM_BITS);
|
||||
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
|
||||
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
|
||||
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
|
||||
VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
|
||||
|
||||
for (vtkm::Id i = 0; i < NUM_BITS; ++i)
|
||||
{
|
||||
HelpTestBit(i, portal, portalConst);
|
||||
}
|
||||
|
||||
HelpTestWordOpsControl<Portal, PortalConst> test(portal, portalConst);
|
||||
vtkm::ListForEach(test, typename Portal::AtomicInterface::WordTypes{});
|
||||
}
|
||||
|
||||
/// Builds a pseudorandom BitField and runs the control-side portal checks on
/// its read/write and read-only control portals.
VTKM_CONT
static void TestControlPortals()
{
  auto field = RandomBitField();
  auto writable = field.GetPortalControl();
  auto readOnly = field.GetPortalConstControl();

  HelpTestPortalsControl(writable, readOnly);
}
|
||||
|
||||
template <typename Portal>
|
||||
VTKM_EXEC_CONT static bool HelpTestPortalSanityExecution(Portal portal)
|
||||
{
|
||||
const auto numWords8 = (NUM_BITS + 7) / 8;
|
||||
const auto numWords16 = (NUM_BITS + 15) / 16;
|
||||
const auto numWords32 = (NUM_BITS + 31) / 32;
|
||||
const auto numWords64 = (NUM_BITS + 63) / 64;
|
||||
|
||||
DEVICE_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
|
||||
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
|
||||
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
|
||||
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
|
||||
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename WordType, typename PortalType, typename PortalConstType>
|
||||
struct HelpTestPortalsExecutionWordsFunctor : vtkm::exec::FunctorBase
|
||||
{
|
||||
PortalType Portal;
|
||||
PortalConstType PortalConst;
|
||||
|
||||
HelpTestPortalsExecutionWordsFunctor(PortalType portal, PortalConstType portalConst)
|
||||
: Portal(portal)
|
||||
, PortalConst(portalConst)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
void operator()(vtkm::Id i) const
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
if (!HelpTestPortalSanityExecution(this->Portal))
|
||||
{
|
||||
this->RaiseError("Testing Portal sanity failed.");
|
||||
return;
|
||||
}
|
||||
if (!HelpTestPortalSanityExecution(this->PortalConst))
|
||||
{
|
||||
this->RaiseError("Testing PortalConst sanity failed.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!HelpTestWord<WordType>(i, this->Portal, this->PortalConst))
|
||||
{
|
||||
this->RaiseError("Testing word operations failed.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename PortalType, typename PortalConstType>
|
||||
struct HelpTestPortalsExecutionBitsFunctor : vtkm::exec::FunctorBase
|
||||
{
|
||||
PortalType Portal;
|
||||
PortalConstType PortalConst;
|
||||
|
||||
HelpTestPortalsExecutionBitsFunctor(PortalType portal, PortalConstType portalConst)
|
||||
: Portal(portal)
|
||||
, PortalConst(portalConst)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
void operator()(vtkm::Id i) const
|
||||
{
|
||||
if (!HelpTestBit(i, this->Portal, this->PortalConst))
|
||||
{
|
||||
this->RaiseError("Testing bit operations failed.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename PortalType, typename PortalConstType>
|
||||
struct HelpTestWordOpsExecution
|
||||
{
|
||||
PortalType Portal;
|
||||
PortalConstType PortalConst;
|
||||
|
||||
VTKM_CONT
|
||||
HelpTestWordOpsExecution(PortalType portal, PortalConstType portalConst)
|
||||
: Portal(portal)
|
||||
, PortalConst(portalConst)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT void operator()(WordType)
|
||||
{
|
||||
const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
|
||||
VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());
|
||||
|
||||
using WordFunctor =
|
||||
HelpTestPortalsExecutionWordsFunctor<WordType, PortalType, PortalConstType>;
|
||||
WordFunctor test{ this->Portal, this->PortalConst };
|
||||
Algo::Schedule(test, numWords);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Portal, typename PortalConst>
|
||||
VTKM_CONT static void HelpTestPortalsExecution(Portal portal, PortalConst portalConst)
|
||||
{
|
||||
HelpTestPortalsExecutionBitsFunctor<Portal, PortalConst> bitTest{ portal, portalConst };
|
||||
Algo::Schedule(bitTest, portal.GetNumberOfBits());
|
||||
|
||||
|
||||
HelpTestWordOpsExecution<Portal, PortalConst> test(portal, portalConst);
|
||||
vtkm::ListForEach(test, typename Portal::AtomicInterface::WordTypes{});
|
||||
}
|
||||
|
||||
/// Builds a pseudorandom BitField, obtains its in-place and input portals for
/// DeviceAdapterTag, and runs the execution-side portal checks on them.
VTKM_CONT
static void TestExecutionPortals()
{
  auto field = RandomBitField();
  auto inPlace = field.PrepareForInPlace(DeviceAdapterTag{});
  auto input = field.PrepareForInput(DeviceAdapterTag{});

  HelpTestPortalsExecution(inPlace, input);
}
|
||||
|
||||
/// Checks GetFinalWordMask for 32- and 64-bit words over a table of BitField
/// sizes, including empty, partial-word and exact-multiple sizes.
VTKM_CONT
static void TestFinalWordMask()
{
  // Allocates a BitField of numBits bits and compares its 32-bit final-word
  // mask with expectedMask.
  auto testMask32 = [](vtkm::Id numBits, vtkm::UInt32 expectedMask) {
    vtkm::cont::BitField field;
    field.Allocate(numBits);
    auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt32>();

    VTKM_TEST_ASSERT(expectedMask == mask,
                     "Unexpected mask for BitField size ",
                     numBits,
                     ": Expected 0x",
                     std::hex,
                     expectedMask,
                     " got 0x",
                     mask);
  };

  // Same check for the 64-bit final-word mask.
  auto testMask64 = [](vtkm::Id numBits, vtkm::UInt64 expectedMask) {
    vtkm::cont::BitField field;
    field.Allocate(numBits);
    auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt64>();

    VTKM_TEST_ASSERT(expectedMask == mask,
                     "Unexpected mask for BitField size ",
                     numBits,
                     ": Expected 0x",
                     std::hex,
                     expectedMask,
                     " got 0x",
                     mask);
  };

  struct Case32
  {
    vtkm::Id NumBits;
    vtkm::UInt32 Mask;
  };
  const Case32 cases32[] = {
    { 0, 0x00000000 },   { 1, 0x00000001 },  { 2, 0x00000003 },  { 3, 0x00000007 },
    { 4, 0x0000000f },   { 5, 0x0000001f },  { 8, 0x000000ff },  { 16, 0x0000ffff },
    { 24, 0x00ffffff },  { 25, 0x01ffffff }, { 31, 0x7fffffff }, { 32, 0xffffffff },
    { 64, 0xffffffff },  { 128, 0xffffffff }, { 129, 0x00000001 },
  };
  for (const Case32& entry : cases32)
  {
    testMask32(entry.NumBits, entry.Mask);
  }

  struct Case64
  {
    vtkm::Id NumBits;
    vtkm::UInt64 Mask;
  };
  const Case64 cases64[] = {
    { 0, 0x0000000000000000 },   { 1, 0x0000000000000001 },   { 2, 0x0000000000000003 },
    { 3, 0x0000000000000007 },   { 4, 0x000000000000000f },   { 5, 0x000000000000001f },
    { 8, 0x00000000000000ff },   { 16, 0x000000000000ffff },  { 24, 0x0000000000ffffff },
    { 25, 0x0000000001ffffff },  { 31, 0x000000007fffffff },  { 32, 0x00000000ffffffff },
    { 40, 0x000000ffffffffff },  { 48, 0x0000ffffffffffff },  { 56, 0x00ffffffffffffff },
    { 64, 0xffffffffffffffff },  { 128, 0xffffffffffffffff }, { 129, 0x0000000000000001 },
  };
  for (const Case64& entry : cases64)
  {
    testMask64(entry.NumBits, entry.Mask);
  }
}
|
||||
|
||||
struct ArrayHandleBitFieldChecker : vtkm::exec::FunctorBase
|
||||
{
|
||||
using PortalType = typename ArrayHandleBitField::ExecutionTypes<DeviceAdapterTag>::Portal;
|
||||
|
||||
PortalType Portal;
|
||||
bool InvertReference;
|
||||
|
||||
VTKM_EXEC_CONT
|
||||
ArrayHandleBitFieldChecker(PortalType portal, bool invert)
|
||||
: Portal(portal)
|
||||
, InvertReference(invert)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC
|
||||
void operator()(vtkm::Id i) const
|
||||
{
|
||||
const bool ref = this->InvertReference ? !RandomBitFromIndex(i) : RandomBitFromIndex(i);
|
||||
if (this->Portal.Get(i) != ref)
|
||||
{
|
||||
this->RaiseError("Unexpected value from ArrayHandleBitField portal.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Flip the bit for the next kernel launch, which tests that the bitfield
|
||||
// is inverted.
|
||||
this->Portal.Set(i, !ref);
|
||||
}
|
||||
};
|
||||
|
||||
/// Wraps a pseudorandom BitField in an ArrayHandleBitField, checks its size,
/// and schedules the checker twice: first against the original bits, then
/// against the inverted bits left behind by the first pass.
VTKM_CONT
static void TestArrayHandleBitField()
{
  using Checker = ArrayHandleBitFieldChecker;

  auto handle = vtkm::cont::make_ArrayHandleBitField(RandomBitField());
  const vtkm::Id numBits = handle.GetNumberOfValues();

  VTKM_TEST_ASSERT(numBits == NUM_BITS,
                   "ArrayHandleBitField returned the wrong number of values. "
                   "Expected: ",
                   NUM_BITS,
                   " got: ",
                   numBits);

  // First pass: values match the reference and get flipped.
  Algo::Schedule(Checker{ handle.PrepareForInPlace(DeviceAdapterTag{}), false }, numBits);
  // Second pass: values match the inverted reference.
  Algo::Schedule(Checker{ handle.PrepareForInPlace(DeviceAdapterTag{}), true }, numBits);
}
|
||||
|
||||
/// Invokes ConditionalMergeWorklet with an ArrayHandleBitField as the
/// condition array and verifies every output value on the control side.
VTKM_CONT
static void TestArrayInvokeWorklet()
{
  auto condArray = vtkm::cont::make_ArrayHandleBitField(RandomBitField());
  auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
  auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
  vtkm::cont::ArrayHandle<vtkm::Id> output;

  vtkm::worklet::Invoker runWorklet;
  runWorklet(ConditionalMergeWorklet{}, condArray, trueArray, falseArray, output);

  auto condVals = condArray.GetPortalConstControl();
  auto trueVals = trueArray.GetPortalConstControl();
  auto falseVals = falseArray.GetPortalConstControl();
  auto outVals = output.GetPortalConstControl();

  // All four arrays must have the same length.
  VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == trueVals.GetNumberOfValues());
  VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == falseVals.GetNumberOfValues());
  VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == outVals.GetNumberOfValues());

  vtkm::Id i = 0;
  while (i < condVals.GetNumberOfValues())
  {
    VTKM_TEST_ASSERT(outVals.Get(i) == (condVals.Get(i) ? trueVals.Get(i) : falseVals.Get(i)));
    ++i;
  }
}
|
||||
|
||||
/// Invokes ConditionalMergeWorklet2 with a BitField passed directly, then
/// verifies that every bit was inverted and that each output value was
/// selected according to the bit's value before inversion.
VTKM_CONT
static void TestArrayInvokeWorklet2()
{
  auto condBits = RandomBitField();
  auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
  auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
  vtkm::cont::ArrayHandle<vtkm::Id> output;

  vtkm::worklet::Invoker runWorklet;
  runWorklet(ConditionalMergeWorklet2{}, condBits, trueArray, falseArray, output);

  auto condVals = condBits.GetPortalConstControl();
  auto trueVals = trueArray.GetPortalConstControl();
  auto falseVals = falseArray.GetPortalConstControl();
  auto outVals = output.GetPortalConstControl();

  // The bit count must match the length of every value array.
  VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == trueVals.GetNumberOfValues());
  VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == falseVals.GetNumberOfValues());
  VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == outVals.GetNumberOfValues());

  vtkm::Id i = 0;
  while (i < condVals.GetNumberOfBits())
  {
    // The worklet flips the bitfield in place after choosing true/false paths
    VTKM_TEST_ASSERT(condVals.GetBit(i) == !RandomBitFromIndex(i));
    VTKM_TEST_ASSERT(outVals.Get(i) ==
                     (!condVals.GetBit(i) ? trueVals.Get(i) : falseVals.Get(i)));
    ++i;
  }
}
|
||||
|
||||
struct TestRunner
|
||||
{
|
||||
VTKM_CONT
|
||||
void operator()() const
|
||||
{
|
||||
TestingBitField::TestBlockAllocation();
|
||||
TestingBitField::TestControlPortals();
|
||||
TestingBitField::TestExecutionPortals();
|
||||
TestingBitField::TestFinalWordMask();
|
||||
TestingBitField::TestArrayHandleBitField();
|
||||
TestingBitField::TestArrayInvokeWorklet();
|
||||
TestingBitField::TestArrayInvokeWorklet2();
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
/// Entry point: forces the runtime device tracker onto DeviceAdapterTag and
/// runs all BitField tests through the testing harness.
/// \return the value returned by vtkm::cont::testing::Testing::Run.
static VTKM_CONT int Run(int argc, char* argv[])
{
  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(DeviceAdapterTag());

  using Harness = vtkm::cont::testing::Testing;
  return Harness::Run(TestRunner{}, argc, argv);
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont::testing
|
||||
|
||||
#endif //vtk_m_cont_testing_TestingBitFields_h
|
@ -2443,6 +2443,100 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
/// Tests Algorithm::BitFieldToUnorderedSet on BitFields filled with repeated
/// word patterns and with pseudorandom words. Each BitField ends in a partial
/// word. For every input, the produced index array (after sorting) must list
/// exactly the indices of the set bits.
static VTKM_CONT void TestBitFieldToUnorderedSet()
{
  using IndexArray = vtkm::cont::ArrayHandle<vtkm::Id>;
  using WordType = WordTypeDefault;

  // Test that everything works correctly with a partial word at the end.
  static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
  // +5 to get a partial word:
  static constexpr vtkm::Id NumBits = 1024 * BitsPerWord + 5;
  static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;

  // Converts `bits` to an index set, sorts it, and walks bits and indices in
  // lockstep to check that they describe the same set.
  auto testIndexArray = [](const BitField& bits) {
    const vtkm::Id numBits = bits.GetNumberOfBits();
    IndexArray indices;
    Algorithm::BitFieldToUnorderedSet(bits, indices);
    Algorithm::Sort(indices);

    auto bitPortal = bits.GetPortalConstControl();
    auto indexPortal = indices.GetPortalConstControl();

    const vtkm::Id numIndices = indices.GetNumberOfValues();
    vtkm::Id curIndex = 0;
    for (vtkm::Id curBit = 0; curBit < numBits; ++curBit)
    {
      const bool markedSet = curIndex < numIndices ? indexPortal.Get(curIndex) == curBit : false;
      const bool isSet = bitPortal.GetBit(curBit);

      //      std::cout << "curBit: " << curBit
      //                << " activeIndex: "
      //                << (curIndex < numIndices ? indexPortal.Get(curIndex) : -1)
      //                << " isSet: " << isSet << " markedSet: " << markedSet << "\n";

      VTKM_TEST_ASSERT(
        markedSet == isSet, "Bit ", curBit, " is set? ", isSet, " Marked set? ", markedSet);

      if (markedSet)
      {
        curIndex++;
      }
    }

    VTKM_TEST_ASSERT(curIndex == indices.GetNumberOfValues(), "Index array has extra values.");
  };

  // Fills every word of a fresh BitField with `mask` and checks the result.
  auto testRepeatedMask = [&](WordType mask) {
    // std::hex is sticky; save and restore the stream flags so later output
    // on std::cout is not silently switched to hexadecimal.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask
              << std::endl;
    std::cout.flags(savedFlags);

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, mask);
      }
    }

    testIndexArray(bits);
  };

  // Fills a fresh BitField with words drawn from a Mersenne Twister seeded
  // with `seed` and checks the result.
  auto testRandomMask = [&](WordType seed) {
    // Restore the stream flags after printing in hexadecimal (see above).
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex
              << seed << std::endl;
    std::cout.flags(savedFlags);

    std::mt19937 mt{ seed };
    std::uniform_int_distribution<std::mt19937::result_type> rng;

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, static_cast<WordType>(rng(mt)));
      }
    }

    testIndexArray(bits);
  };

  testRepeatedMask(0x00000000);
  testRepeatedMask(0xeeeeeeee);
  testRepeatedMask(0xffffffff);
  testRepeatedMask(0x1c0fd395);
  testRepeatedMask(0xdeadbeef);

  testRandomMask(0x00000000);
  testRandomMask(0xeeeeeeee);
  testRandomMask(0xffffffff);
  testRandomMask(0x1c0fd395);
  testRandomMask(0xdeadbeef);
}
|
||||
|
||||
struct TestAll
|
||||
{
|
||||
VTKM_CONT void operator()() const
|
||||
@ -2496,6 +2590,8 @@ private:
|
||||
TestCopyArraysInDiffTypes();
|
||||
|
||||
TestAtomicArray();
|
||||
|
||||
TestBitFieldToUnorderedSet();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -30,6 +30,8 @@
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
|
||||
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
|
||||
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
|
||||
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
|
||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||
@ -101,6 +103,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class AtomicInterfaceExecution<DeviceAdapterTagTestAlgorithmGeneral> : public AtomicInterfaceControl
|
||||
{
|
||||
};
|
||||
|
||||
template <typename TargetClass>
|
||||
struct VirtualObjectTransfer<TargetClass, vtkm::cont::DeviceAdapterTagTestAlgorithmGeneral> final
|
||||
: public VirtualObjectTransferShareWithControl<TargetClass>
|
||||
|
@ -30,9 +30,9 @@
|
||||
#define VTKM_EXEC __device__ __host__
|
||||
#define VTKM_EXEC_CONT __device__ __host__
|
||||
#if __CUDAVER__ >= 75000
|
||||
#define VTKM_SUPPRESS_EXEC_WARNINGS #pragma nv_exec_check_disable
|
||||
#define VTKM_SUPPRESS_EXEC_WARNINGS _Pragma("nv_exec_check_disable")
|
||||
#else
|
||||
#define VTKM_SUPPRESS_EXEC_WARNINGS #pragma hd_warning_disable
|
||||
#define VTKM_SUPPRESS_EXEC_WARNINGS _Pragma("hd_warning_disable")
|
||||
#endif
|
||||
#else
|
||||
#define VTKM_EXEC
|
||||
|
@ -33,6 +33,8 @@
|
||||
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#define VTKM_MATH_ASSERT(condition, message) \
|
||||
if (!(condition)) \
|
||||
{ \
|
||||
@ -761,6 +763,76 @@ struct TypeListTagAbs
|
||||
{
|
||||
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
static constexpr vtkm::Id BitOpSamples = 1024 * 1024;
|
||||
|
||||
template <typename T>
|
||||
struct BitOpTests : public vtkm::exec::FunctorBase
|
||||
{
|
||||
static constexpr T MaxT = std::numeric_limits<T>::max();
|
||||
static constexpr T Offset = MaxT / BitOpSamples;
|
||||
|
||||
VTKM_EXEC void operator()(vtkm::Id i) const
|
||||
{
|
||||
const T idx = static_cast<T>(i);
|
||||
const T word = idx * this->Offset;
|
||||
|
||||
TestWord(word - idx);
|
||||
TestWord(word);
|
||||
TestWord(word + idx);
|
||||
}
|
||||
|
||||
VTKM_EXEC void TestWord(T word) const
|
||||
{
|
||||
VTKM_MATH_ASSERT(test_equal(vtkm::CountSetBits(word), this->DumbCountBits(word)),
|
||||
"CountBits returned wrong value.");
|
||||
VTKM_MATH_ASSERT(test_equal(vtkm::FindFirstSetBit(word), this->DumbFindFirstSetBit(word)),
|
||||
"FindFirstSetBit returned wrong value.")
|
||||
}
|
||||
|
||||
VTKM_EXEC vtkm::Int32 DumbCountBits(T word) const
|
||||
{
|
||||
vtkm::Int32 bits = 0;
|
||||
while (word)
|
||||
{
|
||||
if (word & 0x1)
|
||||
{
|
||||
++bits;
|
||||
}
|
||||
word >>= 1;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
VTKM_EXEC vtkm::Int32 DumbFindFirstSetBit(T word) const
|
||||
{
|
||||
if (word == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
vtkm::Int32 bit = 1;
|
||||
while ((word & 0x1) == 0)
|
||||
{
|
||||
word >>= 1;
|
||||
++bit;
|
||||
}
|
||||
return bit;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Device>
|
||||
struct TryBitOpTests
|
||||
{
|
||||
template <typename T>
|
||||
void operator()(const T&) const
|
||||
{
|
||||
vtkm::cont::DeviceAdapterAlgorithm<Device>::Schedule(BitOpTests<T>(), BitOpSamples);
|
||||
}
|
||||
};
|
||||
|
||||
using TypeListTagBitOp = vtkm::ListTagBase<vtkm::UInt32, vtkm::UInt64>;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
template <typename Device>
|
||||
void RunMathTests()
|
||||
@ -773,6 +845,8 @@ void RunMathTests()
|
||||
vtkm::testing::Testing::TryTypes(TryAllTypesTests<Device>());
|
||||
std::cout << "Test all Abs types" << std::endl;
|
||||
vtkm::testing::Testing::TryTypes(TryAbsTests<Device>(), TypeListTagAbs());
|
||||
std::cout << "Test all bit operations" << std::endl;
|
||||
vtkm::testing::Testing::TryTypes(TryBitOpTests<Device>(), TypeListTagBitOp());
|
||||
}
|
||||
|
||||
} // namespace UnitTestMathNamespace
|
||||
|
@ -36,6 +36,7 @@
|
||||
|
||||
#include <vtkm/cont/arg/ControlSignatureTagBase.h>
|
||||
#include <vtkm/cont/arg/TransportTagAtomicArray.h>
|
||||
#include <vtkm/cont/arg/TransportTagBitField.h>
|
||||
#include <vtkm/cont/arg/TransportTagCellSetIn.h>
|
||||
#include <vtkm/cont/arg/TransportTagExecObject.h>
|
||||
#include <vtkm/cont/arg/TransportTagWholeArrayIn.h>
|
||||
@ -43,6 +44,7 @@
|
||||
#include <vtkm/cont/arg/TransportTagWholeArrayOut.h>
|
||||
#include <vtkm/cont/arg/TypeCheckTagArray.h>
|
||||
#include <vtkm/cont/arg/TypeCheckTagAtomicArray.h>
|
||||
#include <vtkm/cont/arg/TypeCheckTagBitField.h>
|
||||
#include <vtkm/cont/arg/TypeCheckTagCellSet.h>
|
||||
#include <vtkm/cont/arg/TypeCheckTagExecObject.h>
|
||||
|
||||
@ -217,6 +219,36 @@ public:
|
||||
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
|
||||
};
|
||||
|
||||
/// \c ControlSignature tags for whole BitFields.
|
||||
///
|
||||
/// When a BitField is passed in to a worklet expecting this ControlSignature
|
||||
/// type, the appropriate BitPortal is generated and given to the worklet's
|
||||
/// execution.
|
||||
///
|
||||
/// Be aware that this data structure is especially prone to race conditions,
|
||||
/// so be sure to use the appropriate atomic methods when necessary.
|
||||
/// @{
|
||||
///
|
||||
struct BitFieldIn : vtkm::cont::arg::ControlSignatureTagBase
|
||||
{
|
||||
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
|
||||
using TransportTag = vtkm::cont::arg::TransportTagBitFieldIn;
|
||||
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
|
||||
};
|
||||
struct BitFieldOut : vtkm::cont::arg::ControlSignatureTagBase
|
||||
{
|
||||
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
|
||||
using TransportTag = vtkm::cont::arg::TransportTagBitFieldOut;
|
||||
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
|
||||
};
|
||||
struct BitFieldInOut : vtkm::cont::arg::ControlSignatureTagBase
|
||||
{
|
||||
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
|
||||
using TransportTag = vtkm::cont::arg::TransportTagBitFieldInOut;
|
||||
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
|
||||
};
|
||||
/// @}
|
||||
|
||||
/// \c ControlSignature tag for whole input topology.
|
||||
///
|
||||
/// The \c WholeCellSetIn control signature tag specifies an \c CellSet
|
||||
|
Loading…
Reference in New Issue
Block a user