Merge topic 'bitfields'

661fb64de AtomicInterfaceControl functions are marked with VTKM_SUPPRESS_EXEC_WARNINGS
0c70f9b9a Add BitFieldIn/Out/InOut worklet signature tags.
a66510e81 Add ArrayHandleBitField, a boolean-valued AH backed by a BitField.
56cc5c3d3 Add support for BitFields.
d01b97382 Allow VTKM_SUPPRESS_EXEC_WARNINGS to be used inside macros.
2f2ca9370 Add bit operations FindFirstSetBit and CountSetBits to Math.h.

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !1629
This commit is contained in:
Robert Maynard 2019-04-11 16:31:54 +00:00 committed by Kitware Robot
commit a5dbe1ece3
44 changed files with 3753 additions and 23 deletions

@ -26,6 +26,7 @@
#include <vtkm/cont/ArrayHandlePermutation.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/StorageBasic.h>
@ -63,20 +64,24 @@ namespace benchmarking
enum BenchmarkName
{
COPY = 1,
COPY_IF = 1 << 1,
LOWER_BOUNDS = 1 << 2,
REDUCE = 1 << 3,
REDUCE_BY_KEY = 1 << 4,
SCAN_INCLUSIVE = 1 << 5,
SCAN_EXCLUSIVE = 1 << 6,
SORT = 1 << 7,
SORT_BY_KEY = 1 << 8,
STABLE_SORT_INDICES = 1 << 9,
STABLE_SORT_INDICES_UNIQUE = 1 << 10,
UNIQUE = 1 << 11,
UPPER_BOUNDS = 1 << 12,
ALL = COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE | SCAN_EXCLUSIVE |
BITFIELD_TO_UNORDERED_SET = 1 << 0,
COPY = 1 << 1,
COPY_IF = 1 << 2,
LOWER_BOUNDS = 1 << 3,
REDUCE = 1 << 4,
REDUCE_BY_KEY = 1 << 5,
SCAN_INCLUSIVE = 1 << 6,
SCAN_EXCLUSIVE = 1 << 7,
SORT = 1 << 8,
SORT_BY_KEY = 1 << 9,
STABLE_SORT_INDICES = 1 << 10,
STABLE_SORT_INDICES_UNIQUE = 1 << 11,
UNIQUE = 1 << 12,
UPPER_BOUNDS = 1 << 13,
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY |
SCAN_INCLUSIVE |
SCAN_EXCLUSIVE |
SORT |
SORT_BY_KEY |
STABLE_SORT_INDICES |
@ -132,6 +137,20 @@ struct BenchDevAlgoConfig
? static_cast<vtkm::Id>(this->ArraySizeBytes / static_cast<vtkm::UInt64>(sizeof(T)))
: static_cast<vtkm::Id>(this->ArraySizeValues);
}
// Number of WordType-sized words the benchmark bit field should contain.
// When DoByteSizes is set, ArraySizeBytes is treated as a byte budget and the
// result is the count of whole words that fit in it (rounded down).
// Otherwise ArraySizeValues is treated as a bit count and the result is the
// count of whole words covered by that many bits (also rounded down).
template <typename WordType>
VTKM_CONT vtkm::Id ComputeNumberOfWords()
{
  constexpr vtkm::UInt64 wordBytes = static_cast<vtkm::UInt64>(sizeof(WordType));
  if (this->DoByteSizes)
  {
    return static_cast<vtkm::Id>(this->ArraySizeBytes / wordBytes);
  }

  // Eight bits per byte is assumed here.
  constexpr vtkm::UInt64 wordBits = wordBytes * 8;
  return static_cast<vtkm::Id>(this->ArraySizeValues / wordBits);
}
};
// Share a global instance of the config (only way to get it into the benchmark
@ -255,7 +274,170 @@ public:
}
};
// Functor that initializes a single word of a bit field per invocation.
// Word indices that are both <= MaxMaskedWord and a multiple of Stride are
// set to Exemplar; every other word is cleared to zero.
template <typename WordType, typename BitFieldPortal>
struct GenerateBitFieldFunctor : public vtkm::exec::FunctorBase
{
  WordType Exemplar;       // pattern written into selected words
  vtkm::Id Stride;         // only multiples of Stride are selected (must be nonzero)
  vtkm::Id MaxMaskedWord;  // inclusive upper bound on selected word indices
  BitFieldPortal Portal;   // destination portal

  VTKM_EXEC_CONT
  GenerateBitFieldFunctor(WordType exemplar,
                          vtkm::Id stride,
                          vtkm::Id maxMaskedWord,
                          const BitFieldPortal& portal)
    : Exemplar(exemplar)
    , Stride(stride)
    , MaxMaskedWord(maxMaskedWord)
    , Portal(portal)
  {
  }

  VTKM_EXEC
  void operator()(vtkm::Id wordIdx) const
  {
    // The range test comes first so the modulo is only evaluated for words
    // inside the masked range.
    const bool useExemplar =
      (wordIdx <= this->MaxMaskedWord) && ((wordIdx % this->Stride) == 0);
    const WordType value = useExemplar ? this->Exemplar : static_cast<WordType>(0);
    this->Portal.SetWord(wordIdx, value);
  }
};
// Create a bit field for testing. The bit field is sized to hold exactly
// numWords words of type WordType.
// The exemplar word is used to set bits in the array: every word whose index
// is a multiple of stride (and is <= maxMaskedWord) is set to the exemplar,
// all other words are set to 0. A stride of 0 is treated as 1.
// Stride and maxMaskedWord may be used to test different types of imbalanced
// loads.
template <typename WordType, typename DeviceAdapterTag>
static VTKM_CONT vtkm::cont::BitField GenerateBitField(WordType exemplar,
                                                       vtkm::Id stride,
                                                       vtkm::Id maxMaskedWord,
                                                       vtkm::Id numWords)
{
  using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;

  // The functor computes wordIdx % stride, so a zero stride must be avoided.
  if (stride == 0)
  {
    stride = 1;
  }

  // BitField sizes are expressed in bits, not words. Allocate enough bits to
  // back every word that the functor below writes; passing numWords directly
  // would allocate only numWords bits while numWords whole words are written.
  const vtkm::Id numBits = numWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);

  vtkm::cont::BitField bits;
  auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});

  using Functor = GenerateBitFieldFunctor<WordType, decltype(portal)>;
  // One invocation per word.
  Algo::Schedule(Functor{ exemplar, stride, maxMaskedWord, portal }, numWords);
  Algo::Synchronize();
  return bits;
}
private:
// Benchmark for Algorithm::BitFieldToUnorderedSet. The input bit field is
// generated once at construction; operator() times a single conversion of
// that bit field into an array of set-bit indices.
template <typename WordType, typename DeviceAdapter>
struct BenchBitFieldToUnorderedSet
{
  using IndicesArray = vtkm::cont::ArrayHandle<vtkm::Id>;

  vtkm::Id NumWords;
  vtkm::Id NumBits;
  WordType Exemplar;
  vtkm::Id Stride;
  vtkm::Float32 FillRatio;
  vtkm::Id MaxMaskedIndex;
  std::string Name;
  vtkm::cont::BitField Bits;
  IndicesArray Indices;

  // Compute the largest word index that may receive the exemplar word.
  // For ratios strictly between 0 and 1 this is numWords / trunc(1 / ratio).
  // Ratios <= 0 (or NaN) yield 0 and ratios >= 1 yield numWords, which avoids
  // converting an infinite or out-of-range value to an integer and avoids a
  // zero divisor.
  VTKM_CONT
  static vtkm::Id ComputeMaxMaskedIndex(vtkm::Id numWords, vtkm::Float32 fillRatio)
  {
    if (!(fillRatio > 0.f))
    {
      return 0;
    }
    if (fillRatio >= 1.f)
    {
      return numWords;
    }

    const vtkm::Float64 inverse = 1. / fillRatio;
    // A divisor larger than numWords always produces 0; bail out before the
    // float-to-integer conversion could overflow for tiny ratios.
    if (inverse >= static_cast<vtkm::Float64>(numWords) + 1.)
    {
      return 0;
    }
    return numWords / static_cast<vtkm::Id>(inverse);
  }

  // See GenerateBitField for details. fillRatio is used to compute
  // maxMaskedWord.
  VTKM_CONT
  BenchBitFieldToUnorderedSet(WordType exemplar,
                              vtkm::Id stride,
                              vtkm::Float32 fillRatio,
                              const std::string& name)
    : NumWords(Config.ComputeNumberOfWords<WordType>())
    , NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
    , Exemplar(exemplar)
    , Stride(stride)
    , FillRatio(fillRatio)
    , MaxMaskedIndex(ComputeMaxMaskedIndex(this->NumWords, this->FillRatio))
    , Name(name)
    , Bits(GenerateBitField<WordType, DeviceAdapter>(this->Exemplar,
                                                     this->Stride,
                                                     this->MaxMaskedIndex,
                                                     this->NumWords))
  {
  }

  // Run the algorithm once and return the elapsed time reported by the timer.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    Timer timer(DeviceAdapter{});
    timer.Start();
    Algorithm::BitFieldToUnorderedSet(DeviceAdapter{}, this->Bits, this->Indices);
    return timer.GetElapsedTime();
  }

  // Human-readable summary of the benchmark configuration.
  VTKM_CONT
  std::string Description() const
  {
    // GenerateBitField treats a stride of 0 as 1; mirror that here so the
    // division below is always defined.
    const vtkm::Id stride = (this->Stride == 0) ? 1 : this->Stride;

    // Words 0, stride, 2*stride, ... up to and including the last masked word
    // hold the exemplar. The masked range is inclusive and cannot extend past
    // the final word of the field.
    vtkm::Id numFilledWords = 0;
    if (this->NumWords > 0)
    {
      const vtkm::Id lastWord = this->NumWords - 1;
      const vtkm::Id lastMasked =
        (this->MaxMaskedIndex < lastWord) ? this->MaxMaskedIndex : lastWord;
      numFilledWords = lastMasked / stride + 1;
    }
    const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar);

    std::stringstream description;
    description << "BitFieldToUnorderedSet" << this->Name << " ( "
                << "NumWords: " << this->NumWords << " "
                << "Exemplar: " << std::hex << this->Exemplar << std::dec << " "
                << "FillRatio: " << this->FillRatio << " "
                << "Stride: " << this->Stride << " "
                << "NumSetBits: " << numSetBits << " )";
    return description.str();
  }
};
// Benchmark configurations for BenchBitFieldToUnorderedSet. The arguments
// after the benchmark class appear to be forwarded to its constructor as
// (exemplar, stride, fillRatio, name) -- TODO confirm against the
// VTKM_MAKE_BENCHMARK definition.

// All-zero exemplar: no bits are set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetNull,
BenchBitFieldToUnorderedSet,
0x00000000,
1,
0.f,
"Null");
// All-ones exemplar, stride 1, fill ratio 1.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetFull,
BenchBitFieldToUnorderedSet,
0xffffffff,
1,
1.f,
"Full");
// Exemplar with only the upper 16 bits of a 32-bit value set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfWord,
BenchBitFieldToUnorderedSet,
0xffff0000,
1,
1.f,
"HalfWord");
// All-ones exemplar with a fill ratio of 0.5.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetHalfField,
BenchBitFieldToUnorderedSet,
0xffffffff,
1,
0.5f,
"HalfField");
// All-ones exemplar with a stride of 2.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateWords,
BenchBitFieldToUnorderedSet,
0xffffffff,
2,
1.f,
"AlternateWords");
// Exemplar 0x55555555: every other bit of a 32-bit value is set.
VTKM_MAKE_BENCHMARK(BitFieldToUnorderedSetAlternateBits,
BenchBitFieldToUnorderedSet,
0x55555555,
1,
1.f,
"AlternateBits");
template <typename Value, typename DeviceAdapter>
struct BenchCopy
{
@ -982,6 +1164,19 @@ public:
template <typename ValueTypes>
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
{
using BitFieldWordTypes = vtkm::ListTagBase<vtkm::UInt32>;
if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET)
{
std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n";
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id);
}
if (Config.BenchmarkFlags & COPY)
{
std::cout << DIVIDER << "\nBenchmarking Copy\n";
@ -1434,7 +1629,11 @@ int main(int argc, char* argv[])
std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
});
if (arg == "copy")
if (arg == "bitfieldtounorderedset")
{
config.BenchmarkFlags |= vtkm::benchmarking::BITFIELD_TO_UNORDERED_SET;
}
else if (arg == "copy")
{
config.BenchmarkFlags |= vtkm::benchmarking::COPY;
}

@ -0,0 +1,51 @@
# Add support for BitFields.
BitFields are:
- Stored in memory using a contiguous buffer of bits.
- Accessible via portals, a la ArrayHandle.
- Portals operate on individual bits or words.
- Operations may be atomic for safe use from concurrent kernels.
The new BitFieldToUnorderedSet device algorithm produces an
ArrayHandle containing the indices of all set bits, in no particular
order.
The new AtomicInterface classes provide an abstraction over bitwise
atomic operations across the control and execution environments and are
used to implement the BitPortals.
BitFields may be used as boolean-typed ArrayHandles using the
ArrayHandleBitField adapter. ArrayHandleBitField uses atomic operations to read
and write bits in the BitField, and is safe to use in concurrent code.
For example, a simple worklet that merges two arrays based on a boolean
condition is tested in TestingBitField:
```
class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn cond,
FieldIn trueVals,
FieldIn falseVals,
FieldOut result);
using ExecutionSignature = _4(_1, _2, _3);
template <typename T>
VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
{
return cond ? trueVal : falseVal;
}
};
BitField bits = ...;
auto condArray = vtkm::cont::make_ArrayHandleBitField(bits);
auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
vtkm::cont::ArrayHandle<vtkm::Id> output;
vtkm::worklet::DispatcherMapField<ConditionalMergeWorklet> dispatcher;
dispatcher.Invoke(condArray, trueArray, falseArray, output);
```

@ -41,9 +41,13 @@
#include <algorithm>
#endif
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
#ifdef VTKM_MSVC
#include <intrin.h> // For bitwise intrinsics (__popcnt, etc)
#include <vtkm/internal/Windows.h> // for types used by MSVC intrinsics.
#ifndef VTKM_CUDA
#include <math.h>
#endif
#endif // VTKM_CUDA
#endif // VTKM_MSVC
#define VTKM_CUDA_MATH_FUNCTION_32(func) func##f
#define VTKM_CUDA_MATH_FUNCTION_64(func) func
@ -2592,6 +2596,191 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
#endif
}
/// Bitwise operations
///
/// Find the first (least significant) set bit in @a word, and return its
/// one-based position (1-32). If no bits are set, returns 0.
///
/// Two definitions follow, selected by the preprocessor: a device-only one
/// for the CUDA device pass, and a general one that picks a compiler
/// intrinsic and otherwise falls back to a portable loop.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffs is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
// Output is [0,32], with ffs(0) == 0
return __ffs(static_cast<int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
// Output is [0,32], with ffs(0) == 0
return __builtin_ffs(static_cast<int>(word));
# elif defined(VTKM_MSVC)
// Output is [0, 31], check return code to see if bits are set:
// The zero-based index is written through the pointer, so 1 is added to
// produce the one-based result.
// NOTE(review): the cast presumes DWORD and vtkm::UInt32 have the same size
// and representation -- confirm for every supported MSVC target.
vtkm::UInt32 firstSet;
return _BitScanForward(reinterpret_cast<DWORD*>(&firstSet), word) != 0
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
# elif defined(VTKM_ICC)
// Output is [0, 31], undefined if word is 0.
return word != 0 ? _bit_scan_forward(word) + 1 : 0;
# else
// Naive implementation:
// Shift right until bit 0 is set, counting the one-based position.
if (word == 0)
{
return 0;
}
vtkm::Int32 bit = 1;
while ((word & 0x1) == 0)
{
word >>= 1;
++bit;
}
return bit;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Find the first (least significant) set bit in @a word, and return its
/// one-based position (1-64). If no bits are set, returns 0.
///
/// 64-bit overload; the structure mirrors the 32-bit overload.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffsll is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{
// Output is [0,64], with ffs(0) == 0
return __ffsll(static_cast<long long int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
// Output is [0,64], with ffs(0) == 0
return __builtin_ffsll(static_cast<long long int>(word));
# elif defined(VTKM_MSVC) || defined(VTKM_ICC)
// Output is [0, 63], check return code to see if bits are set:
// The zero-based index is written through the pointer, so 1 is added to
// produce the one-based result.
// NOTE(review): this branch is shared by MSVC and ICC and uses the DWORD
// type; confirm DWORD is declared and _BitScanForward64 is available for
// every compiler/target that reaches it.
vtkm::UInt32 firstSet;
return _BitScanForward64(reinterpret_cast<DWORD*>(&firstSet), word) != 0
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
# else
// Naive implementation:
// Shift right until bit 0 is set, counting the one-based position.
if (word == 0)
{
return 0;
}
vtkm::Int32 bit = 1;
while ((word & 0x1) == 0)
{
word >>= 1;
++bit;
}
return bit;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Count the total number of bits set in @a word.
///
/// 32-bit overload. A device-only definition is used during the CUDA device
/// pass; otherwise a compiler intrinsic is selected, with a portable loop as
/// the fallback.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popc is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
return __popc(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
return __builtin_popcount(word);
# elif defined(VTKM_MSVC)
// NOTE(review): __popcnt presumably requires hardware popcount support --
// confirm for the minimum supported CPU.
return static_cast<vtkm::Int32>(__popcnt(word));
# elif defined(VTKM_ICC)
return _popcnt32(static_cast<int>(word));
# else
// Naive implementation:
// Test the lowest bit, then shift right, until no set bits remain.
vtkm::Int32 bits = 0;
while (word)
{
if (word & 0x1)
{
++bits;
}
word >>= 1;
}
return bits;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Count the total number of bits set in @a word.
///
/// 64-bit overload; the structure mirrors the 32-bit overload.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popcll is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
return __popcll(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
return __builtin_popcountll(word);
# elif defined(VTKM_MSVC)
// NOTE(review): __popcnt64 is presumably only available on 64-bit targets --
// confirm if 32-bit MSVC builds are supported.
return static_cast<vtkm::Int32>(__popcnt64(word));
# elif defined(VTKM_ICC)
return _popcnt64(static_cast<vtkm::Int64>(word));
# else
// Naive implementation:
// Test the lowest bit, then shift right, until no set bits remain.
vtkm::Int32 bits = 0;
while (word)
{
if (word & 0x1)
{
++bits;
}
word >>= 1;
}
return bits;
# endif
}
#endif // CUDA_DEVICE_PASS
} // namespace vtkm
// clang-format on

@ -53,9 +53,14 @@ $# Ignore the following comment. It is meant for the generated file.
#include <algorithm>
#endif
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
#ifdef VTKM_MSVC
#include <intrin.h> // For bitwise intrinsics (__popcnt, etc)
#include <vtkm/internal/Windows.h> // for types used by MSVC intrinsics.
#ifndef VTKM_CUDA
#include <math.h>
#endif
#endif // VTKM_CUDA
#endif // VTKM_MSVC
#define VTKM_CUDA_MATH_FUNCTION_32(func) func##f
#define VTKM_CUDA_MATH_FUNCTION_64(func) func
@ -1194,6 +1199,191 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
#endif
}
/// Bitwise operations
///
/// Find the first (least significant) set bit in @a word, and return its
/// one-based position (1-32). If no bits are set, returns 0.
///
/// Two definitions follow, selected by the preprocessor: a device-only one
/// for the CUDA device pass, and a general one that picks a compiler
/// intrinsic and otherwise falls back to a portable loop.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffs is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
// Output is [0,32], with ffs(0) == 0
return __ffs(static_cast<int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt32 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
// Output is [0,32], with ffs(0) == 0
return __builtin_ffs(static_cast<int>(word));
# elif defined(VTKM_MSVC)
// Output is [0, 31], check return code to see if bits are set:
// The zero-based index is written through the pointer, so 1 is added to
// produce the one-based result.
// NOTE(review): the cast presumes DWORD and vtkm::UInt32 have the same size
// and representation -- confirm for every supported MSVC target.
vtkm::UInt32 firstSet;
return _BitScanForward(reinterpret_cast<DWORD*>(&firstSet), word) != 0
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
# elif defined(VTKM_ICC)
// Output is [0, 31], undefined if word is 0.
return word != 0 ? _bit_scan_forward(word) + 1 : 0;
# else
// Naive implementation:
// Shift right until bit 0 is set, counting the one-based position.
if (word == 0)
{
return 0;
}
vtkm::Int32 bit = 1;
while ((word & 0x1) == 0)
{
word >>= 1;
++bit;
}
return bit;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Find the first (least significant) set bit in @a word, and return its
/// one-based position (1-64). If no bits are set, returns 0.
///
/// 64-bit overload; the structure mirrors the 32-bit overload.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __ffsll is device only.
inline __device__
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{
// Output is [0,64], with ffs(0) == 0
return __ffsll(static_cast<long long int>(word));
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 FindFirstSetBit(vtkm::UInt64 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
// Output is [0,64], with ffs(0) == 0
return __builtin_ffsll(static_cast<long long int>(word));
# elif defined(VTKM_MSVC) || defined(VTKM_ICC)
// Output is [0, 63], check return code to see if bits are set:
// The zero-based index is written through the pointer, so 1 is added to
// produce the one-based result.
// NOTE(review): this branch is shared by MSVC and ICC and uses the DWORD
// type; confirm DWORD is declared and _BitScanForward64 is available for
// every compiler/target that reaches it.
vtkm::UInt32 firstSet;
return _BitScanForward64(reinterpret_cast<DWORD*>(&firstSet), word) != 0
? static_cast<vtkm::Int32>(firstSet + 1) : 0;
# else
// Naive implementation:
// Shift right until bit 0 is set, counting the one-based position.
if (word == 0)
{
return 0;
}
vtkm::Int32 bit = 1;
while ((word & 0x1) == 0)
{
word >>= 1;
++bit;
}
return bit;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Count the total number of bits set in @a word.
///
/// 32-bit overload. A device-only definition is used during the CUDA device
/// pass; otherwise a compiler intrinsic is selected, with a portable loop as
/// the fallback.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popc is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
return __popc(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt32 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
return __builtin_popcount(word);
# elif defined(VTKM_MSVC)
// NOTE(review): __popcnt presumably requires hardware popcount support --
// confirm for the minimum supported CPU.
return static_cast<vtkm::Int32>(__popcnt(word));
# elif defined(VTKM_ICC)
return _popcnt32(static_cast<int>(word));
# else
// Naive implementation:
// Test the lowest bit, then shift right, until no set bits remain.
vtkm::Int32 bits = 0;
while (word)
{
if (word & 0x1)
{
++bits;
}
word >>= 1;
}
return bits;
# endif
}
#endif // CUDA_DEVICE_PASS
/// Count the total number of bits set in @a word.
///
/// 64-bit overload; the structure mirrors the 32-bit overload.
#ifdef VTKM_CUDA_DEVICE_PASS
// Need to explicitly mark this as __device__ since __popcll is device only.
inline __device__
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
return __popcll(word);
}
#else // CUDA_DEVICE_PASS
inline VTKM_EXEC_CONT
vtkm::Int32 CountSetBits(vtkm::UInt64 word)
{
# if defined(VTKM_GCC) || defined(VTKM_CLANG)
return __builtin_popcountll(word);
# elif defined(VTKM_MSVC)
// NOTE(review): __popcnt64 is presumably only available on 64-bit targets --
// confirm if 32-bit MSVC builds are supported.
return static_cast<vtkm::Int32>(__popcnt64(word));
# elif defined(VTKM_ICC)
return _popcnt64(static_cast<vtkm::Int64>(word));
# else
// Naive implementation:
// Test the lowest bit, then shift right, until no set bits remain.
vtkm::Int32 bits = 0;
while (word)
{
if (word & 0x1)
{
++bits;
}
word >>= 1;
}
return bits;
# endif
}
#endif // CUDA_DEVICE_PASS
} // namespace vtkm
// clang-format on

@ -159,6 +159,10 @@ using UInt32 = unsigned int;
/// than smaller widths.
using IdComponent = vtkm::Int32;
/// The default word size used for atomic bitwise operations. Universally
/// supported on all devices.
using WordTypeDefault = vtkm::UInt32;
//In this order so that we exactly match the logic that exists in VTK
#if VTKM_SIZE_LONG_LONG == 8
using Int64 = long long;

@ -60,6 +60,20 @@ auto PrepareArgForExec(T&& object)
vtkm::cont::internal::IsExecutionObjectBase<T>{});
}
// Functor that dispatches BitFieldToUnorderedSet to the device-specific
// DeviceAdapterAlgorithm and records the value it returns.
struct BitFieldToUnorderedSetFunctor
{
  // Value returned by the device algorithm; stays 0 until the functor runs.
  vtkm::Id Result{ 0 };

  template <typename Device, typename... Args>
  VTKM_CONT bool operator()(Device, Args&&... args)
  {
    VTKM_IS_DEVICE_ADAPTER_TAG(Device);

    using DeviceAlgorithm = vtkm::cont::DeviceAdapterAlgorithm<Device>;
    // Each argument is passed through PrepareArgForExec before the call.
    this->Result = DeviceAlgorithm::BitFieldToUnorderedSet(
      PrepareArgForExec<Device>(std::forward<Args>(args))...);

    // Always reports success to the caller.
    return true;
  }
};
struct CopyFunctor
{
template <typename Device, typename... Args>
@ -374,6 +388,27 @@ struct UpperBoundsFunctor
struct Algorithm
{
// Run BitFieldToUnorderedSet on the device identified by devId.
// Returns the value recorded by the dispatch functor; if the functor is never
// invoked, its initial Result of 0 is returned.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
  vtkm::cont::DeviceAdapterId devId,
  const vtkm::cont::BitField& bits,
  vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
  detail::BitFieldToUnorderedSetFunctor dispatch;
  vtkm::cont::TryExecuteOnDevice(devId, dispatch, bits, indices);
  return dispatch.Result;
}

// Same as the overload above, but the device is chosen by TryExecute instead
// of being named by the caller.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
  const vtkm::cont::BitField& bits,
  vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
  detail::BitFieldToUnorderedSetFunctor dispatch;
  vtkm::cont::TryExecute(dispatch, bits, indices);
  return dispatch.Result;
}
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool Copy(vtkm::cont::DeviceAdapterId devId,
const vtkm::cont::ArrayHandle<T, CIn>& input,

@ -0,0 +1,220 @@
//=============================================================================
//
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//
//=============================================================================
#ifndef vtk_m_cont_ArrayHandleBitField_h
#define vtk_m_cont_ArrayHandleBitField_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/Storage.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// Array portal that exposes the bits of a bit portal as boolean values.
/// Reads go through GetBit; writes go through SetBitAtomic.
template <typename BitPortalType>
class ArrayPortalBitField
{
public:
  using ValueType = bool;

  ArrayPortalBitField() noexcept = default;
  ArrayPortalBitField(const ArrayPortalBitField&) noexcept = default;
  ArrayPortalBitField(ArrayPortalBitField&&) noexcept = default;
  ArrayPortalBitField& operator=(const ArrayPortalBitField&) noexcept = default;
  ArrayPortalBitField& operator=(ArrayPortalBitField&&) noexcept = default;

  /// Wrap a copy of @a portal.
  VTKM_EXEC_CONT
  explicit ArrayPortalBitField(const BitPortalType& portal) noexcept
    : BitPortal{ portal }
  {
  }

  /// Wrap @a portal, taking it by move.
  VTKM_EXEC_CONT
  explicit ArrayPortalBitField(BitPortalType&& portal) noexcept
    : BitPortal{ std::move(portal) }
  {
  }

  /// One value per bit of the wrapped portal.
  VTKM_EXEC_CONT
  vtkm::Id GetNumberOfValues() const noexcept
  {
    return this->BitPortal.GetNumberOfBits();
  }

  /// Read the bit at @a index.
  VTKM_EXEC_CONT
  ValueType Get(vtkm::Id index) const noexcept
  {
    return this->BitPortal.GetBit(index);
  }

  /// Write the bit at @a index.
  VTKM_EXEC_CONT
  void Set(vtkm::Id index, ValueType value) const
  {
    // The write is atomic so that concurrent writers touching neighbouring
    // bits do not clobber each other.
    this->BitPortal.SetBitAtomic(index, value);
  }

private:
  BitPortalType BitPortal;
};
/// Storage tag selecting the BitField-backed storage for boolean arrays.
/// The type carries no data; it is only used to pick template
/// specializations.
struct VTKM_ALWAYS_EXPORT StorageTagBitField
{
};
/// Storage specialization for boolean arrays backed by a BitField. Every
/// operation is forwarded to the wrapped BitField, with one array value per
/// bit.
template <>
class Storage<bool, StorageTagBitField>
{
  using ControlAtomics = vtkm::cont::internal::AtomicInterfaceControl;
  using BitPortalType = vtkm::cont::detail::BitPortal<ControlAtomics>;
  using BitPortalConstType = vtkm::cont::detail::BitPortalConst<ControlAtomics>;

public:
  using ValueType = bool;
  using PortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;
  using PortalConstType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;

  VTKM_CONT Storage() = default;
  VTKM_CONT Storage(const Storage& src) = default;
  VTKM_CONT Storage(Storage&& src) noexcept = default;
  VTKM_CONT Storage& operator=(const Storage& src) = default;
  VTKM_CONT Storage& operator=(Storage&& src) noexcept = default;

  /// Wrap a copy of the @a data handle.
  explicit VTKM_CONT Storage(const vtkm::cont::BitField& data)
    : Data{ data }
  {
  }

  /// Wrap @a data, taking it by move.
  explicit VTKM_CONT Storage(vtkm::cont::BitField&& data) noexcept
    : Data{ std::move(data) }
  {
  }

  /// Writable control-side portal over the bits.
  VTKM_CONT
  PortalType GetPortal()
  {
    return PortalType{ this->Data.GetPortalControl() };
  }

  /// Read-only control-side portal over the bits.
  VTKM_CONT
  PortalConstType GetPortalConst()
  {
    return PortalConstType{ this->Data.GetPortalConstControl() };
  }

  /// The number of values equals the number of bits in the BitField.
  VTKM_CONT vtkm::Id GetNumberOfValues() const
  {
    return this->Data.GetNumberOfBits();
  }

  VTKM_CONT void Allocate(vtkm::Id numberOfValues)
  {
    this->Data.Allocate(numberOfValues);
  }

  VTKM_CONT void Shrink(vtkm::Id numberOfValues)
  {
    this->Data.Shrink(numberOfValues);
  }

  VTKM_CONT void ReleaseResources()
  {
    this->Data.ReleaseResources();
  }

  /// Returns the wrapped BitField by value.
  VTKM_CONT vtkm::cont::BitField GetBitField() const
  {
    return this->Data;
  }

private:
  vtkm::cont::BitField Data;
};
/// ArrayTransfer specialization for BitField-backed boolean arrays. The
/// transfer holds the BitField obtained from the storage and forwards each
/// Prepare* request to it, wrapping the resulting bit portal in an
/// ArrayPortalBitField.
template <typename Device>
class ArrayTransfer<bool, StorageTagBitField, Device>
{
  using StorageType = Storage<bool, StorageTagBitField>;
  using AtomicInterface = AtomicInterfaceExecution<Device>;
  using BitPortalExecution = vtkm::cont::detail::BitPortal<AtomicInterface>;
  using BitPortalConstExecution = vtkm::cont::detail::BitPortalConst<AtomicInterface>;

public:
  using ValueType = bool;

  using PortalControl = typename StorageType::PortalType;
  using PortalConstControl = typename StorageType::PortalConstType;

  using PortalExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalExecution>;
  using PortalConstExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstExecution>;

  /// Captures the BitField returned by @a storage.
  VTKM_CONT
  explicit ArrayTransfer(StorageType* storage)
    : Data{ storage->GetBitField() }
  {
  }

  /// One value per bit.
  VTKM_CONT
  vtkm::Id GetNumberOfValues() const
  {
    return this->Data.GetNumberOfBits();
  }

  /// Read-only execution portal. The updateData flag is not used.
  VTKM_CONT
  PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData))
  {
    return PortalConstExecution{ this->Data.PrepareForInput(Device{}) };
  }

  /// Read/write execution portal. The updateData flag is not used.
  VTKM_CONT
  PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData))
  {
    return PortalExecution{ this->Data.PrepareForInPlace(Device{}) };
  }

  /// Execution portal for output; @a numberOfValues is forwarded to the
  /// BitField as its requested size.
  VTKM_CONT
  PortalExecution PrepareForOutput(vtkm::Id numberOfValues)
  {
    return PortalExecution{ this->Data.PrepareForOutput(numberOfValues, Device{}) };
  }

  VTKM_CONT
  void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
  {
    // Intentionally empty: the internal bitfield is expected to retrieve its
    // own output data as necessary.
  }

  VTKM_CONT
  void Shrink(vtkm::Id numberOfValues)
  {
    this->Data.Shrink(numberOfValues);
  }

  VTKM_CONT
  void ReleaseResources()
  {
    this->Data.ReleaseResources();
  }

private:
  vtkm::cont::BitField Data;
};
} // end namespace internal
/// The ArrayHandleBitField class is a boolean-valued ArrayHandle that is backed
/// by a BitField.
///
/// Each value of the array corresponds to one bit of the BitField. The
/// underlying BitField can be retrieved again with GetBitField().
///
class ArrayHandleBitField : public ArrayHandle<bool, internal::StorageTagBitField>
{
public:
// Supplies the usual ArrayHandle subclass members; Superclass and StorageType
// used below presumably come from this macro -- see its definition.
VTKM_ARRAY_HANDLE_SUBCLASS_NT(ArrayHandleBitField,
(ArrayHandle<bool, internal::StorageTagBitField>));
/// Construct an array handle whose storage wraps a copy of the @a bitField
/// handle.
VTKM_CONT
explicit ArrayHandleBitField(const vtkm::cont::BitField& bitField)
: Superclass{ StorageType{ bitField } }
{
}
/// Construct an array handle whose storage takes @a bitField by move.
VTKM_CONT
explicit ArrayHandleBitField(vtkm::cont::BitField&& bitField) noexcept
: Superclass{ StorageType{ std::move(bitField) } }
{
}
/// Returns the BitField held by this handle's storage.
VTKM_CONT
vtkm::cont::BitField GetBitField() const { return this->GetStorage().GetBitField(); }
};
/// Convenience factory: build an ArrayHandleBitField from a copy of the
/// @a bitField handle.
VTKM_CONT inline vtkm::cont::ArrayHandleBitField make_ArrayHandleBitField(
  const vtkm::cont::BitField& bitField)
{
  return vtkm::cont::ArrayHandleBitField{ bitField };
}

/// Convenience factory: build an ArrayHandleBitField that takes @a bitField
/// by move.
VTKM_CONT inline vtkm::cont::ArrayHandleBitField make_ArrayHandleBitField(
  vtkm::cont::BitField&& bitField) noexcept
{
  return vtkm::cont::ArrayHandleBitField{ std::move(bitField) };
}
}
} // end namespace vtkm::cont
#endif // vtk_m_cont_ArrayHandleBitField_h

719
vtkm/cont/BitField.h Normal file

@ -0,0 +1,719 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_BitField_h
#define vtk_m_cont_BitField_h
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
#include <cassert>
#include <climits>
#include <memory>
#include <type_traits>
namespace vtkm
{
namespace cont
{
class BitField;
namespace detail
{
struct BitFieldTraits
{
  // Allocations are made in blocks of BlockSize bytes so that word-based
  // access with any power-of-two word size up to BlockSize never touches
  // invalid data, and so that atomic values are properly aligned. The default
  // StorageBasic alignment is used as the block size.
  constexpr static vtkm::Id BlockSize = VTKM_ALLOCATION_ALIGNMENT;

  // Require a block of at least 64 bytes. SIMD bit operations may be
  // implemented eventually, and the current largest vector width is 512 bits.
  VTKM_STATIC_ASSERT(BlockSize >= 64);

  /// Require an unsigned integral type whose size is <= BlockSize bytes and
  /// evenly divides BlockSize.
  template <typename WordType>
  using IsValidWordType =
    std::integral_constant<bool,
                           std::is_unsigned<WordType>::value &&
                             (sizeof(WordType) <= static_cast<size_t>(BlockSize)) &&
                             ((static_cast<size_t>(BlockSize) % sizeof(WordType)) == 0)>;

  /// Require a word type that satisfies IsValidWordType and is also listed
  /// in the WordTypes supported by the specified AtomicInterface.
  template <typename WordType, typename AtomicInterface>
  using IsValidWordTypeAtomic = std::integral_constant<
    bool,
    IsValidWordType<WordType>::value &&
      vtkm::ListContains<typename AtomicInterface::WordTypes, WordType>::value>;
};
/// Identifies a bit in a BitField by Word and BitOffset. Note that these
/// values are dependent on the type of word used to generate the coordinate:
/// the same bit has different coordinates for different word sizes.
struct BitCoordinate
{
/// The index of the word containing the specified bit.
vtkm::Id WordIndex;
/// The zero-indexed position of the bit within that word.
vtkm::Int32 BitOffset; // [0, bitsInWord)
};
/// Portal for performing bit or word operations on a BitField.
///
/// This is the implementation used by BitPortal and BitPortalConst.
template <typename AtomicInterface_, bool IsConst>
class BitPortalBase
{
// Checks if PortalType has a GetIteratorBegin() method that returns a
// pointer.
template <typename PortalType,
typename PointerType = decltype(std::declval<PortalType>().GetIteratorBegin())>
struct HasPointerAccess : public std::is_pointer<PointerType>
{
};
// Determine whether we should store a const vs. mutable pointer:
template <typename T>
using MaybeConstPointer = typename std::conditional<IsConst, T const*, T*>::type;
using BufferType = MaybeConstPointer<void>; // void* or void const*, as appropriate
public:
/// The atomic interface used to carry out atomic operations. See
/// AtomicInterfaceExecution<Device> and AtomicInterfaceControl
using AtomicInterface = AtomicInterface_;
/// The fastest word type for performing bitwise operations through AtomicInterface.
using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
/// MPL check for whether a WordType may be used for non-atomic operations.
template <typename WordType>
using IsValidWordType = BitFieldTraits::IsValidWordType<WordType>;
/// MPL check for whether a WordType may be used for atomic operations.
template <typename WordType>
using IsValidWordTypeAtomic = BitFieldTraits::IsValidWordTypeAtomic<WordType, AtomicInterface>;
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypeDefault>::value,
"Internal error: Default word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypePreferred>::value,
"Device-specific fast word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypeDefault>::value,
"Internal error: Default word type is invalid.");
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordTypePreferred>::value,
"Device-specific fast word type is invalid for atomic operations.");
protected:
friend class vtkm::cont::BitField;
/// Construct a BitPortal from an ArrayHandle with basic storage's portal.
template <typename PortalType>
VTKM_EXEC_CONT BitPortalBase(const PortalType& portal, vtkm::Id numberOfBits)
: Data{ portal.GetIteratorBegin() }
, NumberOfBits{ numberOfBits }
{
VTKM_STATIC_ASSERT_MSG(HasPointerAccess<PortalType>::value,
"Source portal must return a pointer from "
"GetIteratorBegin().");
}
public:
BitPortalBase() noexcept = default;
BitPortalBase(const BitPortalBase& src) noexcept = default;
BitPortalBase(BitPortalBase&& src) noexcept = default;
BitPortalBase& operator=(const BitPortalBase& src) noexcept = default;
BitPortalBase& operator=(BitPortalBase&& src) noexcept = default;
/// Returns the number of bits in the BitField.
VTKM_EXEC_CONT
vtkm::Id GetNumberOfBits() const noexcept { return this->NumberOfBits; }
/// Returns how many words of type @a WordTypePreferred exist in the dataset.
/// Note that this is rounded up and may contain partial words. See
/// also GetFinalWordMask to handle the trailing partial word.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT vtkm::Id GetNumberOfWords() const noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
static constexpr vtkm::Id WordSize = static_cast<vtkm::Id>(sizeof(WordType));
static constexpr vtkm::Id WordBits = WordSize * CHAR_BIT;
return (this->NumberOfBits + WordBits - 1) / WordBits;
}
/// Return a mask in which the valid bits in the final word (of type @a
/// WordType) are set to 1.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetFinalWordMask() const noexcept
{
if (this->NumberOfBits == 0)
{
return WordType{ 0 };
}
static constexpr vtkm::Int32 BitsPerWord =
static_cast<vtkm::Int32>(sizeof(WordType) * CHAR_BIT);
const auto maxBit = this->NumberOfBits - 1;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(maxBit);
const vtkm::Int32 shift = BitsPerWord - coord.BitOffset - 1;
return (~WordType{ 0 }) >> shift;
}
/// Given a bit index, compute a @a BitCoordinate that identifies the
/// corresponding word index and bit offset.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT static BitCoordinate GetBitCoordinateFromIndex(vtkm::Id bitIdx) noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
return { static_cast<vtkm::Id>(bitIdx / BitsPerWord),
static_cast<vtkm::Int32>(bitIdx % BitsPerWord) };
}
/// Set the bit at @a bitIdx to @a val. This method is not thread-safe --
/// threads modifying bits nearby may interfere with this operation.
/// Additionally, this should not be used for synchronization, as there are
/// no memory ordering requirements. See SetBitAtomic for those usecases.
VTKM_EXEC_CONT
void SetBit(vtkm::Id bitIdx, bool val) const noexcept
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
auto* const wordAddr = this->GetWordAddress<WordType>(coord.WordIndex);
if (val)
{
*wordAddr |= mask;
}
else
{
*wordAddr &= ~mask;
}
}
/// Set the bit at @a bitIdx to @a val using atomic operations. This method
/// is thread-safe and guarantees, at minimum, "release" memory ordering.
VTKM_EXEC_CONT
void SetBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
if (val)
{
this->OrWordAtomic(coord.WordIndex, mask);
}
else
{
this->AndWordAtomic(coord.WordIndex, ~mask);
}
}
/// Return whether or not the bit at @a bitIdx is set. Note that this uses
/// non-atomic loads and thus should not be used for synchronization.
VTKM_EXEC_CONT
bool GetBit(vtkm::Id bitIdx) const noexcept
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto word = this->GetWord<WordType>(coord.WordIndex);
const auto mask = WordType(1) << coord.BitOffset;
return (word & mask) != WordType(0);
}
/// Return whether or not the bit at @a bitIdx is set using atomic loads.
/// This method is thread safe and guarantees, at minimum, "acquire" memory
/// ordering.
VTKM_EXEC_CONT
bool GetBitAtomic(vtkm::Id bitIdx) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto word = this->GetWordAtomic<WordType>(coord.WordIndex);
const auto mask = WordType(1) << coord.BitOffset;
return (word & mask) != WordType(0);
}
/// Set the word (of type @a WordType) at @a wordIdx to @a word using
/// non-atomic operations.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT void SetWord(vtkm::Id wordIdx, WordType word) const noexcept
{
*this->GetWordAddress<WordType>(wordIdx) = word;
}
/// Set the word (of type @a WordType) at @a wordIdx to @a word using atomic
/// operations. The store guarantees, at minimum, "release" memory ordering.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT void SetWordAtomic(vtkm::Id wordIdx, WordType word) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
AtomicInterface::Store(this->GetWordAddress<WordType>(wordIdx), word);
}
/// Get the word (of type @a WordType) at @a wordIdx using non-atomic
/// operations.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetWord(vtkm::Id wordIdx) const noexcept
{
return *this->GetWordAddress<WordType>(wordIdx);
}
/// Get the word (of type @a WordType) at @ wordIdx using an atomic read with,
/// at minimum, "acquire" memory ordering.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType GetWordAtomic(vtkm::Id wordIdx) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
return AtomicInterface::Load(this->GetWordAddress<WordType>(wordIdx));
}
/// Toggle the bit at @a bitIdx, returning the original value. This method
/// uses atomic operations and a full memory barrier.
VTKM_EXEC_CONT
bool NotBitAtomic(vtkm::Id bitIdx) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto mask = WordType(1) << coord.BitOffset;
const auto oldWord = this->XorWordAtomic(coord.WordIndex, mask);
return (oldWord & mask) != WordType(0);
}
/// Perform a bitwise "not" operation on the word at @a wordIdx, returning the
/// original word. This uses atomic operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType NotWordAtomic(vtkm::Id wordIdx) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Not(addr);
}
/// Perform an "and" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool AndBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 1's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? ~WordType(0) : ~bitmask;
const auto oldWord = this->AndWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "and" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType AndWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::And(addr, wordmask);
}
/// Perform an "of" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool OrBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? bitmask : WordType(0);
const auto oldWord = this->OrWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "or" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType OrWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Or(addr, wordmask);
}
/// Perform an "xor" operation between the bit at @a bitIdx and @a val,
/// returning the original value at @a bitIdx. This method uses atomic
/// operations and a full memory barrier.
VTKM_EXEC_CONT
bool XorBitAtomic(vtkm::Id bitIdx, bool val) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
// wordmask is all 0's, except for BitOffset which is (val ? 1 : 0)
const auto wordmask = val ? bitmask : WordType(0);
const auto oldWord = this->XorWordAtomic(coord.WordIndex, wordmask);
return (oldWord & bitmask) != WordType(0);
}
/// Perform an "xor" operation between the word at @a wordIdx and @a wordMask,
/// returning the original word at @a wordIdx. This method uses atomic
/// operations and a full memory barrier.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType XorWordAtomic(vtkm::Id wordIdx, WordType wordmask) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::Xor(addr, wordmask);
}
/// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
/// If the value in memory is equal to @a expectedBit, it is replaced with
/// the value of @a newBit and the original value of the bit is returned as a
/// boolean. This method implements a full memory barrier around the atomic
/// operation.
VTKM_EXEC_CONT
bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
{
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
WordType oldWord;
WordType newWord;
do
{
oldWord = this->GetWord<WordType>(coord.WordIndex);
bool oldBitSet = (oldWord & bitmask) != WordType(0);
if (oldBitSet != expectedBit)
{ // The bit-of-interest does not match what we expected.
return oldBitSet;
}
else if (oldBitSet == newBit)
{ // The bit hasn't changed, but also already matches newVal. We're done.
return expectedBit;
}
// Compute the new word
newWord = oldWord ^ bitmask;
} // CAS loop to resolve any conflicting changes to other bits in the word.
while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord);
return expectedBit;
}
/// Perform an atomic compare-and-swap operation on the word at @a wordIdx.
/// If the word in memory is equal to @a expectedWord, it is replaced with
/// the value of @a newWord and the original word is returned. This method
/// implements a full memory barrier around the atomic operation.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx,
WordType newWord,
WordType expected) const
{
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return AtomicInterface::CompareAndSwap(addr, newWord, expected);
}
private:
template <typename WordType>
VTKM_EXEC_CONT MaybeConstPointer<WordType> GetWordAddress(vtkm::Id wordId) const noexcept
{
VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
return reinterpret_cast<MaybeConstPointer<WordType>>(this->Data) + wordId;
}
BufferType Data{ nullptr };
vtkm::Id NumberOfBits{ 0 };
};
/// Mutable portal: BitPortalBase instantiated with IsConst = false, using
/// @a AtomicOps as its atomic interface.
template <typename AtomicOps>
using BitPortal = BitPortalBase<AtomicOps, false>;
/// Read-only portal: BitPortalBase instantiated with IsConst = true, using
/// @a AtomicOps as its atomic interface.
template <typename AtomicOps>
using BitPortalConst = BitPortalBase<AtomicOps, true>;
} // end namespace detail
/// Container for a sequence of bits, stored in an ArrayHandle of
/// WordTypeDefault words whose length is padded to a multiple of BlockSize
/// bytes. Copies of a BitField share the same underlying storage.
class BitField
{
  static constexpr vtkm::Id BlockSize = detail::BitFieldTraits::BlockSize;

public:
  /// The ArrayHandle type that holds the bit data internally.
  using ArrayHandleType = ArrayHandle<WordTypeDefault, StorageTagBasic>;

  /// Read/write BitPortal for the control environment.
  using PortalControl = detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;

  /// Read-only BitPortal for the control environment.
  using PortalConstControl = detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;

  /// Device-dependent types associated with a BitField.
  template <typename Device>
  struct ExecutionTypes
  {
    /// AtomicInterfaceExecution implementation for @a Device.
    using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<Device>;

    /// Word type preferred by @a Device.
    using WordTypePreferred = typename AtomicInterface::WordTypePreferred;

    /// Read/write BitPortal usable on @a Device.
    using Portal = detail::BitPortal<AtomicInterface>;

    /// Read-only BitPortal usable on @a Device.
    using PortalConst = detail::BitPortalConst<AtomicInterface>;
  };

  /// Trait: is @a WordType usable for non-atomic operations?
  template <typename WordType>
  using IsValidWordType = detail::BitFieldTraits::IsValidWordType<WordType>;

  /// Trait: is @a WordType usable for atomic operations on @a Device?
  template <typename WordType, typename Device>
  using IsValidWordTypeAtomic = detail::BitFieldTraits::
    IsValidWordTypeAtomic<WordType, vtkm::cont::internal::AtomicInterfaceExecution<Device>>;

  /// Trait: is @a WordType usable for atomic operations in the control
  /// environment?
  template <typename WordType>
  using IsValidWordTypeAtomicControl =
    detail::BitFieldTraits::IsValidWordTypeAtomic<WordType,
                                                  vtkm::cont::internal::AtomicInterfaceControl>;

  /// Create an empty BitField with its own freshly allocated internal state.
  VTKM_CONT BitField()
    : Internals{ std::make_shared<InternalStruct>() }
  {
  }
  VTKM_CONT BitField(const BitField& src) = default;
  VTKM_CONT BitField(BitField&& src) noexcept = default;
  VTKM_CONT ~BitField() = default;
  VTKM_CONT BitField& operator=(const BitField& src) = default;
  VTKM_CONT BitField& operator=(BitField&& src) noexcept = default;

  /// Two BitFields compare equal when they share the same internal state
  /// object (identity comparison, not a bitwise comparison of contents).
  VTKM_CONT
  bool operator==(const BitField& rhs) const
  {
    return this->Internals == rhs.Internals;
  }

  /// Negation of operator==.
  VTKM_CONT
  bool operator!=(const BitField& rhs) const
  {
    return !(*this == rhs);
  }

  /// Access the ArrayHandle that backs this BitField.
  VTKM_CONT
  ArrayHandleType& GetData()
  {
    return this->Internals->Data;
  }

  /// Access the ArrayHandle that backs this BitField (read-only overload).
  VTKM_CONT
  const ArrayHandleType& GetData() const
  {
    return this->Internals->Data;
  }

  /// Number of bits held by this BitField.
  VTKM_CONT
  vtkm::Id GetNumberOfBits() const
  {
    return this->Internals->NumberOfBits;
  }

  /// Number of @a WordType words needed to cover every bit in this BitField,
  /// rounded up so that a trailing partial word is counted.
  template <typename WordType>
  VTKM_CONT vtkm::Id GetNumberOfWords() const
  {
    VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
    constexpr vtkm::Id bitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
    const vtkm::Id bitCount = this->Internals->NumberOfBits;
    return (bitCount + bitsPerWord - 1) / bitsPerWord;
  }

  /// Allocate storage for @a numberOfBits bits and record the new bit count.
  VTKM_CONT
  void Allocate(vtkm::Id numberOfBits)
  {
    const vtkm::Id storageWords = this->BitsToAllocatedStorageWords(numberOfBits);

    VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
               "BitField Allocation: %llu bits, blocked up to %s.",
               static_cast<unsigned long long>(numberOfBits),
               vtkm::cont::GetSizeString(
                 static_cast<vtkm::UInt64>(static_cast<size_t>(storageWords) * sizeof(WordTypeDefault)))
                 .c_str());

    InternalStruct& state = *this->Internals;
    state.Data.Allocate(storageWords);
    state.NumberOfBits = numberOfBits;
  }

  /// Shrink the backing array to the storage needed for @a numberOfBits bits
  /// and record the new bit count.
  VTKM_CONT
  void Shrink(vtkm::Id numberOfBits)
  {
    const vtkm::Id storageWords = this->BitsToAllocatedStorageWords(numberOfBits);
    InternalStruct& state = *this->Internals;
    state.Data.Shrink(storageWords);
    state.NumberOfBits = numberOfBits;
  }

  /// Release the execution-side resources of the backing array.
  VTKM_CONT
  void ReleaseResourcesExecution()
  {
    this->Internals->Data.ReleaseResourcesExecution();
  }

  /// Release every resource of the backing array and reset the bit count to
  /// zero.
  VTKM_CONT
  void ReleaseResources()
  {
    InternalStruct& state = *this->Internals;
    state.Data.ReleaseResources();
    state.NumberOfBits = 0;
  }

  /// Force the control array to sync with the last-used device.
  VTKM_CONT
  void SyncControlArray() const
  {
    this->Internals->Data.SyncControlArray();
  }

  /// The id of the device where the most up-to-date copy of the data is
  /// currently resident. If the data is on the host, DeviceAdapterTagUndefined
  /// is returned.
  VTKM_CONT
  DeviceAdapterId GetDeviceAdapterId() const
  {
    return this->Internals->Data.GetDeviceAdapterId();
  }

  /// Build a read/write portal for use in the control environment.
  VTKM_CONT
  PortalControl GetPortalControl()
  {
    InternalStruct& state = *this->Internals;
    return PortalControl{ state.Data.GetPortalControl(), state.NumberOfBits };
  }

  /// Build a read-only portal for use in the control environment.
  VTKM_CONT
  PortalConstControl GetPortalConstControl() const
  {
    InternalStruct& state = *this->Internals;
    return PortalConstControl{ state.Data.GetPortalConstControl(), state.NumberOfBits };
  }

  /// Prepares this BitField to be used as an input to an operation in the
  /// execution environment. If necessary, copies data to the execution
  /// environment. Can throw an exception if this BitField does not yet contain
  /// any data. Returns a portal that can be used in code running in the
  /// execution environment.
  template <typename DeviceAdapterTag>
  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(
    DeviceAdapterTag device) const
  {
    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::PortalConst;
    InternalStruct& state = *this->Internals;
    return PortalType{ state.Data.PrepareForInput(device), state.NumberOfBits };
  }

  /// Prepares (allocates) this BitField to be used as an output from an
  /// operation in the execution environment. The internal state of this class
  /// is set to have valid data in the execution BitField with the assumption
  /// that the array will be filled soon (i.e. before any other methods of this
  /// object are called). Returns a portal that can be used in code running in
  /// the execution environment.
  template <typename DeviceAdapterTag>
  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForOutput(
    vtkm::Id numBits,
    DeviceAdapterTag device) const
  {
    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
    const vtkm::Id storageWords = this->BitsToAllocatedStorageWords(numBits);

    VTKM_LOG_F(vtkm::cont::LogLevel::MemExec,
               "BitField Allocation: %llu bits, blocked up to %s.",
               static_cast<unsigned long long>(numBits),
               vtkm::cont::GetSizeString(
                 static_cast<vtkm::UInt64>(static_cast<size_t>(storageWords) * sizeof(WordTypeDefault)))
                 .c_str());

    // Allocate on the device first; the bit count is only recorded once the
    // allocation call has returned.
    InternalStruct& state = *this->Internals;
    auto execPortal = state.Data.PrepareForOutput(storageWords, device);
    state.NumberOfBits = numBits;
    return PortalType{ execPortal, numBits };
  }

  /// Prepares this BitField to be used in an in-place operation (both as input
  /// and output) in the execution environment. If necessary, copies data to
  /// the execution environment. Can throw an exception if this BitField does
  /// not yet contain any data. Returns a portal that can be used in code
  /// running in the execution environment.
  template <typename DeviceAdapterTag>
  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(
    DeviceAdapterTag device) const
  {
    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
    InternalStruct& state = *this->Internals;
    return PortalType{ state.Data.PrepareForInPlace(device), state.NumberOfBits };
  }

private:
  /// Convert a bit count into the number of WordTypeDefault words to allocate:
  /// bits are rounded up to whole bytes, bytes are rounded up to whole
  /// BlockSize-byte blocks, and the padded byte count is divided by the word
  /// size.
  VTKM_CONT
  static vtkm::Id BitsToAllocatedStorageWords(vtkm::Id numBits)
  {
    constexpr vtkm::Id bytesPerWord = static_cast<vtkm::Id>(sizeof(WordTypeDefault));

    const vtkm::Id wholeBytes = (numBits + CHAR_BIT - 1) / CHAR_BIT;
    const vtkm::Id wholeBlocks = (wholeBytes + BlockSize - 1) / BlockSize;
    const vtkm::Id paddedBytes = wholeBlocks * BlockSize;
    return paddedBytes / bytesPerWord;
  }

  /// State shared between all copies of a BitField.
  struct VTKM_ALWAYS_EXPORT InternalStruct
  {
    ArrayHandleType Data;  // word storage
    vtkm::Id NumberOfBits; // number of valid bits within Data
  };

  std::shared_ptr<InternalStruct> Internals;
};
}
} // end namespace vtkm::cont
#endif // vtk_m_cont_BitField_h

@ -22,6 +22,7 @@ set(headers
Algorithm.h
ArrayCopy.h
ArrayHandle.h
ArrayHandleBitField.h
ArrayHandleCartesianProduct.h
ArrayHandleCast.h
ArrayHandleCompositeVector.h
@ -49,6 +50,7 @@ set(headers
ArrayRangeCompute.h
AssignerMultiBlock.h
AtomicArray.h
BitField.h
BoundsCompute.h
BoundsGlobalCompute.h
CastAndCall.h

@ -52,6 +52,15 @@ template <class DeviceAdapterTag>
struct DeviceAdapterAlgorithm
#ifdef VTKM_DOXYGEN_ONLY
{
/// \brief Create a unique, unsorted list of indices denoting which bits are
/// set in a bitfield.
///
/// Returns the total number of set bits.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
const vtkm::cont::BitField& bits,
vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices);
/// \brief Copy the contents of one ArrayHandle to another
///
/// Copies the contents of \c input to \c output. The array \c output will be
@ -660,9 +669,18 @@ public:
/// The class provide the actual implementation used by
/// vtkm::cont::DeviceAdapterAtomicArrayImplementation.
///
/// TODO combine this with AtomicInterfaceExecution.
template <typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation;
/// \brief Class providing device-specific support for atomic operations.
///
/// AtomicInterfaceControl provides atomic operations for the control
/// environment, and may be subclassed to implement the device interface when
/// appropriate for a CPU-based device.
template <typename DeviceTag>
class AtomicInterfaceExecution;
/// \brief Class providing a device-specific support for selecting the optimal
/// Task type for a given worklet.
///

@ -25,6 +25,7 @@ set(headers
TransportTagArrayInOut.h
TransportTagArrayOut.h
TransportTagAtomicArray.h
TransportTagBitField.h
TransportTagCellSetIn.h
TransportTagExecObject.h
TransportTagKeyedValuesIn.h
@ -38,6 +39,7 @@ set(headers
TypeCheck.h
TypeCheckTagArray.h
TypeCheckTagAtomicArray.h
TypeCheckTagBitField.h
TypeCheckTagCellSet.h
TypeCheckTagCellSetStructured.h
TypeCheckTagExecObject.h

@ -0,0 +1,89 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_arg_TransportTagBitField_h
#define vtk_m_cont_arg_TransportTagBitField_h
#include <vtkm/cont/arg/Transport.h>
#include <vtkm/cont/BitField.h>
namespace vtkm
{
namespace cont
{
namespace arg
{
/// \brief \c Transport tag for BitFields used as input.
///
/// The matching \c Transport specialization in this header passes the
/// BitField to the execution environment through \c PrepareForInput.
struct TransportTagBitFieldIn
{
};
/// \brief \c Transport tag for BitFields used as output.
///
/// The matching \c Transport specialization in this header uses
/// \c PrepareForInPlace, so the BitField is not reallocated.
struct TransportTagBitFieldOut
{
};
/// \brief \c Transport tag for BitFields used as both input and output.
///
/// The matching \c Transport specialization in this header uses
/// \c PrepareForInPlace.
struct TransportTagBitFieldInOut
{
};
/// Transport for a BitField consumed as input: yields the read-only
/// execution portal for \c Device.
template <typename Device>
struct Transport<vtkm::cont::arg::TransportTagBitFieldIn, vtkm::cont::BitField, Device>
{
  using ExecObjectType =
    typename vtkm::cont::BitField::template ExecutionTypes<Device>::PortalConst;

  /// Prepare \c field for input on \c Device. The input domain and the two
  /// size arguments are ignored.
  template <typename InputDomainType>
  VTKM_CONT ExecObjectType
  operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
  {
    Device device{};
    return field.PrepareForInput(device);
  }
};
/// Transport for a BitField used as output: yields the read/write execution
/// portal for \c Device.
template <typename Device>
struct Transport<vtkm::cont::arg::TransportTagBitFieldOut, vtkm::cont::BitField, Device>
{
  using ExecObjectType = typename vtkm::cont::BitField::template ExecutionTypes<Device>::Portal;

  /// Prepare \c field on \c Device. The input domain and the two size
  /// arguments are ignored.
  template <typename InputDomainType>
  VTKM_CONT ExecObjectType
  operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
  {
    // As with the WholeArray tags, "Out" is mapped to an in-place
    // preparation: the field is neither reallocated nor size-checked here.
    Device device{};
    return field.PrepareForInPlace(device);
  }
};
/// Transport for a BitField used as both input and output: yields the
/// read/write execution portal for \c Device.
template <typename Device>
struct Transport<vtkm::cont::arg::TransportTagBitFieldInOut, vtkm::cont::BitField, Device>
{
  using ExecObjectType = typename vtkm::cont::BitField::template ExecutionTypes<Device>::Portal;

  /// Prepare \c field for in-place use on \c Device. The input domain and the
  /// two size arguments are ignored.
  template <typename InputDomainType>
  VTKM_CONT ExecObjectType
  operator()(vtkm::cont::BitField& field, const InputDomainType&, vtkm::Id, vtkm::Id) const
  {
    Device device{};
    return field.PrepareForInPlace(device);
  }
};
}
}
} // namespace vtkm::cont::arg
#endif //vtk_m_cont_arg_TransportTagBitField_h

@ -0,0 +1,48 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2016 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2016 UT-Battelle, LLC.
// Copyright 2016 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_arg_TypeCheckTagBitField_h
#define vtk_m_cont_arg_TypeCheckTagBitField_h
#include <vtkm/cont/arg/TypeCheck.h>
#include <vtkm/cont/BitField.h>
#include <type_traits>
namespace vtkm
{
namespace cont
{
namespace arg
{
/// Type-check tag for arguments that must be a vtkm::cont::BitField (or a
/// class derived from it); see the TypeCheck specialization in this header.
struct TypeCheckTagBitField
{
};
/// The check passes (\c value is true) exactly when \c T is
/// vtkm::cont::BitField or a class derived from it.
template <typename T>
struct TypeCheck<TypeCheckTagBitField, T> : std::is_base_of<vtkm::cont::BitField, T>
{
};
}
}
} // namespace vtkm::cont::arg
#endif //vtk_m_cont_arg_TypeCheckTagBitField_h

@ -0,0 +1,105 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#define vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// Atomic word operations for the CUDA device adapter.
///
/// Every operation is a static __device__ function generated by the
/// VTKM_ATOMIC_OPS_FOR_TYPE macro below, which is instantiated only for
/// vtkm::UInt32. Load and Store go through a volatile pointer combined with
/// a __threadfence(); And, Or, Xor and CompareAndSwap wrap the corresponding
/// CUDA atomic intrinsic between two __threadfence() calls; Not is
/// implemented as Xor with an all-ones mask.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagCuda>
{
public:
// Note: There are 64-bit atomics available, but not on all devices. Stick
// with 32-bit only until we require compute capability 3.5+
using WordTypes = vtkm::ListTagBase<vtkm::UInt32>;
using WordTypePreferred = vtkm::UInt32;
// Defines Load, Store, Not, And, Or, Xor and CompareAndSwap for one word
// type. Note the argument order of CompareAndSwap(addr, newWord, expected):
// it is forwarded to atomicCAS as (addr, expected, newWord).
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Load(const type* addr) \
{ \
const volatile type* vaddr = addr; /* volatile to bypass cache*/ \
const type value = *vaddr; \
/* fence to ensure that dependent reads are correctly ordered */ \
__threadfence(); \
return value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static void Store(type* addr, type value) \
{ \
volatile type* vaddr = addr; /* volatile to bypass cache */ \
/* fence to ensure that previous non-atomic stores are visible to other threads */ \
__threadfence(); \
*vaddr = value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Not(type* addr) \
{ \
return AtomicInterfaceExecution::Xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type And(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicAnd(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Or(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicOr(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Xor(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicXor(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
__threadfence(); \
auto result = atomicCAS(addr, expected, newWord); \
__threadfence(); \
return result; \
}
// Instantiate the operations for the only supported word type.
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionCuda.h
AtomicInterfaceExecutionCuda.h
CudaAllocator.h
DeviceAdapterAlgorithmCuda.h
DeviceAdapterAtomicArrayImplementationCuda.h

@ -26,6 +26,7 @@
#include <vtkm/UnaryPredicates.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/Logging.h>
@ -35,6 +36,7 @@
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
@ -54,8 +56,7 @@
// Disable warnings we check vtkm for but Thrust does not.
VTKM_THIRDPARTY_PRE_INCLUDE
//This is required to be first so that we get patches for thrust included
//in the correct order
#include <cooperative_groups.h>
#include <cuda.h>
#include <thrust/advance.h>
#include <thrust/binary_search.h>
@ -71,6 +72,9 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <vtkm/exec/cuda/internal/ThrustPatches.h>
VTKM_THIRDPARTY_POST_INCLUDE
#include <limits>
#include <memory>
namespace vtkm
{
namespace cont
@ -148,6 +152,22 @@ struct CastPortal
VTKM_EXEC
ValueType Get(vtkm::Id index) const { return this->Functor(this->Portal.Get(index)); }
};
/// Deleter that hands a pointer back to the CUDA runtime via cudaFree.
/// Used as the deleter type of CudaUniquePtr.
struct CudaFreeFunctor
{
  void operator()(void* devicePtr) const
  {
    // Any error code reported by cudaFree is routed through VTKM_CUDA_CALL.
    VTKM_CUDA_CALL(cudaFree(devicePtr));
  }
};
/// A std::unique_ptr whose deleter is CudaFreeFunctor, so the owned pointer
/// is passed to cudaFree when the unique_ptr is destroyed or reset.
template <typename T>
using CudaUniquePtr = std::unique_ptr<T, CudaFreeFunctor>;
/// Allocates room for @a numElements objects of type T with cudaMalloc and
/// returns the allocation wrapped in a CudaUniquePtr. The memory is not
/// initialized here. The cudaMalloc status is checked by VTKM_CUDA_CALL.
template <typename T>
CudaUniquePtr<T> make_CudaUniquePtr(std::size_t numElements)
{
  const std::size_t numBytes = sizeof(T) * numElements;
  T* devicePtr;
  VTKM_CUDA_CALL(cudaMalloc(&devicePtr, numBytes));
  return CudaUniquePtr<T>{ devicePtr };
}
}
} // end namespace cuda::internal
@ -162,6 +182,132 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
#ifndef VTKM_CUDA
private:
#endif
/// CUDA functor that writes the index of every set bit in a bit field portal
/// to an output portal, in no particular order.
///
/// Each invocation handles one word of the input. Threads that hold a
/// non-zero word cooperatively sum their popcounts, one of them reserves a
/// contiguous range of the output by atomically advancing a global counter,
/// and the threads then write their bit indices into that range.
///
/// Usage from the host: construct, call Initialize() to zero the global
/// counter, schedule one instance per input word, synchronize, then call
/// Finalize() to read back the total number of set bits.
///
/// @tparam BitsPortal Input portal; must provide WordTypePreferred,
///         GetNumberOfWords(), GetFinalWordMask() and GetWord().
/// @tparam IndicesPortal Output portal whose ValueType is vtkm::Id.
/// @tparam GlobalPopCountType Type of the global counter; restricted to the
///         types accepted by the static assertion below.
template <typename BitsPortal, typename IndicesPortal, typename GlobalPopCountType>
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
{
  VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same<GlobalPopCountType, vtkm::Int32>::value ||
                                          std::is_same<GlobalPopCountType, vtkm::UInt32>::value ||
                                          std::is_same<GlobalPopCountType, vtkm::UInt64>::value),
                         "Unsupported GlobalPopCountType. Must support CUDA atomicAdd.");

  using Word = typename BitsPortal::WordTypePreferred;

  VTKM_STATIC_ASSERT(
    VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));

  /// @param input Portal providing the words of the bit field.
  /// @param output Portal receiving the indices of the set bits.
  /// @param globalPopCount Pointer to the counter used to allocate output
  ///        slots; it is zeroed with cudaMemset and read back with
  ///        cudaMemcpy (device-to-host), so it must be valid for both.
  VTKM_CONT
  BitFieldToUnorderedSetFunctor(const BitsPortal& input,
                                const IndicesPortal& output,
                                GlobalPopCountType* globalPopCount)
    : Input{ input }
    , Output{ output }
    , GlobalPopCount{ globalPopCount }
    , FinalWordIndex{ input.GetNumberOfWords() - 1 }
    , FinalWordMask(input.GetFinalWordMask())
  {
  }

  ~BitFieldToUnorderedSetFunctor() {}

  /// Zeroes the global counter. Call before scheduling the functor.
  VTKM_CONT void Initialize()
  {
    assert(this->GlobalPopCount != nullptr);
    VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType)));
  }

  /// Processes the word at @a wordIdx, writing the index of each set bit to
  /// the output portal.
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ void operator()(vtkm::Id wordIdx) const
  {
    Word word = this->Input.GetWord(wordIdx);

    // The last word may be partial -- mask out trailing bits if needed.
    const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 };

    word &= mask;

    if (word != 0)
    {
      this->LocalPopCount = vtkm::CountSetBits(word);
      this->ReduceAllocate();

      const vtkm::Id firstBitIdx = wordIdx * static_cast<vtkm::Id>(sizeof(Word)) * CHAR_BIT;
      do
      {
        // Find next bit. FindFirstSetBit's result is indexed starting at 1.
        const vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
        const vtkm::Id outIdx = this->GetNextOutputIndex();
        // Write index of bit
        this->Output.Set(outIdx, firstBitIdx + bit);
        // Clear the bit. The mask is built in the word type so the shift is
        // well defined for the top bit and for words wider than int.
        word ^= static_cast<Word>(Word{ 1 } << bit);
      } while (word != 0); // have bits
    }
  }

  /// Copies the global counter back to the host and returns it, i.e. the
  /// total number of output slots allocated by the scheduled instances.
  VTKM_CONT vtkm::Id Finalize() const
  {
    assert(this->GlobalPopCount != nullptr);

    GlobalPopCountType result;
    VTKM_CUDA_CALL(cudaMemcpy(
      &result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost));
    return static_cast<vtkm::Id>(result);
  }

private:
  // Every thread with a non-zero local popcount calls this function, which
  // computes the total popcount for the coalesced threads and allocates
  // a contiguous block in the output by atomically increasing the global
  // popcount.
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ void ReduceAllocate() const
  {
    const auto activeLanes = cooperative_groups::coalesced_threads();
    const int activeRank = activeLanes.thread_rank();
    const int activeSize = activeLanes.size();

    // Reduction value:
    vtkm::Int32 rVal = this->LocalPopCount;
    for (int delta = 1; delta < activeSize; delta *= 2)
    {
      // Every active lane must take part in the shuffle, so perform it
      // unconditionally and only decide afterwards whether to use the value.
      const vtkm::Int32 shflVal = activeLanes.shfl_down(rVal, delta);
      // A lane whose source would lie past the end of the group must not
      // accumulate: the shuffle then hands back a value that does not belong
      // to lane (rank + delta), and adding it would over-count whenever the
      // group size is not a power of two.
      if (activeRank + delta < activeSize)
      {
        rVal += shflVal;
      }
    }

    // Rank 0 now holds the group's total; it reserves the output range.
    if (activeRank == 0)
    {
      this->AllocationHead =
        atomicAdd(this->GlobalPopCount, static_cast<GlobalPopCountType>(rVal));
    }

    // Broadcast the start of the reserved range to the whole group.
    this->AllocationHead = activeLanes.shfl(this->AllocationHead, 0);
  }

  // The global output allocation is written to by striding the writes across
  // the warp lanes, allowing the writes to global memory to be coalesced.
  VTKM_SUPPRESS_EXEC_WARNINGS
  __device__ vtkm::Id GetNextOutputIndex() const
  {
    // Only lanes with unwritten output indices left will call this method,
    // so just check the coalesced threads:
    const auto activeLanes = cooperative_groups::coalesced_threads();
    const int activeRank = activeLanes.thread_rank();
    const int activeSize = activeLanes.size();

    const vtkm::Id nextIdx = static_cast<vtkm::Id>(this->AllocationHead + activeRank);
    this->AllocationHead += activeSize;

    return nextIdx;
  }

  const BitsPortal Input;
  const IndicesPortal Output;

  // Counter that hands out output slots; advanced with atomicAdd.
  GlobalPopCountType* GlobalPopCount;

  // Scratch state written from the const device methods above.
  mutable vtkm::UInt64 AllocationHead{ 0 };
  mutable vtkm::Int32 LocalPopCount{ 0 };

  // Used to mask trailing bits in the last word.
  vtkm::Id FinalWordIndex{ 0 };
  Word FinalWordMask{ 0 };
};
template <class InputPortal, class OutputPortal>
VTKM_CONT static void CopyPortal(const InputPortal& input, const OutputPortal& output)
{
@ -746,9 +892,43 @@ private:
}
}
/// Runs BitFieldToUnorderedSetFunctor over every word of @a bits, writing
/// the indices of the set bits to @a indices.
///
/// @tparam GlobalPopCountType Type of the counter used to allocate output
///         slots; must be given explicitly by the caller.
/// @return The value the functor's Finalize() reads back from the counter.
template <typename GlobalPopCountType, typename BitsPortal, typename IndicesPortal>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSetPortal(const BitsPortal& bits,
                                                       const IndicesPortal& indices)
{
  using KernelType = BitFieldToUnorderedSetFunctor<BitsPortal, IndicesPortal, GlobalPopCountType>;

  // Single-element allocation for the atomic counter; the unique_ptr's
  // deleter frees it when this function returns or throws.
  auto counterStorage = cuda::internal::make_CudaUniquePtr<GlobalPopCountType>(1);

  KernelType kernel{ bits, indices, counterStorage.get() };
  kernel.Initialize();

  Schedule(kernel, bits.GetNumberOfWords());
  // The counter may only be read back once the scheduled work has finished.
  Synchronize();

  const vtkm::Id numSetBits = kernel.Finalize();
  return numSetBits;
}
//-----------------------------------------------------------------------------
public:
/// Fills @a indices with the index of every set bit in @a bits (unordered)
/// and returns how many were written.
///
/// The output array is first prepared with room for one entry per bit and
/// then shrunk to the number of indices actually produced.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
  const vtkm::cont::BitField& bits,
  vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  // Upper bound on the output size: every bit could be set.
  const vtkm::Id maxIndices = bits.GetNumberOfBits();

  auto inPortal = bits.PrepareForInput(DeviceAdapterTagCuda{});
  auto outPortal = indices.PrepareForOutput(maxIndices, DeviceAdapterTagCuda{});

  // Use a uint64 for accumulator, as atomicAdd does not support signed int64.
  const vtkm::Id numSetBits = BitFieldToUnorderedSetPortal<vtkm::UInt64>(inPortal, outPortal);

  indices.Shrink(numSetBits);
  return numSetBits;
}
template <typename T, typename U, class SIn, class SOut>
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, SIn>& input,
vtkm::cont::ArrayHandle<U, SOut>& output)

@ -22,6 +22,7 @@ set(unit_tests
UnitTestCudaArrayHandle.cu
UnitTestCudaArrayHandleFancy.cu
UnitTestCudaArrayHandleVirtualCoordinates.cu
UnitTestCudaBitField.cu
UnitTestCudaCellLocatorRectilinearGrid.cu
UnitTestCudaCellLocatorUniformBins.cu
UnitTestCudaCellLocatorUniformGrid.cu

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestCudaBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagCuda>::Run(argc, argv);
}

@ -0,0 +1,223 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_internal_AtomicInterfaceControl_h
#define vtk_m_cont_internal_AtomicInterfaceControl_h
#include <vtkm/internal/Configure.h>
#include <vtkm/internal/Windows.h>
#include <vtkm/ListTag.h>
#include <vtkm/Types.h>
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
#include <intrin.h> // For MSVC atomics
#endif
#include <atomic>
#include <cstdint>
#include <cstring>
namespace vtkm
{
namespace cont
{
namespace internal
{
/**
 * Implementation of AtomicInterfaceDevice that uses control-side atomics.
 *
 * All members are static and operate on raw word addresses. For each of
 * UInt8, UInt16, UInt32 and UInt64 the class provides Load, Store, Not, And,
 * Or, Xor and CompareAndSwap. Two implementations exist and are selected at
 * compile time: one built on the MSVC _Interlocked* intrinsics (when
 * VTKM_MSVC is defined) and one built on the __atomic_* builtins otherwise.
 */
class AtomicInterfaceControl
{
public:
// All word types for which the operations below are defined.
using WordTypes = vtkm::ListTagBase<vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64>;
// TODO These support UInt64, too. This should be benchmarked to see which
// is faster.
using WordTypePreferred = vtkm::UInt32;
#ifdef VTKM_MSVC
private:
// Reinterprets the bits of src as a value of type To. Both types must have
// the same size (checked at compile time). Needed because the Interlocked
// intrinsics take signed Windows types while this interface uses unsigned
// vtkm types.
template <typename To, typename From>
VTKM_EXEC_CONT static To BitCast(const From& src)
{
// The memcpy should be removed by the compiler when possible, but this
// works around a host of issues with bitcasting using reinterpret_cast.
VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
To dst;
std::memcpy(&dst, &src, sizeof(From));
return dst;
}
public:
// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
//
// Each Load below performs a volatile read of the word and then issues an
// acquire fence before returning the value read.
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt8 Load(const vtkm::UInt8* addr)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 8-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt16 Load(const vtkm::UInt16* addr)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 16-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt32 Load(const vtkm::UInt32* addr)
{
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt64 Load(const vtkm::UInt64* addr)
{
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
}
// The 8- and 16-bit Stores go through an interlocked exchange; the 32- and
// 64-bit Stores further below issue a release fence and then write the word
// with a plain assignment.
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt8* addr, vtkm::UInt8 val)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt16* addr, vtkm::UInt16 val)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt32* addr, vtkm::UInt32 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt64* addr, vtkm::UInt64 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
// Generates Not/And/Or/Xor/CompareAndSwap for one word type.
//   vtkmType: the unsigned vtkm word type exposed in the signatures.
//   winType:  the Windows type the matching intrinsic operates on.
//   suffix:   size suffix appended to the intrinsic name (empty for 32 bits).
// Not is implemented as Xor with an all-ones mask. Each function returns the
// intrinsic's result bit-cast back to vtkmType. Note the argument order of
// CompareAndSwap: (addr, newWord, expected).
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Not(vtkmType* addr) \
{ \
return Xor(addr, static_cast<vtkmType>(~vtkmType{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType And(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedAnd##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Or(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedOr##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Xor(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedXor##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType CompareAndSwap( \
vtkmType* addr, vtkmType newWord, vtkmType expected) \
{ \
return BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(newWord), \
BitCast<winType>(expected))); \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#else // gcc/clang
// Generates the full operation set for one word type using the __atomic
// builtins. Load uses acquire ordering and Store uses release ordering; the
// read-modify-write operations use sequentially consistent ordering. Not is
// implemented as Xor with an all-ones mask. And/Or/Xor return the result of
// the corresponding __atomic_fetch_* builtin. CompareAndSwap takes its
// arguments in the order (addr, newWord, expected) and returns the local
// `expected` after the builtin has run (the builtin receives its address and
// may update it).
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Load(const type* addr) \
{ \
return __atomic_load_n(addr, __ATOMIC_ACQUIRE); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(type* addr, type value) \
{ \
return __atomic_store_n(addr, value, __ATOMIC_RELEASE); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Not(type* addr) \
{ \
return Xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type And(type* addr, type mask) \
{ \
return __atomic_fetch_and(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Or(type* addr, type mask) \
{ \
return __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Xor(type* addr, type mask) \
{ \
return __atomic_fetch_xor(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
__atomic_compare_exchange_n( \
addr, &expected, newWord, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
return expected; \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#endif
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_internal_AtomicInterfaceControl_h

@ -0,0 +1,113 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_internal_AtomicInterfaceExecution_h
#define vtk_m_cont_internal_AtomicInterfaceExecution_h
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// Class template that provides a collection of static methods that perform
/// atomic operations on raw addresses. It is the responsibility of the caller
/// to ensure that the addresses are properly aligned.
///
/// The class defines a WordTypePreferred member that is the fastest available
/// for bitwise operations of the given device. At minimum, the interface must
/// support operations on WordTypePreferred and vtkm::WordTypeDefault, which may
/// be the same. A full list of supported word types is advertised in the type
/// list @a WordTypes.
///
/// Only the declaration of this template is compiled; the member list below
/// exists for documentation builds (VTKM_DOXYGEN_ONLY) and describes what each
/// device specialization must provide.
///
/// To implement this on devices that share the control environment, subclass
/// vtkm::cont::internal::AtomicInterfaceControl, which may also be used
/// directly from control-side code.
template <typename DeviceTag>
class AtomicInterfaceExecution
#ifdef VTKM_DOXYGEN_ONLY
{
public:
  /// The preferred word type for the target device for bitwise atomic
  /// operations.
  using WordTypePreferred = FastestWordTypeForDevice;

  /// The list of word types for which the operations below are available.
  using WordTypes = vtkm::ListTagBase<vtkm::WordTypeDefault, WordTypePreferred>;

  /// Atomically load a value from memory while enforcing, at minimum, "acquire"
  /// memory ordering. The word is not modified, so the address is taken as a
  /// pointer to const.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Load(const vtkm::WordTypeDefault* addr);
  VTKM_EXEC static WordTypePreferred Load(const WordTypePreferred* addr);
  /// @}

  /// Atomically write a value to memory while enforcing, at minimum, "release"
  /// memory ordering.
  /// @{
  VTKM_EXEC static void Store(vtkm::WordTypeDefault* addr, vtkm::WordTypeDefault value);
  VTKM_EXEC static void Store(WordTypePreferred* addr, WordTypePreferred value);
  /// @}

  /// Perform a bitwise atomic not operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Not(vtkm::WordTypeDefault* addr);
  VTKM_EXEC static WordTypePreferred Not(WordTypePreferred* addr);
  /// @}

  /// Perform a bitwise atomic and operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault And(vtkm::WordTypeDefault* addr,
                                             vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred And(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform a bitwise atomic or operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Or(vtkm::WordTypeDefault* addr,
                                            vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred Or(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform a bitwise atomic xor operation on the word at @a addr.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault Xor(vtkm::WordTypeDefault* addr,
                                             vtkm::WordTypeDefault mask);
  VTKM_EXEC static WordTypePreferred Xor(WordTypePreferred* addr, WordTypePreferred mask);
  /// @}

  /// Perform an atomic CAS operation on the word at @a addr.
  /// Note the argument order: the replacement word @a newWord comes before
  /// the @a expected word.
  /// This operation performs a full memory barrier around the atomic access.
  /// @{
  VTKM_EXEC static vtkm::WordTypeDefault CompareAndSwap(vtkm::WordTypeDefault* addr,
                                                        vtkm::WordTypeDefault newWord,
                                                        vtkm::WordTypeDefault expected);
  VTKM_EXEC static WordTypePreferred CompareAndSwap(WordTypePreferred* addr,
                                                    WordTypePreferred newWord,
                                                    WordTypePreferred expected);
  /// @}
}
#endif // VTKM_DOXYGEN_ONLY
  ;
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_internal_AtomicInterfaceExecution_h

@ -28,6 +28,8 @@ set(headers
ArrayPortalFromIterators.h
ArrayPortalShrink.h
ArrayTransfer.h
AtomicInterfaceControl.h
AtomicInterfaceExecution.h
ConnectivityExplicitInternals.h
DeviceAdapterAlgorithmGeneral.h
DeviceAdapterAtomicArrayImplementation.h

@ -27,6 +27,7 @@
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandleStreaming.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
@ -123,6 +124,35 @@ private:
}
public:
//--------------------------------------------------------------------------
// BitFieldToUnorderedSet
/// Fills @a indices with the index of every set bit in @a bits (unordered)
/// and returns how many were written.
///
/// The output array is prepared with room for one entry per bit, filled by
/// BitFieldToUnorderedSetFunctor instances that share an atomic counter, and
/// finally shrunk to the counter's value.
template <typename IndicesStorage>
VTKM_CONT static vtkm::Id BitFieldToUnorderedSet(
  const vtkm::cont::BitField& bits,
  vtkm::cont::ArrayHandle<Id, IndicesStorage>& indices)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  // Upper bound on the output size: every bit could be set.
  const vtkm::Id maxIndices = bits.GetNumberOfBits();

  auto inPortal = bits.PrepareForInput(DeviceAdapterTag{});
  auto outPortal = indices.PrepareForOutput(maxIndices, DeviceAdapterTag{});

  // Shared counter through which the functor instances claim output ranges.
  std::atomic<vtkm::UInt64> setBitCounter{ 0 };

  using FunctorType = BitFieldToUnorderedSetFunctor<decltype(inPortal), decltype(outPortal)>;
  FunctorType worker{ inPortal, outPortal, setBitCounter };

  DerivedAlgorithm::Schedule(worker, worker.GetNumberOfInstances());
  DerivedAlgorithm::Synchronize();

  const vtkm::Id numSetBits =
    static_cast<vtkm::Id>(setBitCounter.load(std::memory_order_seq_cst));
  indices.Shrink(numSetBits);
  return numSetBits;
}
//--------------------------------------------------------------------------
// Copy
template <typename T, typename U, class CIn, class COut>

@ -24,10 +24,12 @@
#include <vtkm/TypeTraits.h>
#include <vtkm/UnaryPredicates.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/exec/FunctorBase.h>
#include <algorithm>
#include <atomic>
namespace vtkm
{
@ -332,6 +334,142 @@ struct ShiftCopyAndInit : vtkm::exec::FunctorBase
}
};
/// Functor that writes the index of every set bit in a bit field portal to an
/// output portal, in no particular order.
///
/// Each instance handles a fixed-size chunk of words. It first counts the set
/// bits in its chunk, reserves that many output slots by advancing a shared
/// atomic counter, and then writes the bit indices into the reserved range.
/// After all instances have run, the counter holds the total number of set
/// bits.
///
/// @tparam BitsPortal Input portal; must provide WordTypePreferred,
///         GetNumberOfWords(), GetFinalWordMask() and GetWord().
/// @tparam IndicesPortal Output portal whose ValueType is vtkm::Id.
template <class BitsPortal, class IndicesPortal>
struct BitFieldToUnorderedSetFunctor : public vtkm::exec::FunctorBase
{
  using WordType = typename BitsPortal::WordTypePreferred;

  // This functor executes a number of instances, where each instance handles
  // two cachelines worth of data. Figure out how many words that is:
  static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT;
  static constexpr vtkm::Id WordsPerCacheLine =
    CacheLineSize / static_cast<vtkm::Id>(sizeof(WordType));
  static constexpr vtkm::Id CacheLinesPerInstance = 2;
  static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine;

  VTKM_STATIC_ASSERT(
    VTKM_PASS_COMMAS(std::is_same<typename IndicesPortal::ValueType, vtkm::Id>::value));

  /// @param input Portal providing the words of the bit field.
  /// @param output Portal receiving the indices of the set bits.
  /// @param popCount Shared counter used to allocate output slots. Only a
  ///        reference is stored, so it must outlive the functor and all of
  ///        its copies.
  VTKM_CONT
  BitFieldToUnorderedSetFunctor(const BitsPortal& input,
                                IndicesPortal& output,
                                std::atomic<vtkm::UInt64>& popCount)
    : Input{ input }
    , Output{ output }
    , PopCount(popCount)
    , FinalWordIndex{ input.GetNumberOfWords() - 1 }
    , FinalWordMask(input.GetFinalWordMask())
  {
  }

  /// Number of instances to schedule so that every input word is covered
  /// (number of words divided by WordsPerInstance, rounded up).
  VTKM_CONT vtkm::Id GetNumberOfInstances() const
  {
    const auto numWords = this->Input.GetNumberOfWords();
    return (numWords + WordsPerInstance - 1) / WordsPerInstance;
  }

  /// Processes the chunk of words belonging to instance @a instanceIdx.
  VTKM_EXEC void operator()(vtkm::Id instanceIdx) const
  {
    const vtkm::Id numWords = this->Input.GetNumberOfWords();
    const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords);
    const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords);

    if (wordStart != wordEnd) // range is valid
    {
      this->ExecuteRange(wordStart, wordEnd);
    }
  }

  /// Processes the words in [wordStart, wordEnd). Compiles to a no-op during
  /// the CUDA device pass.
  VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const
  {
#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function:
    // Count bits and allocate space for output:
    vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd);
    if (chunkBits > 0)
    {
      vtkm::UInt64 outIdx = this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed);

      this->ProcessWords(wordStart, wordEnd, static_cast<vtkm::Id>(outIdx));
    }
#else
    (void)wordStart;
    (void)wordEnd;
#endif
  }

  /// Current value of the shared counter.
  VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); }

private:
  // Returns the number of set bits in the words [wordStart, wordEnd),
  // ignoring the masked-out trailing bits of the final word.
  VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const
  {
    // Need to mask out trailing bits from the final word:
    const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);

    if (isFinalChunk)
    {
      wordEnd = this->FinalWordIndex;
    }

    vtkm::Int32 tmp = 0;
    for (vtkm::Id i = wordStart; i < wordEnd; ++i)
    {
      tmp += vtkm::CountSetBits(this->Input.GetWord(i));
    }

    if (isFinalChunk)
    {
      tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask);
    }

    return static_cast<vtkm::UInt64>(tmp);
  }

  // Writes the indices of all set bits in the words [wordStart, wordEnd) to
  // consecutive output slots beginning at outputStartIdx.
  VTKM_EXEC void ProcessWords(vtkm::Id wordStart, vtkm::Id wordEnd, vtkm::Id outputStartIdx) const
  {
    // Need to mask out trailing bits from the final word:
    const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);

    if (isFinalChunk)
    {
      wordEnd = this->FinalWordIndex;
    }

    for (vtkm::Id i = wordStart; i < wordEnd; ++i)
    {
      outputStartIdx = this->WriteSetBits(i, this->Input.GetWord(i), outputStartIdx);
    }

    if (isFinalChunk)
    {
      const vtkm::Id i = this->FinalWordIndex;
      const WordType word = this->Input.GetWord(i) & this->FinalWordMask;
      this->WriteSetBits(i, word, outputStartIdx);
    }
  }

  // Writes the global index of every set bit of `word` (the word at position
  // wordIdx) to consecutive output slots starting at outIdx. Returns the
  // first output slot that was not written.
  VTKM_EXEC vtkm::Id WriteSetBits(vtkm::Id wordIdx, WordType word, vtkm::Id outIdx) const
  {
    const vtkm::Id firstBitIdx = wordIdx * static_cast<vtkm::Id>(sizeof(WordType)) * CHAR_BIT;
    while (word != 0) // have bits
    {
      // Find next bit. FindFirstSetBit starts counting at 1.
      const vtkm::Int32 bit = vtkm::FindFirstSetBit(word) - 1;
      this->Output.Set(outIdx++, firstBitIdx + bit); // Write index of bit
      // Clear the bit. The mask is built in the word type so the shift is
      // well defined for the top bit and for words wider than int.
      word ^= static_cast<WordType>(WordType{ 1 } << bit);
    }
    return outIdx;
  }

  BitsPortal Input;
  IndicesPortal Output;
  std::atomic<vtkm::UInt64>& PopCount;

  // Used to mask trailing bits in the last word.
  vtkm::Id FinalWordIndex{ 0 };
  WordType FinalWordMask{ 0 };
};
template <class InputPortalType, class OutputPortalType>
struct CopyKernel
{

@ -26,6 +26,7 @@
#ifdef VTKM_ENABLE_OPENMP
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
#endif

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#define vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// Atomic interface for the OpenMP device. It adds nothing of its own: every
/// operation is inherited from AtomicInterfaceControl.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagOpenMP> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h

@ -23,6 +23,7 @@ set(headers
DeviceAdapterAlgorithmOpenMP.h
DeviceAdapterRuntimeDetectorOpenMP.h
DeviceAdapterTagOpenMP.h
AtomicInterfaceExecutionOpenMP.h
ExecutionArrayInterfaceBasicOpenMP.h
FunctorsOpenMP.h
ParallelQuickSortOpenMP.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestOpenMPArrayHandle.cxx
UnitTestOpenMPArrayHandleFancy.cxx
UnitTestOpenMPArrayHandleVirtualCoordinates.cxx
UnitTestOpenMPBitField.cxx
UnitTestOpenMPCellLocatorRectilinearGrid.cxx
UnitTestOpenMPCellLocatorUniformBins.cxx
UnitTestOpenMPCellLocatorUniformGrid.cxx

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestOpenMPBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagOpenMP>::Run(argc, argv);
}

@ -24,6 +24,7 @@
// clang-format off
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterRuntimeDetectorSerial.h>
#include <vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h>
#include <vtkm/cont/serial/internal/ArrayManagerExecutionSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h>
#include <vtkm/cont/serial/internal/VirtualObjectTransferSerial.h>

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#define vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
// Execution-side atomic interface for the Serial device. The specialization
// adds no members of its own: everything is inherited from
// AtomicInterfaceControl.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagSerial> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionSerial.h
AtomicInterfaceExecutionSerial.h
DeviceAdapterAlgorithmSerial.h
DeviceAdapterRuntimeDetectorSerial.h
DeviceAdapterTagSerial.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestSerialArrayHandle.cxx
UnitTestSerialArrayHandleFancy.cxx
UnitTestSerialArrayHandleVirtualCoordinates.cxx
UnitTestSerialBitField.cxx
UnitTestSerialCellLocatorRectilinearGrid.cxx
UnitTestSerialCellLocatorUniformBins.cxx
UnitTestSerialCellLocatorUniformGrid.cxx

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestSerialBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagSerial{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagSerial>::Run(argc, argv);
}

@ -25,6 +25,7 @@
#ifdef VTKM_ENABLE_TBB
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
#include <vtkm/cont/tbb/internal/VirtualObjectTransferTBB.h>
#endif

@ -0,0 +1,45 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#define vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
// Execution-side atomic interface for the TBB device. The specialization adds
// no members of its own: everything is inherited from AtomicInterfaceControl.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTBB> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h

@ -20,6 +20,7 @@
set(headers
ArrayManagerExecutionTBB.h
AtomicInterfaceExecutionTBB.h
DeviceAdapterAlgorithmTBB.h
DeviceAdapterRuntimeDetectorTBB.h
DeviceAdapterTagTBB.h

@ -22,6 +22,7 @@ set(unit_tests
UnitTestTBBArrayHandle.cxx
UnitTestTBBArrayHandleFancy.cxx
UnitTestTBBArrayHandleVirtualCoordinates.cxx
UnitTestTBBBitField.cxx
UnitTestTBBCellLocatorRectilinearGrid.cxx
UnitTestTBBCellLocatorUniformBins.cxx
UnitTestTBBCellLocatorUniformGrid.cxx

@ -0,0 +1,34 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Make sure that the tested code is using the device adapter specified. This
// is important in the long run so we don't, for example, use the CUDA device
// for a part of an operation where the TBB device was specified.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/cont/testing/TestingBitField.h>
int UnitTestTBBBitField(int argc, char* argv[])
{
auto tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});
return vtkm::cont::testing::TestingBitField<vtkm::cont::DeviceAdapterTagTBB>::Run(argc, argv);
}

@ -0,0 +1,685 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2019 UT-Battelle, LLC.
// Copyright 2019 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_testing_TestingBitFields_h
#define vtk_m_cont_testing_TestingBitFields_h
#include <vtkm/cont/ArrayHandleBitField.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/exec/FunctorBase.h>
#include <vtkm/worklet/Invoker.h>
#include <cstdio>
// Assertion helper that reports through printf and a bool return instead of
// an exception. When `cond` is false it prints the file, line, the
// stringified condition and `message`, then makes the *enclosing* function
// (or lambda) `return false`. It can therefore only be used where returning
// `false` is valid. `message` is consumed by a "%s" conversion, so it must be
// a C string.
#define DEVICE_ASSERT_MSG(cond, message) \
do \
{ \
if (!(cond)) \
{ \
printf("Testing assert failed at %s:%d\n\t- Condition: %s\n\t- Subtest: %s\n", \
__FILE__, \
__LINE__, \
#cond, \
message); \
return false; \
} \
} while (false)
// Same as DEVICE_ASSERT_MSG but without a subtest label: on failure it prints
// the file, line and stringified condition via printf, then makes the
// enclosing function (or lambda) `return false`.
#define DEVICE_ASSERT(cond) \
do \
{ \
if (!(cond)) \
{ \
printf("Testing assert failed at %s:%d\n\t- Condition: %s\n", __FILE__, __LINE__, #cond); \
return false; \
} \
} while (false)
// Test with some trailing bits in partial last word:
// 7681 == 120 * 64 + 1, so for 8-, 16-, 32- and 64-bit words alike the final
// word holds exactly one valid bit. The macro expands to a braced vtkm::Id
// value, so arithmetic on it is done in vtkm::Id.
#define NUM_BITS \
vtkm::Id { 7681 }
using vtkm::cont::BitField;
namespace vtkm
{
namespace cont
{
namespace testing
{
// Element-wise select: for each index, emits the value from the "true" field
// when the boolean condition holds and the value from the "false" field
// otherwise. The tests in this file feed the condition from an
// ArrayHandleBitField.
class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
{
public:
  using ControlSignature = void(FieldIn cond, FieldIn trueVals, FieldIn falseVals, FieldOut result);
  using ExecutionSignature = _4(_1, _2, _3);

  // Returns a copy of trueVal if cond is set, otherwise a copy of falseVal.
  template <typename T>
  VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
  {
    if (cond)
    {
      return trueVal;
    }
    return falseVal;
  }
};
// Variant of the conditional merge that receives the condition as a whole
// BitFieldInOut portal rather than a per-element bool. Each invocation XORs
// its own bit with `true` (i.e. flips it in place) and uses the value returned
// by that atomic operation to choose between the true and false inputs.
class ConditionalMergeWorklet2 : public vtkm::worklet::WorkletMapField
{
public:
  using ControlSignature = void(BitFieldInOut bits,
                                FieldIn trueVals,
                                FieldIn falseVal,
                                FieldOut result);
  using ExecutionSignature = _4(InputIndex, _1, _2, _3);
  // The bit field is a whole-object argument, so the first FieldIn drives the
  // number of invocations.
  using InputDomain = _2;

  template <typename BitPortal, typename T>
  VTKM_EXEC T
  operator()(const vtkm::Id i, BitPortal& bits, const T& trueVal, const T& falseVal) const
  {
    if (bits.XorBitAtomic(i, true))
    {
      return trueVal;
    }
    return falseVal;
  }
};
/// This class has a single static member, Run, that runs all tests with the
/// given DeviceAdapter.
template <class DeviceAdapterTag>
struct TestingBitField
{
using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>;
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<DeviceAdapterTag>;
using Traits = vtkm::cont::detail::BitFieldTraits;
using WordTypes = typename AtomicInterface::WordTypes;
using WordTypesControl = vtkm::cont::internal::AtomicInterfaceControl::WordTypes;
// Deterministic pseudorandom bit for a given index. The same function is used
// to fill bit fields and to compute reference values, on both the control and
// execution side.
VTKM_EXEC_CONT
static bool RandomBitFromIndex(vtkm::Id idx) noexcept
{
  // Mix several integer multiples/quotients of the index; only the parity of
  // the result is used. Terms are accumulated in the same left-to-right order
  // as a single expression would evaluate them.
  auto mixed = idx + (idx * 2);
  mixed -= (idx / 3);
  mixed += (idx * 5 / 7);
  mixed -= (idx * 11 / 13);

  const auto parity = mixed % 2;
  return parity == 1;
}
// Deterministic pseudorandom word of type WordType for a given index. A 64-bit
// value is derived from the index, scrambled with a fixed sequence of
// shift/xor steps, and its most significant sizeof(WordType) * CHAR_BIT bits
// are returned.
template <typename WordType>
VTKM_EXEC_CONT static WordType RandomWordFromIndex(vtkm::Id idx) noexcept
{
  vtkm::UInt64 scrambled =
    static_cast<vtkm::UInt64>(idx * (NUM_BITS - 1) + (idx + 1) * NUM_BITS);

  scrambled = scrambled ^ (scrambled << 3);
  scrambled = scrambled ^ (scrambled << 7);
  scrambled = scrambled ^ (scrambled << 15);
  scrambled = scrambled ^ (scrambled << 31);
  // Swap the upper and lower 32-bit halves.
  scrambled = (scrambled >> 32) | (scrambled << 32);

  // Keep only the high-order bits that fit in WordType.
  const size_t totalBits = 64;
  const size_t keptBits = sizeof(WordType) * CHAR_BIT;
  const size_t discardedBits = totalBits - keptBits;
  return static_cast<WordType>(scrambled >> discardedBits);
}
// Builds a BitField of `numBits` bits (NUM_BITS by default) in which bit i is
// set to RandomBitFromIndex(i), written through the control portal.
VTKM_CONT
static BitField RandomBitField(vtkm::Id numBits = NUM_BITS)
{
  BitField bits;
  bits.Allocate(numBits);

  auto writer = bits.GetPortalControl();
  vtkm::Id bitIdx = 0;
  while (bitIdx < numBits)
  {
    writer.SetBit(bitIdx, RandomBitFromIndex(bitIdx));
    ++bitIdx;
  }

  return bits;
}
// Checks that allocating NUM_BITS bits produces backing storage whose size in
// bytes is NUM_BITS rounded up to whole bytes and then up to a whole multiple
// of BitFieldTraits::BlockSize.
VTKM_CONT
static void TestBlockAllocation()
{
  BitField field;
  field.Allocate(NUM_BITS);

  // Expected size: bits -> bytes -> blocks, rounding up at each step.
  const vtkm::Id blockSize = vtkm::cont::detail::BitFieldTraits::BlockSize;
  const vtkm::Id numBytes = (NUM_BITS + CHAR_BIT - 1) / CHAR_BIT;
  const vtkm::Id numBlocks = (numBytes + blockSize - 1) / blockSize;
  const vtkm::Id expectedBytes = numBlocks * blockSize;

  // Actual size: number of stored default-sized words times the word size.
  const vtkm::Id bytesPerWord = static_cast<vtkm::Id>(sizeof(vtkm::WordTypeDefault));
  const vtkm::Id bytesInFieldData = field.GetData().GetNumberOfValues() * bytesPerWord;

  VTKM_TEST_ASSERT(bytesInFieldData == expectedBytes,
                   "The BitField allocation does not round up to the nearest "
                   "block. This can cause access-by-word to read/write invalid "
                   "memory.");
}
// Exercises every single-bit operation of `portal` on bit `i`, reading the
// result back through both `portal` and `portalConst`.
//
// Returns true when every check passes. On the first failing check a
// diagnostic is printed via printf (see DEVICE_ASSERT) and false is returned.
//
// The statement order matters: each step updates the local model `bit`,
// applies the matching portal operation, and then calls `testValues`, which
// verifies the stored bit and resets both the model and the stored bit to
// `origBit` before the next step.
template <typename PortalType, typename PortalConstType>
VTKM_EXEC_CONT static bool HelpTestBit(vtkm::Id i, PortalType portal, PortalConstType portalConst)
{
const auto origBit = RandomBitFromIndex(i);
auto bit = origBit;
// Second pseudorandom bit used as the operand of the set/and/or/xor steps.
const auto mod = RandomBitFromIndex(i + NUM_BITS);
// Compares the stored bit (via both portals) against the model `bit`; `op`
// names the operation under test for the failure message. DEVICE_ASSERT_MSG
// returns false from this lambda on mismatch.
auto testValues = [&](const char* op) -> bool {
auto expected = bit;
auto result = portal.GetBitAtomic(i);
auto resultConst = portalConst.GetBitAtomic(i);
DEVICE_ASSERT_MSG(result == expected, op);
DEVICE_ASSERT_MSG(resultConst == expected, op);
// Reset:
bit = origBit;
portal.SetBitAtomic(i, bit);
return true;
};
portal.SetBit(i, bit);
DEVICE_ASSERT(testValues("SetBit"));
bit = mod;
portal.SetBitAtomic(i, mod);
DEVICE_ASSERT(testValues("SetBitAtomic"));
bit = !bit;
portal.NotBitAtomic(i);
DEVICE_ASSERT(testValues("NotBitAtomic"));
bit = bit && mod;
portal.AndBitAtomic(i, mod);
DEVICE_ASSERT(testValues("AndBitAtomic"));
bit = bit || mod;
portal.OrBitAtomic(i, mod);
DEVICE_ASSERT(testValues("OrBitAtomic"));
bit = bit != mod;
portal.XorBitAtomic(i, mod);
DEVICE_ASSERT(testValues("XorBitAtomic"));
// Compare-and-swap. At this point the stored bit equals `bit` (reset by the
// last testValues call).
// NOTE(review): the assertions below are consistent with a signature of
// (index, newValue, expectedValue) that returns the value seen before the
// call -- confirm against the BitField portal declaration.
const auto notBit = !bit;
// First call: the stored bit is asserted to remain `bit` afterwards.
bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit);
DEVICE_ASSERT(casResult == bit);
DEVICE_ASSERT(portal.GetBit(i) == bit);
DEVICE_ASSERT(portalConst.GetBit(i) == bit);
// Second call: the stored bit is asserted to have become `notBit`.
casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit);
DEVICE_ASSERT(casResult == bit);
DEVICE_ASSERT(portal.GetBit(i) == notBit);
DEVICE_ASSERT(portalConst.GetBit(i) == notBit);
return true;
}
// Word-sized counterpart of HelpTestBit: exercises every word operation of
// `portal` on word `i` (interpreting the storage as an array of WordType),
// reading the result back through both `portal` and `portalConst`.
//
// Returns true when every check passes. On the first failing check a
// diagnostic is printed via printf (see DEVICE_ASSERT) and false is returned.
//
// The statement order matters: each step updates the local model `word`,
// applies the matching portal operation, and then calls `testValues`, which
// verifies the stored word and resets both the model and the stored word to
// `origWord` before the next step.
template <typename WordType, typename PortalType, typename PortalConstType>
VTKM_EXEC_CONT static bool HelpTestWord(vtkm::Id i,
PortalType portal,
PortalConstType portalConst)
{
const auto origWord = RandomWordFromIndex<WordType>(i);
auto word = origWord;
// Second pseudorandom word used as the operand of the set/and/or/xor steps.
const auto mod = RandomWordFromIndex<WordType>(i + NUM_BITS);
// Compares the stored word (via both portals) against the model `word`; `op`
// names the operation under test for the failure message. DEVICE_ASSERT_MSG
// returns false from this lambda on mismatch.
auto testValues = [&](const char* op) -> bool {
auto expected = word;
auto result = portal.template GetWordAtomic<WordType>(i);
auto resultConst = portalConst.template GetWordAtomic<WordType>(i);
DEVICE_ASSERT_MSG(result == expected, op);
DEVICE_ASSERT_MSG(resultConst == expected, op);
// Reset:
word = origWord;
portal.SetWordAtomic(i, word);
return true;
};
portal.SetWord(i, word);
DEVICE_ASSERT(testValues("SetWord"));
word = mod;
portal.SetWordAtomic(i, mod);
DEVICE_ASSERT(testValues("SetWordAtomic"));
// C++ promotes e.g. uint8 to int32 when performing bitwise not. Silence
// conversion warning and mask unimportant bits:
word = static_cast<WordType>(~word);
portal.template NotWordAtomic<WordType>(i);
DEVICE_ASSERT(testValues("NotWordAtomic"));
word = word & mod;
portal.AndWordAtomic(i, mod);
DEVICE_ASSERT(testValues("AndWordAtomic"));
word = word | mod;
portal.OrWordAtomic(i, mod);
DEVICE_ASSERT(testValues("OrWordAtomic"));
word = word ^ mod;
portal.XorWordAtomic(i, mod);
DEVICE_ASSERT(testValues("XorWordAtomic"));
// Compare-and-swap. At this point the stored word equals `word` (reset by the
// last testValues call).
// NOTE(review): the assertions below are consistent with a signature of
// (index, newValue, expectedValue) that returns the value seen before the
// call -- confirm against the BitField portal declaration.
const WordType notWord = static_cast<WordType>(~word);
// First call: the stored word is asserted to remain `word` afterwards.
auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord);
DEVICE_ASSERT(casResult == word);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == word);
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == word);
// Second call: the stored word is asserted to have become `notWord`.
casResult = portal.CompareAndSwapWordAtomic(i, notWord, word);
DEVICE_ASSERT(casResult == word);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == notWord);
DEVICE_ASSERT(portalConst.template GetWord<WordType>(i) == notWord);
return true;
}
// Functor for vtkm::ListForEach that runs the word-operation checks on the
// control side for one word type per call. It holds copies of the mutable and
// const portals under test.
template <typename PortalType, typename PortalConstType>
struct HelpTestWordOpsControl
{
PortalType Portal;
PortalConstType PortalConst;
VTKM_CONT
HelpTestWordOpsControl(PortalType portal, PortalConstType portalConst)
: Portal(portal)
, PortalConst(portalConst)
{
}
// Invoked once per word type; the argument value is unused, only its type
// selects WordType. Asserts that both portals agree on the word count and
// that HelpTestWord succeeds for every word index.
template <typename WordType>
VTKM_CONT void operator()(WordType)
{
const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());
for (vtkm::Id i = 0; i < numWords; ++i)
{
VTKM_TEST_ASSERT(HelpTestWord<WordType>(i, this->Portal, this->PortalConst));
}
}
};
// Control-side validation of a mutable/const BitField portal pair that is
// expected to hold NUM_BITS bits.
//
// Checks, in order:
//  1. the bit count and the 8/16/32/64-bit word counts reported by both
//     portals (word counts are NUM_BITS rounded up to whole words);
//  2. every single-bit operation on every bit index (HelpTestBit);
//  3. every word operation for each word type listed by the portal's
//     AtomicInterface (HelpTestWordOpsControl).
//
// Any failure is raised through VTKM_TEST_ASSERT.
template <typename Portal, typename PortalConst>
VTKM_CONT static void HelpTestPortalsControl(Portal portal, PortalConst portalConst)
{
  const auto numWords8 = (NUM_BITS + 7) / 8;
  const auto numWords16 = (NUM_BITS + 15) / 16;
  const auto numWords32 = (NUM_BITS + 31) / 32;
  const auto numWords64 = (NUM_BITS + 63) / 64;

  VTKM_TEST_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
  VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
  VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
  VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
  VTKM_TEST_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);

  VTKM_TEST_ASSERT(portalConst.GetNumberOfBits() == NUM_BITS);
  VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
  VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
  VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
  VTKM_TEST_ASSERT(portalConst.template GetNumberOfWords<vtkm::UInt64>() == numWords64);

  for (vtkm::Id i = 0; i < NUM_BITS; ++i)
  {
    // HelpTestBit reports failure only through its return value (plus a
    // printf), so the result has to be asserted or a broken bit operation
    // would go unnoticed by the test harness.
    VTKM_TEST_ASSERT(HelpTestBit(i, portal, portalConst));
  }

  HelpTestWordOpsControl<Portal, PortalConst> test(portal, portalConst);
  vtkm::ListForEach(test, typename Portal::AtomicInterface::WordTypes{});
}
// Runs the control-side portal checks on a freshly generated pseudorandom
// BitField, using its mutable and const control portals.
VTKM_CONT
static void TestControlPortals()
{
  auto bits = RandomBitField();

  // Acquire the mutable portal first, then the const one.
  auto writable = bits.GetPortalControl();
  auto readOnly = bits.GetPortalConstControl();

  HelpTestPortalsControl(writable, readOnly);
}
// Size sanity check usable from execution-side code: verifies that `portal`
// reports NUM_BITS bits and the matching 8/16/32/64-bit word counts (NUM_BITS
// rounded up to whole words). Returns true on success; on the first mismatch
// DEVICE_ASSERT prints a diagnostic and false is returned.
template <typename Portal>
VTKM_EXEC_CONT static bool HelpTestPortalSanityExecution(Portal portal)
{
const auto numWords8 = (NUM_BITS + 7) / 8;
const auto numWords16 = (NUM_BITS + 15) / 16;
const auto numWords32 = (NUM_BITS + 31) / 32;
const auto numWords64 = (NUM_BITS + 63) / 64;
DEVICE_ASSERT(portal.GetNumberOfBits() == NUM_BITS);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt8>() == numWords8);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt16>() == numWords16);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt32>() == numWords32);
DEVICE_ASSERT(portal.template GetNumberOfWords<vtkm::UInt64>() == numWords64);
return true;
}
// Schedulable functor that runs the word-operation checks for one word index
// per invocation. Invocation 0 additionally runs the size sanity checks on
// both portals. Failures are reported through RaiseError.
template <typename WordType, typename PortalType, typename PortalConstType>
struct HelpTestPortalsExecutionWordsFunctor : vtkm::exec::FunctorBase
{
  PortalType Portal;
  PortalConstType PortalConst;

  HelpTestPortalsExecutionWordsFunctor(PortalType portal, PortalConstType portalConst)
    : Portal(portal)
    , PortalConst(portalConst)
  {
  }

  VTKM_EXEC_CONT
  void operator()(vtkm::Id i) const
  {
    // Only the first invocation performs the portal-wide sanity checks.
    const bool runSanityChecks = (i == 0);

    if (runSanityChecks && !HelpTestPortalSanityExecution(this->Portal))
    {
      this->RaiseError("Testing Portal sanity failed.");
      return;
    }

    if (runSanityChecks && !HelpTestPortalSanityExecution(this->PortalConst))
    {
      this->RaiseError("Testing PortalConst sanity failed.");
      return;
    }

    if (HelpTestWord<WordType>(i, this->Portal, this->PortalConst))
    {
      return;
    }

    this->RaiseError("Testing word operations failed.");
  }
};
// Schedulable functor that runs the single-bit operation checks for one bit
// index per invocation. A failed check is reported through RaiseError.
template <typename PortalType, typename PortalConstType>
struct HelpTestPortalsExecutionBitsFunctor : vtkm::exec::FunctorBase
{
  PortalType Portal;
  PortalConstType PortalConst;

  HelpTestPortalsExecutionBitsFunctor(PortalType portal, PortalConstType portalConst)
    : Portal(portal)
    , PortalConst(portalConst)
  {
  }

  VTKM_EXEC_CONT
  void operator()(vtkm::Id i) const
  {
    if (HelpTestBit(i, this->Portal, this->PortalConst))
    {
      return;
    }

    this->RaiseError("Testing bit operations failed.");
  }
};
// Functor for vtkm::ListForEach that, for one word type per call, schedules
// the word-operation checks on the device with one invocation per word.
template <typename PortalType, typename PortalConstType>
struct HelpTestWordOpsExecution
{
  PortalType Portal;
  PortalConstType PortalConst;

  VTKM_CONT
  HelpTestWordOpsExecution(PortalType portal, PortalConstType portalConst)
    : Portal(portal)
    , PortalConst(portalConst)
  {
  }

  // The argument value is unused; only its type selects WordType.
  template <typename WordType>
  VTKM_CONT void operator()(WordType)
  {
    const auto numWords = this->Portal.template GetNumberOfWords<WordType>();
    VTKM_TEST_ASSERT(numWords == this->PortalConst.template GetNumberOfWords<WordType>());

    HelpTestPortalsExecutionWordsFunctor<WordType, PortalType, PortalConstType> wordChecks{
      this->Portal, this->PortalConst
    };
    Algo::Schedule(wordChecks, numWords);
  }
};
// Execution-side validation of a mutable/const portal pair: first schedules
// the single-bit checks (one invocation per bit), then the word checks for
// each word type listed by the portal's AtomicInterface.
template <typename Portal, typename PortalConst>
VTKM_CONT static void HelpTestPortalsExecution(Portal portal, PortalConst portalConst)
{
  using BitChecks = HelpTestPortalsExecutionBitsFunctor<Portal, PortalConst>;
  using WordChecks = HelpTestWordOpsExecution<Portal, PortalConst>;

  BitChecks bitChecks{ portal, portalConst };
  Algo::Schedule(bitChecks, portal.GetNumberOfBits());

  WordChecks wordChecks(portal, portalConst);
  vtkm::ListForEach(wordChecks, typename Portal::AtomicInterface::WordTypes{});
}
// Runs the execution-side portal checks on a freshly generated pseudorandom
// BitField, using portals prepared for the device under test.
VTKM_CONT
static void TestExecutionPortals()
{
  auto bits = RandomBitField();

  // Prepare the in-place (mutable) portal first, then the input (const) one.
  auto writable = bits.PrepareForInPlace(DeviceAdapterTag{});
  auto readOnly = bits.PrepareForInput(DeviceAdapterTag{});

  HelpTestPortalsExecution(writable, readOnly);
}
// Checks GetFinalWordMask for 32- and 64-bit words over a table of bit-field
// sizes. Per the table, the expected mask has one low-order bit set for each
// bit that falls in the last word: zero for an empty field, all ones when the
// size is a non-zero multiple of the word size, and a partial mask otherwise.
VTKM_CONT
static void TestFinalWordMask()
{
// Allocates a field of `numBits` bits and asserts that its 32-bit final-word
// mask equals `expectedMask`. std::hex is placed after numBits in the message
// arguments so that only the two masks are affected by it.
auto testMask32 = [](vtkm::Id numBits, vtkm::UInt32 expectedMask) {
vtkm::cont::BitField field;
field.Allocate(numBits);
auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt32>();
VTKM_TEST_ASSERT(expectedMask == mask,
"Unexpected mask for BitField size ",
numBits,
": Expected 0x",
std::hex,
expectedMask,
" got 0x",
mask);
};
// 64-bit counterpart of testMask32.
auto testMask64 = [](vtkm::Id numBits, vtkm::UInt64 expectedMask) {
vtkm::cont::BitField field;
field.Allocate(numBits);
auto mask = field.GetPortalConstControl().GetFinalWordMask<vtkm::UInt64>();
VTKM_TEST_ASSERT(expectedMask == mask,
"Unexpected mask for BitField size ",
numBits,
": Expected 0x",
std::hex,
expectedMask,
" got 0x",
mask);
};
// 32-bit words: sizes below, at, and above whole-word boundaries.
testMask32(0, 0x00000000);
testMask32(1, 0x00000001);
testMask32(2, 0x00000003);
testMask32(3, 0x00000007);
testMask32(4, 0x0000000f);
testMask32(5, 0x0000001f);
testMask32(8, 0x000000ff);
testMask32(16, 0x0000ffff);
testMask32(24, 0x00ffffff);
testMask32(25, 0x01ffffff);
testMask32(31, 0x7fffffff);
testMask32(32, 0xffffffff);
testMask32(64, 0xffffffff);
testMask32(128, 0xffffffff);
testMask32(129, 0x00000001);
// 64-bit words: same pattern.
testMask64(0, 0x0000000000000000);
testMask64(1, 0x0000000000000001);
testMask64(2, 0x0000000000000003);
testMask64(3, 0x0000000000000007);
testMask64(4, 0x000000000000000f);
testMask64(5, 0x000000000000001f);
testMask64(8, 0x00000000000000ff);
testMask64(16, 0x000000000000ffff);
testMask64(24, 0x0000000000ffffff);
testMask64(25, 0x0000000001ffffff);
testMask64(31, 0x000000007fffffff);
testMask64(32, 0x00000000ffffffff);
testMask64(40, 0x000000ffffffffff);
testMask64(48, 0x0000ffffffffffff);
testMask64(56, 0x00ffffffffffffff);
testMask64(64, 0xffffffffffffffff);
testMask64(128, 0xffffffffffffffff);
testMask64(129, 0x0000000000000001);
}
// Schedulable functor that validates an ArrayHandleBitField execution portal
// against RandomBitFromIndex and then inverts each entry it has checked.
// With InvertReference == false the portal must match RandomBitFromIndex(i);
// with InvertReference == true it must match the negation.
struct ArrayHandleBitFieldChecker : vtkm::exec::FunctorBase
{
  using PortalType = typename ArrayHandleBitField::ExecutionTypes<DeviceAdapterTag>::Portal;

  PortalType Portal;
  bool InvertReference;

  VTKM_EXEC_CONT
  ArrayHandleBitFieldChecker(PortalType portal, bool invert)
    : Portal(portal)
    , InvertReference(invert)
  {
  }

  VTKM_EXEC
  void operator()(vtkm::Id i) const
  {
    // XOR with the invert flag: yields the generator bit as-is when the flag
    // is false and its negation when the flag is true.
    const bool generated = RandomBitFromIndex(i);
    const bool expected = (generated != this->InvertReference);

    if (this->Portal.Get(i) != expected)
    {
      this->RaiseError("Unexpected value from ArrayHandleBitField portal.");
      return;
    }

    // Leave the inverted value behind so that a following launch with the
    // opposite InvertReference setting can verify the write.
    this->Portal.Set(i, !expected);
  }
};
// Wraps a pseudorandom BitField in an ArrayHandleBitField, checks its length,
// and then launches the checker twice: the first launch verifies the original
// values and inverts them, the second verifies the inverted values.
VTKM_CONT
static void TestArrayHandleBitField()
{
  auto handle = vtkm::cont::make_ArrayHandleBitField(RandomBitField());
  const vtkm::Id numBits = handle.GetNumberOfValues();

  VTKM_TEST_ASSERT(numBits == NUM_BITS,
                   "ArrayHandleBitField returned the wrong number of values. "
                   "Expected: ",
                   NUM_BITS,
                   " got: ",
                   numBits);

  // Pass 1: values must equal the generator output.
  {
    ArrayHandleBitFieldChecker checkOriginal{ handle.PrepareForInPlace(DeviceAdapterTag{}),
                                              false };
    Algo::Schedule(checkOriginal, numBits);
  }

  // Pass 2: values must equal the negated generator output.
  {
    ArrayHandleBitFieldChecker checkInverted{ handle.PrepareForInPlace(DeviceAdapterTag{}),
                                              true };
    Algo::Schedule(checkInverted, numBits);
  }
}
// Runs ConditionalMergeWorklet with an ArrayHandleBitField as the condition
// field and two counting arrays as the true/false inputs, then verifies on the
// control side that every output element equals the input selected by the
// corresponding condition bit.
VTKM_CONT
static void TestArrayInvokeWorklet()
{
auto condArray = vtkm::cont::make_ArrayHandleBitField(RandomBitField());
// NOTE(review): the arguments are presumably (start, step, length), which
// would make the true values even and the false values odd -- confirm against
// make_ArrayHandleCounting.
auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
vtkm::cont::ArrayHandle<vtkm::Id> output;
vtkm::worklet::Invoker invoke;
invoke(ConditionalMergeWorklet{}, condArray, trueArray, falseArray, output);
auto condVals = condArray.GetPortalConstControl();
auto trueVals = trueArray.GetPortalConstControl();
auto falseVals = falseArray.GetPortalConstControl();
auto outVals = output.GetPortalConstControl();
// All four arrays must have the same length before comparing element-wise.
VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == trueVals.GetNumberOfValues());
VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == falseVals.GetNumberOfValues());
VTKM_TEST_ASSERT(condVals.GetNumberOfValues() == outVals.GetNumberOfValues());
for (vtkm::Id i = 0; i < condVals.GetNumberOfValues(); ++i)
{
VTKM_TEST_ASSERT(outVals.Get(i) == (condVals.Get(i) ? trueVals.Get(i) : falseVals.Get(i)));
}
}
// Runs ConditionalMergeWorklet2, which takes the condition as a raw BitField
// (BitFieldInOut) and flips each bit while selecting. Afterwards verifies on
// the control side that (a) every bit is now the negation of the generator
// value and (b) every output element equals the input selected by the bit's
// value from before the flip.
VTKM_CONT
static void TestArrayInvokeWorklet2()
{
auto condBits = RandomBitField();
// NOTE(review): the arguments are presumably (start, step, length) -- confirm
// against make_ArrayHandleCounting.
auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
vtkm::cont::ArrayHandle<vtkm::Id> output;
vtkm::worklet::Invoker invoke;
invoke(ConditionalMergeWorklet2{}, condBits, trueArray, falseArray, output);
auto condVals = condBits.GetPortalConstControl();
auto trueVals = trueArray.GetPortalConstControl();
auto falseVals = falseArray.GetPortalConstControl();
auto outVals = output.GetPortalConstControl();
// The bit field and all three arrays must have the same length.
VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == trueVals.GetNumberOfValues());
VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == falseVals.GetNumberOfValues());
VTKM_TEST_ASSERT(condVals.GetNumberOfBits() == outVals.GetNumberOfValues());
for (vtkm::Id i = 0; i < condVals.GetNumberOfBits(); ++i)
{
// The worklet flips the bitfield in place after choosing true/false paths
VTKM_TEST_ASSERT(condVals.GetBit(i) == !RandomBitFromIndex(i));
// condVals now holds the flipped bits, so negate to recover the bit value
// that the worklet is expected to have selected on.
VTKM_TEST_ASSERT(outVals.Get(i) ==
(!condVals.GetBit(i) ? trueVals.Get(i) : falseVals.Get(i)));
}
}
// Functor handed to the testing harness by Run; invokes every BitField test
// of the enclosing TestingBitField in a fixed order.
struct TestRunner
{
VTKM_CONT
void operator()() const
{
TestingBitField::TestBlockAllocation();
TestingBitField::TestControlPortals();
TestingBitField::TestExecutionPortals();
TestingBitField::TestFinalWordMask();
TestingBitField::TestArrayHandleBitField();
TestingBitField::TestArrayInvokeWorklet();
TestingBitField::TestArrayInvokeWorklet2();
}
};
public:
// Entry point of the suite: forces the runtime device tracker onto
// DeviceAdapterTag and executes all BitField tests through the testing
// harness, returning the harness's exit code.
static VTKM_CONT int Run(int argc, char* argv[])
{
  vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(DeviceAdapterTag{});

  const TestRunner allTests{};
  return vtkm::cont::testing::Testing::Run(allTests, argc, argv);
}
};
}
}
} // namespace vtkm::cont::testing
#endif //vtk_m_cont_testing_TestingBitFields_h

@ -2443,6 +2443,100 @@ private:
}
}
// Verifies Algorithm::BitFieldToUnorderedSet.
//
// Several bit fields are built (a fixed word repeated throughout, and
// std::mt19937 sequences from fixed seeds), each 1024 whole words plus 5 extra
// bits so that the last word is only partially used. For every field the
// resulting index array is sorted and walked in lockstep with the bits: a bit
// must be set exactly when its index is the next entry of the sorted array,
// and no entries may be left over at the end.
static VTKM_CONT void TestBitFieldToUnorderedSet()
{
  using IndexArray = vtkm::cont::ArrayHandle<vtkm::Id>;
  using WordType = WordTypeDefault;

  // Test that everything works correctly with a partial word at the end.
  static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
  // +5 to get a partial word:
  static constexpr vtkm::Id NumBits = 1024 * BitsPerWord + 5;
  static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;

  // Converts `bits` to an index set and checks it against the bits themselves.
  auto testIndexArray = [](const BitField& bits) {
    const vtkm::Id numBits = bits.GetNumberOfBits();
    IndexArray indices;
    Algorithm::BitFieldToUnorderedSet(bits, indices);
    // The result is unordered; sort so it can be compared in a single pass.
    Algorithm::Sort(indices);

    auto bitPortal = bits.GetPortalConstControl();
    auto indexPortal = indices.GetPortalConstControl();
    const vtkm::Id numIndices = indices.GetNumberOfValues();

    vtkm::Id curIndex = 0;
    for (vtkm::Id curBit = 0; curBit < numBits; ++curBit)
    {
      // A bit is "marked" when the next unconsumed index refers to it.
      const bool markedSet = curIndex < numIndices ? indexPortal.Get(curIndex) == curBit : false;
      const bool isSet = bitPortal.GetBit(curBit);

      VTKM_TEST_ASSERT(
        markedSet == isSet, "Bit ", curBit, " is set? ", isSet, " Marked set? ", markedSet);

      if (markedSet)
      {
        curIndex++;
      }
    }

    VTKM_TEST_ASSERT(curIndex == indices.GetNumberOfValues(), "Index array has extra values.");
  };

  // Fills every word of the field with `mask` and checks the conversion.
  auto testRepeatedMask = [&](WordType mask) {
    // std::hex is sticky; restore the previous formatting afterwards so later
    // integer output on std::cout is not silently switched to hexadecimal.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask
              << std::endl;
    std::cout.flags(savedFlags);

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, mask);
      }
    }

    testIndexArray(bits);
  };

  // Fills the field from a std::mt19937 sequence seeded with `seed` and checks
  // the conversion.
  auto testRandomMask = [&](WordType seed) {
    // See testRepeatedMask: keep std::hex from leaking into later output.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex
              << seed << std::endl;
    std::cout.flags(savedFlags);

    std::mt19937 mt{ seed };
    std::uniform_int_distribution<std::mt19937::result_type> rng;

    BitField bits;
    {
      bits.Allocate(NumBits);
      auto fillPortal = bits.GetPortalControl();
      for (vtkm::Id i = 0; i < NumWords; ++i)
      {
        fillPortal.SetWord(i, static_cast<WordType>(rng(mt)));
      }
    }

    testIndexArray(bits);
  };

  testRepeatedMask(0x00000000);
  testRepeatedMask(0xeeeeeeee);
  testRepeatedMask(0xffffffff);
  testRepeatedMask(0x1c0fd395);
  testRepeatedMask(0xdeadbeef);

  testRandomMask(0x00000000);
  testRandomMask(0xeeeeeeee);
  testRandomMask(0xffffffff);
  testRandomMask(0x1c0fd395);
  testRandomMask(0xdeadbeef);
}
struct TestAll
{
VTKM_CONT void operator()() const
@ -2496,6 +2590,8 @@ private:
TestCopyArraysInDiffTypes();
TestAtomicArray();
TestBitFieldToUnorderedSet();
}
};

@ -30,6 +30,8 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
@ -101,6 +103,11 @@ public:
}
};
// Execution-side atomic interface for the DeviceAdapterTagTestAlgorithmGeneral
// test device. The specialization adds no members of its own: everything is
// inherited from AtomicInterfaceControl.
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTestAlgorithmGeneral> : public AtomicInterfaceControl
{
};
template <typename TargetClass>
struct VirtualObjectTransfer<TargetClass, vtkm::cont::DeviceAdapterTagTestAlgorithmGeneral> final
: public VirtualObjectTransferShareWithControl<TargetClass>

@ -30,9 +30,9 @@
#define VTKM_EXEC __device__ __host__
#define VTKM_EXEC_CONT __device__ __host__
#if __CUDAVER__ >= 75000
#define VTKM_SUPPRESS_EXEC_WARNINGS #pragma nv_exec_check_disable
#define VTKM_SUPPRESS_EXEC_WARNINGS _Pragma("nv_exec_check_disable")
#else
#define VTKM_SUPPRESS_EXEC_WARNINGS #pragma hd_warning_disable
#define VTKM_SUPPRESS_EXEC_WARNINGS _Pragma("hd_warning_disable")
#endif
#else
#define VTKM_EXEC

@ -33,6 +33,8 @@
#include <vtkm/cont/testing/Testing.h>
#include <limits>
#define VTKM_MATH_ASSERT(condition, message) \
if (!(condition)) \
{ \
@ -761,6 +763,76 @@ struct TypeListTagAbs
{
};
//-----------------------------------------------------------------------------
// Number of sample points scheduled per word type by the bit-operation tests.
static constexpr vtkm::Id BitOpSamples = 1024 * 1024;

// Schedulable functor that compares vtkm::CountSetBits and
// vtkm::FindFirstSetBit against simple loop-based reference implementations.
// Invocation i tests three words derived from i: i * Offset, and that value
// minus and plus i, where Offset spreads the BitOpSamples sample points across
// the range of T.
template <typename T>
struct BitOpTests : public vtkm::exec::FunctorBase
{
  static constexpr T MaxT = std::numeric_limits<T>::max();
  static constexpr T Offset = MaxT / BitOpSamples;

  VTKM_EXEC void operator()(vtkm::Id i) const
  {
    const T idx = static_cast<T>(i);
    const T word = idx * this->Offset;

    TestWord(word - idx);
    TestWord(word);
    TestWord(word + idx);
  }

  // Checks both bit operations for a single word; mismatches are reported
  // through VTKM_MATH_ASSERT.
  VTKM_EXEC void TestWord(T word) const
  {
    VTKM_MATH_ASSERT(test_equal(vtkm::CountSetBits(word), this->DumbCountBits(word)),
                     "CountBits returned wrong value.");
    // Terminate the statement explicitly, matching the call above, instead of
    // relying on the macro expanding to a complete statement by itself.
    VTKM_MATH_ASSERT(test_equal(vtkm::FindFirstSetBit(word), this->DumbFindFirstSetBit(word)),
                     "FindFirstSetBit returned wrong value.");
  }

  // Reference popcount: shifts the word right one bit at a time and counts the
  // low bits that are set.
  VTKM_EXEC vtkm::Int32 DumbCountBits(T word) const
  {
    vtkm::Int32 bits = 0;
    while (word)
    {
      if (word & 0x1)
      {
        ++bits;
      }
      word >>= 1;
    }
    return bits;
  }

  // Reference find-first-set: returns the 1-based position of the lowest set
  // bit, or 0 when no bit is set.
  VTKM_EXEC vtkm::Int32 DumbFindFirstSetBit(T word) const
  {
    if (word == 0)
    {
      return 0;
    }

    vtkm::Int32 bit = 1;
    while ((word & 0x1) == 0)
    {
      word >>= 1;
      ++bit;
    }
    return bit;
  }
};
template <typename Device>
struct TryBitOpTests
{
template <typename T>
void operator()(const T&) const
{
vtkm::cont::DeviceAdapterAlgorithm<Device>::Schedule(BitOpTests<T>(), BitOpSamples);
}
};
using TypeListTagBitOp = vtkm::ListTagBase<vtkm::UInt32, vtkm::UInt64>;
//-----------------------------------------------------------------------------
template <typename Device>
void RunMathTests()
@ -773,6 +845,8 @@ void RunMathTests()
vtkm::testing::Testing::TryTypes(TryAllTypesTests<Device>());
std::cout << "Test all Abs types" << std::endl;
vtkm::testing::Testing::TryTypes(TryAbsTests<Device>(), TypeListTagAbs());
std::cout << "Test all bit operations" << std::endl;
vtkm::testing::Testing::TryTypes(TryBitOpTests<Device>(), TypeListTagBitOp());
}
} // namespace UnitTestMathNamespace

@ -36,6 +36,7 @@
#include <vtkm/cont/arg/ControlSignatureTagBase.h>
#include <vtkm/cont/arg/TransportTagAtomicArray.h>
#include <vtkm/cont/arg/TransportTagBitField.h>
#include <vtkm/cont/arg/TransportTagCellSetIn.h>
#include <vtkm/cont/arg/TransportTagExecObject.h>
#include <vtkm/cont/arg/TransportTagWholeArrayIn.h>
@ -43,6 +44,7 @@
#include <vtkm/cont/arg/TransportTagWholeArrayOut.h>
#include <vtkm/cont/arg/TypeCheckTagArray.h>
#include <vtkm/cont/arg/TypeCheckTagAtomicArray.h>
#include <vtkm/cont/arg/TypeCheckTagBitField.h>
#include <vtkm/cont/arg/TypeCheckTagCellSet.h>
#include <vtkm/cont/arg/TypeCheckTagExecObject.h>
@ -217,6 +219,36 @@ public:
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
};
/// \c ControlSignature tags for whole BitFields.
///
/// When a BitField is passed in to a worklet expecting this ControlSignature
/// type, the appropriate BitPortal is generated and given to the worklet's
/// execution.
///
/// Be aware that this data structure is especially prone to race conditions,
/// so be sure to use the appropriate atomic methods when necessary.
///
/// All three tags share the same type check (\c TypeCheckTagBitField) and
/// fetch (\c FetchTagExecObject); they differ only in the transport tag used
/// (\c TransportTagBitFieldIn, \c TransportTagBitFieldOut, or
/// \c TransportTagBitFieldInOut).
/// @{
///
struct BitFieldIn : vtkm::cont::arg::ControlSignatureTagBase
{
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
using TransportTag = vtkm::cont::arg::TransportTagBitFieldIn;
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
};
struct BitFieldOut : vtkm::cont::arg::ControlSignatureTagBase
{
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
using TransportTag = vtkm::cont::arg::TransportTagBitFieldOut;
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
};
struct BitFieldInOut : vtkm::cont::arg::ControlSignatureTagBase
{
using TypeCheckTag = vtkm::cont::arg::TypeCheckTagBitField;
using TransportTag = vtkm::cont::arg::TransportTagBitFieldInOut;
using FetchTag = vtkm::exec::arg::FetchTagExecObject;
};
/// @}
/// \c ControlSignature tag for whole input topology.
///
/// The \c WholeCellSetIn control signature tag specifies an \c CellSet