From f370857c15b6fa0eedc7204ae663b6a44319f554 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 13 Jun 2019 10:04:28 -0400 Subject: [PATCH] Add CountSetBits and Fill device algorithms. --- benchmarking/BenchmarkDeviceAdapter.cxx | 289 +++++++++++++++--- vtkm/cont/Algorithm.h | 129 ++++++++ vtkm/cont/DeviceAdapterAlgorithm.h | 29 ++ .../internal/DeviceAdapterAlgorithmCuda.h | 112 +++++++ .../internal/DeviceAdapterAlgorithmGeneral.h | 164 ++++++++++ vtkm/cont/internal/FunctorsGeneral.h | 143 +++++++++ vtkm/cont/testing/TestingDeviceAdapter.h | 241 ++++++++++++++- 7 files changed, 1067 insertions(+), 40 deletions(-) diff --git a/benchmarking/BenchmarkDeviceAdapter.cxx b/benchmarking/BenchmarkDeviceAdapter.cxx index 22ea8cad1..942b6210d 100644 --- a/benchmarking/BenchmarkDeviceAdapter.cxx +++ b/benchmarking/BenchmarkDeviceAdapter.cxx @@ -57,21 +57,24 @@ enum BenchmarkName BITFIELD_TO_UNORDERED_SET = 1 << 0, COPY = 1 << 1, COPY_IF = 1 << 2, - LOWER_BOUNDS = 1 << 3, - REDUCE = 1 << 4, - REDUCE_BY_KEY = 1 << 5, - SCAN_INCLUSIVE = 1 << 6, - SCAN_EXCLUSIVE = 1 << 7, - SORT = 1 << 8, - SORT_BY_KEY = 1 << 9, - STABLE_SORT_INDICES = 1 << 10, - STABLE_SORT_INDICES_UNIQUE = 1 << 11, - UNIQUE = 1 << 12, - UPPER_BOUNDS = 1 << 13, + COUNT_SET_BITS = 1 << 3, + FILL = 1 << 4, + LOWER_BOUNDS = 1 << 5, + REDUCE = 1 << 6, + REDUCE_BY_KEY = 1 << 7, + SCAN_EXCLUSIVE = 1 << 8, + SCAN_INCLUSIVE = 1 << 9, + SORT = 1 << 10, + SORT_BY_KEY = 1 << 11, + STABLE_SORT_INDICES = 1 << 12, + STABLE_SORT_INDICES_UNIQUE = 1 << 13, + UNIQUE = 1 << 14, + UPPER_BOUNDS = 1 << 15, - ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | - SCAN_INCLUSIVE | + ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | COUNT_SET_BITS | FILL | LOWER_BOUNDS | REDUCE | + REDUCE_BY_KEY | SCAN_EXCLUSIVE | + SCAN_INCLUSIVE | SORT | SORT_BY_KEY | STABLE_SORT_INDICES | @@ -84,28 +87,20 @@ enum BenchmarkName /// described below: struct BenchDevAlgoConfig { - /// Benchmarks to run. Possible values: - /// Copy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanInclusive, - /// ScanExclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique, - /// Unique, UpperBounds, or All. (Default: All). - // Zero is for parsing, will change to 'all' in main if needed. + /// Benchmarks to run. See BenchmarkName enum. int BenchmarkFlags{ 0 }; /// ValueTypes to test. - /// CLI arg: "TypeList [Base|Extended]" (Base is default). + /// CLI arg: "--base-typelist | --extended-typelist" (Base is default). bool ExtendedTypeList{ false }; /// Run benchmarks using the same number of bytes for all arrays. - /// CLI arg: "FixBytes [n|off]" (n is the number of bytes, default: 2097152, ie. 2MiB) - /// @note FixBytes and FixSizes are not mutually exclusive. If both are - /// specified, both will run. + /// CLI arg: "--array-size-bytes [n]" (n is the number of bytes, default: 2097152, ie. 2MiB) bool TestArraySizeBytes{ true }; vtkm::UInt64 ArraySizeBytes{ 1 << 21 }; /// Run benchmarks using the same number of values for all arrays. - /// CLI arg: "FixSizes [n|off]" (n is the number of values, default: off) - /// @note FixBytes and FixSizes are not mutually exclusive. If both are - /// specified, both will run. + /// CLI arg: "--array-size-values [n]" (n is the number of values, default: off) bool TestArraySizeValues{ false }; vtkm::UInt64 ArraySizeValues{ 1 << 21 }; @@ -113,7 +108,7 @@ struct BenchDevAlgoConfig /// values (5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 75%, 100% /// unique). If false (default), the range is limited to 5%, 25%, 50%, 75%, /// 100%. - /// CLI arg: "DetailedOutputRange" enables the extended range. + /// CLI arg: "--more-output-range" enables the extended range. bool DetailedOutputRangeScaling{ false }; // Internal: The benchmarking code will set this depending on execution phase: @@ -289,11 +284,11 @@ public: { if (wordIdx <= this->MaxMaskedWord && (wordIdx % this->Stride) == 0) { - this->Portal.SetWord(wordIdx, this->Exemplar); + this->Portal.SetWordAtomic(wordIdx, this->Exemplar); } else { - this->Portal.SetWord(wordIdx, static_cast(0)); + this->Portal.SetWordAtomic(wordIdx, static_cast(0)); } } }; @@ -317,8 +312,10 @@ public: stride = 1; } + vtkm::Id numBits = numWords * static_cast(sizeof(WordType) * CHAR_BIT); + vtkm::cont::BitField bits; - auto portal = bits.PrepareForOutput(numWords, DeviceAdapterTag{}); + auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}); using Functor = GenerateBitFieldFunctor; @@ -533,6 +530,184 @@ private: VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75); VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100); + template + struct BenchCountSetBits + { + vtkm::Id NumWords; + vtkm::Id NumBits; + WordType Exemplar; + vtkm::Id Stride; + vtkm::Float32 FillRatio; + vtkm::Id MaxMaskedIndex; + std::string Name; + + vtkm::cont::BitField Bits; + + // See GenerateBitField for details. fillRatio is used to compute + // maxMaskedWord. + VTKM_CONT + BenchCountSetBits(WordType exemplar, + vtkm::Id stride, + vtkm::Float32 fillRatio, + const std::string& name) + : NumWords(Config.ComputeNumberOfWords()) + , NumBits(this->NumWords * static_cast(sizeof(WordType) * CHAR_BIT)) + , Exemplar(exemplar) + , Stride(stride) + , FillRatio(fillRatio) + , MaxMaskedIndex(this->NumWords / static_cast(1. / this->FillRatio)) + , Name(name) + , Bits(GenerateBitField(this->Exemplar, + this->Stride, + this->MaxMaskedIndex, + this->NumWords)) + { + } + + VTKM_CONT + vtkm::Float64 operator()() + { + Timer timer(DeviceAdapter{}); + timer.Start(); + Algorithm::CountSetBits(DeviceAdapter{}, this->Bits); + return timer.GetElapsedTime(); + } + + VTKM_CONT + std::string Description() const + { + const vtkm::Id numFilledWords = this->MaxMaskedIndex / this->Stride; + const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar); + + std::stringstream description; + description << "CountSetBits" << this->Name << " ( " + << "NumWords: " << this->NumWords << " " + << "Exemplar: " << std::hex << this->Exemplar << std::dec << " " + << "FillRatio: " << this->FillRatio << " " + << "Stride: " << this->Stride << " " + << "NumSetBits: " << numSetBits << " )"; + return description.str(); + } + }; + VTKM_MAKE_BENCHMARK(CountSetBitsNull, BenchCountSetBits, 0x00000000, 1, 0.f, "Null"); + VTKM_MAKE_BENCHMARK(CountSetBitsFull, BenchCountSetBits, 0xffffffff, 1, 1.f, "Full"); + VTKM_MAKE_BENCHMARK(CountSetBitsHalfWord, BenchCountSetBits, 0xffff0000, 1, 1.f, "HalfWord"); + VTKM_MAKE_BENCHMARK(CountSetBitsHalfField, BenchCountSetBits, 0xffffffff, 1, 0.5f, "HalfField"); + VTKM_MAKE_BENCHMARK(CountSetBitsAlternateWords, + BenchCountSetBits, + 0xffffffff, + 2, + 1.f, + "AlternateWords"); + VTKM_MAKE_BENCHMARK(CountSetBitsAlternateBits, + BenchCountSetBits, + 0x55555555, + 1, + 1.f, + "AlternateBits"); + + template + struct BenchFillArrayHandle + { + vtkm::cont::ArrayHandle Handle; + + VTKM_CONT + BenchFillArrayHandle() {} + + VTKM_CONT + vtkm::Float64 operator()() + { + const vtkm::Id numVals = Config.ComputeSize(); + Timer timer(DeviceAdapter{}); + timer.Start(); + Algorithm::Fill(DeviceAdapter{}, this->Handle, TestValue(19, ValueType{}), numVals); + return timer.GetElapsedTime(); + } + + VTKM_CONT + std::string Description() const + { + const vtkm::Id numVals = Config.ComputeSize(); + std::stringstream description; + description << "Fill (ArrayHandle, " << numVals << " values)"; + return description.str(); + } + }; + VTKM_MAKE_BENCHMARK(FillArrayHandle, BenchFillArrayHandle); + + template + struct BenchFillBitField + { + vtkm::Id NumWords; + vtkm::Id NumBits; + bool UseBool; + WordType Pattern; + std::string Name; + + vtkm::cont::BitField Bits; + + VTKM_CONT + BenchFillBitField(bool useBool, WordType pattern, const std::string& name) + : NumWords(Config.ComputeNumberOfWords()) + , NumBits(this->NumWords * static_cast(sizeof(WordType) * CHAR_BIT)) + , UseBool(useBool) + , Pattern(pattern) + , Name(name) + { + } + + VTKM_CONT + vtkm::Float64 operator()() + { + Timer timer(DeviceAdapter{}); + if (this->UseBool) + { + timer.Start(); + Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern != 0, this->NumBits); + return timer.GetElapsedTime(); + } + else + { + timer.Start(); + Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern, this->NumBits); + return timer.GetElapsedTime(); + } + } + + VTKM_CONT + std::string Description() const + { + std::stringstream description; + description << "Fill (BitField)" << this->Name << " ( " + << "FillPattern: " << std::hex << this->Pattern << std::dec << " " + << "UseBool: " << this->UseBool << " " + << "NumBits: " << this->NumBits << " )"; + return description.str(); + } + }; + VTKM_MAKE_BENCHMARK(FillBitFieldTrue, BenchFillBitField, true, 0x1, "True"); + VTKM_MAKE_BENCHMARK(FillBitFieldFalse, BenchFillBitField, true, 0x0, "False"); + VTKM_MAKE_BENCHMARK(FillBitField8Bit, + BenchFillBitField, + false, + static_cast(0xcc), + "8Bit"); + VTKM_MAKE_BENCHMARK(FillBitField16Bit, + BenchFillBitField, + false, + static_cast(0xcccc), + "16Bit"); + VTKM_MAKE_BENCHMARK(FillBitField32Bit, + BenchFillBitField, + false, + static_cast(0xcccccccc), + "32Bit"); + VTKM_MAKE_BENCHMARK(FillBitField64Bit, + BenchFillBitField, + false, + static_cast(0xcccccccccccccccc), + "64Bit"); + template struct BenchLowerBounds { @@ -1154,17 +1329,46 @@ public: template static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id) { - using BitFieldWordTypes = vtkm::ListTagBase; + using UInt8Type = vtkm::ListTagBase; + using UInt16Type = vtkm::ListTagBase; + using UInt32Type = vtkm::ListTagBase; + using UInt64Type = vtkm::ListTagBase; + // These need specific word types: if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET) { std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n"; - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id); - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id); - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id); - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id); - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id); - VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, UInt32Type{}, id); + } + + if (Config.BenchmarkFlags & COUNT_SET_BITS) + { + std::cout << DIVIDER << "\nBenchmarking CountSetBits\n"; + VTKM_RUN_BENCHMARK(CountSetBitsNull, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(CountSetBitsFull, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(CountSetBitsHalfWord, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(CountSetBitsHalfField, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(CountSetBitsAlternateWords, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(CountSetBitsAlternateBits, UInt32Type{}, id); + } + + if (Config.BenchmarkFlags & FILL) + { + std::cout << DIVIDER << "\nBenchmarking Fill (ArrayHandle)\n"; + VTKM_RUN_BENCHMARK(FillArrayHandle, ValueTypes{}, id); + + std::cout << DIVIDER << "\nBenchmarking Fill (BitField)\n"; + VTKM_RUN_BENCHMARK(FillBitFieldTrue, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(FillBitFieldFalse, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(FillBitField8Bit, UInt8Type{}, id); + VTKM_RUN_BENCHMARK(FillBitField16Bit, UInt16Type{}, id); + VTKM_RUN_BENCHMARK(FillBitField32Bit, UInt32Type{}, id); + VTKM_RUN_BENCHMARK(FillBitField64Bit, UInt64Type{}, id); } if (Config.BenchmarkFlags & COPY) @@ -1413,7 +1617,7 @@ struct Arg : vtkm::cont::internal::option::Arg const char* c = option.arg; while (argIsNum && (*c != '\0')) { - argIsNum &= static_cast(std::isdigit(*c)); + argIsNum = argIsNum && static_cast(std::isdigit(*c)); ++c; } @@ -1524,7 +1728,8 @@ int main(int argc, char* argv[]) "", "", Arg::None, - "\tCopy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanExclusive, " + "\tBitFieldToUnorderedSet, Copy, CopyIf, CountSetBits, FillBitField, " + "LowerBounds, Reduce, ReduceByKey, ScanExclusive, " "ScanInclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique, " "Unique, UpperBounds" }); usage.push_back( @@ -1631,6 +1836,14 @@ int main(int argc, char* argv[]) { config.BenchmarkFlags |= vtkm::benchmarking::COPY_IF; } + else if (arg == "countsetbits") + { + config.BenchmarkFlags |= vtkm::benchmarking::COUNT_SET_BITS; + } + else if (arg == "fill") + { + config.BenchmarkFlags |= vtkm::benchmarking::FILL; + } else if (arg == "lowerbounds") { config.BenchmarkFlags |= vtkm::benchmarking::LOWER_BOUNDS; diff --git a/vtkm/cont/Algorithm.h b/vtkm/cont/Algorithm.h index b959a8709..fa08f5dea 100644 --- a/vtkm/cont/Algorithm.h +++ b/vtkm/cont/Algorithm.h @@ -108,6 +108,32 @@ struct CopySubRangeFunctor } }; +struct CountSetBitsFunctor +{ + vtkm::Id PopCount{ 0 }; + + template + VTKM_CONT bool operator()(Device, Args&&... args) + { + this->PopCount = vtkm::cont::DeviceAdapterAlgorithm::CountSetBits( + PrepareArgForExec(std::forward(args))...); + return true; + } +}; + +struct FillFunctor +{ + vtkm::Id PopCount{ 0 }; + + template + VTKM_CONT bool operator()(Device, Args&&... args) + { + vtkm::cont::DeviceAdapterAlgorithm::Fill( + PrepareArgForExec(std::forward(args))...); + return true; + } +}; + struct LowerBoundsFunctor { @@ -489,6 +515,109 @@ struct Algorithm outputIndex); } + VTKM_CONT static vtkm::Id CountSetBits(vtkm::cont::DeviceAdapterId devId, + const vtkm::cont::BitField& bits) + { + detail::CountSetBitsFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, bits); + return functor.PopCount; + } + + VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits) + { + return CountSetBits(vtkm::cont::DeviceAdapterTagAny{}, bits); + } + + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::BitField& bits, + bool value, + vtkm::Id numBits) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value, numBits); + } + + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits) + { + Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value, numBits); + } + + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::BitField& bits, + bool value) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value); + } + + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value) + { + Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value); + } + + template + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::BitField& bits, + WordType word, + vtkm::Id numBits) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word, numBits); + } + + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits) + { + Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, word, numBits); + } + + template + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::BitField& bits, + WordType word) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word); + } + + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word) + { + FillBitField(vtkm::cont::DeviceAdapterTagAny{}, bits, word); + } + + template + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::ArrayHandle& handle, + const T& value) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value); + } + + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& handle, const T& value) + { + Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value); + } + + template + VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId, + vtkm::cont::ArrayHandle& handle, + const T& value, + const vtkm::Id numValues) + { + detail::FillFunctor functor; + vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value, numValues); + } + + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& handle, + const T& value, + const vtkm::Id numValues) + { + Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value, numValues); + } template VTKM_CONT static void LowerBounds(vtkm::cont::DeviceAdapterId devId, diff --git a/vtkm/cont/DeviceAdapterAlgorithm.h b/vtkm/cont/DeviceAdapterAlgorithm.h index 16e77c74c..6c0c5801f 100644 --- a/vtkm/cont/DeviceAdapterAlgorithm.h +++ b/vtkm/cont/DeviceAdapterAlgorithm.h @@ -119,6 +119,35 @@ struct DeviceAdapterAlgorithm vtkm::cont::ArrayHandle& output, vtkm::Id outputIndex = 0); + /// \brief Returns the total number of "1" bits in BitField. + VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits); + + /// \brief Fill the BitField with a specific pattern of bits. + /// For boolean values, all bits are set to 1 if value is true, or 0 if value + /// is false. + /// For word masks, the word type must be an unsigned integral type, which + /// will be stamped across the BitField. + /// If numBits is provided, the BitField is resized appropriately. + /// @{ + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits); + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value); + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits); + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, bool value); + /// @} + + /// Fill @a array with @a value. If @a numValues is specified, the array will + /// be resized. + /// @{ + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& array, const T& value); + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& array, + const T& value, + const vtkm::Id numValues); + /// @} + /// \brief Output is the first index in input for each item in values that wouldn't alter the ordering of input /// /// LowerBounds is a vectorized search. From each value in \c values it finds diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h index db16d404e..1f7aae872 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h @@ -441,6 +441,95 @@ private: } } + + template + struct CountSetBitsFunctor : public vtkm::exec::FunctorBase + { + VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same::value || + std::is_same::value || + std::is_same::value), + "Unsupported GlobalPopCountType. Must support CUDA atomicAdd."); + + //Using typename BitsPortal::WordTypePreferred causes dependent type errors using GCC 4.8.5 + //which is the GCC required compiler for CUDA 9.2 on summit/power9 + using Word = typename vtkm::cont::internal::AtomicInterfaceExecution< + DeviceAdapterTagCuda>::WordTypePreferred; + + VTKM_CONT + CountSetBitsFunctor(const BitsPortal& portal, GlobalPopCountType* globalPopCount) + : Portal{ portal } + , GlobalPopCount{ globalPopCount } + , FinalWordIndex{ portal.GetNumberOfWords() - 1 } + , FinalWordMask{ portal.GetFinalWordMask() } + { + } + + ~CountSetBitsFunctor() {} + + VTKM_CONT void Initialize() + { + assert(this->GlobalPopCount != nullptr); + VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType))); + } + + VTKM_SUPPRESS_EXEC_WARNINGS + __device__ void operator()(vtkm::Id wordIdx) const + { + Word word = this->Portal.GetWord(wordIdx); + + // The last word may be partial -- mask out trailing bits if needed. + const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 }; + + word &= mask; + + if (word != 0) + { + this->LocalPopCount = vtkm::CountSetBits(word); + this->Reduce(); + } + } + + VTKM_CONT vtkm::Id Finalize() const + { + assert(this->GlobalPopCount != nullptr); + GlobalPopCountType result; + VTKM_CUDA_CALL(cudaMemcpy( + &result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost)); + return static_cast(result); + } + + private: + // Every thread with a non-zero local popcount calls this function, which + // computes the total popcount for the coalesced threads and atomically + // increasing the global popcount. + VTKM_SUPPRESS_EXEC_WARNINGS + __device__ void Reduce() const + { + const auto activeLanes = cooperative_groups::coalesced_threads(); + const int activeRank = activeLanes.thread_rank(); + const int activeSize = activeLanes.size(); + + // Reduction value: + vtkm::Int32 rVal = this->LocalPopCount; + for (int delta = 1; delta < activeSize; delta *= 2) + { + rVal += activeLanes.shfl_down(rVal, delta); + } + + if (activeRank == 0) + { + atomicAdd(this->GlobalPopCount, static_cast(rVal)); + } + } + + const BitsPortal Portal; + GlobalPopCountType* GlobalPopCount; + mutable vtkm::Int32 LocalPopCount{ 0 }; + // Used to mask trailing bits the in last word. + vtkm::Id FinalWordIndex{ 0 }; + Word FinalWordMask{ 0 }; + }; + template VTKM_CONT static void LowerBoundsPortal(const InputPortal& input, const ValuesPortal& values, @@ -961,6 +1050,21 @@ private: return functor.Finalize(); } + template + VTKM_CONT static vtkm::Id CountSetBitsPortal(const BitsPortal& bits) + { + using Functor = CountSetBitsFunctor; + + // RAII for the global atomic counter. + auto globalCount = cuda::internal::make_CudaUniquePtr(1); + Functor functor{ bits, globalCount.get() }; + + functor.Initialize(); + Schedule(functor, bits.GetNumberOfWords()); + Synchronize(); // Ensure kernel is done before checking final atomic count + return functor.Finalize(); + } + //----------------------------------------------------------------------------- public: @@ -1081,6 +1185,14 @@ public: return true; } + VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + auto bitsPortal = bits.PrepareForInput(DeviceAdapterTagCuda{}); + // Use a uint64 for accumulator, as atomicAdd does not support signed int64. + return CountSetBitsPortal(bitsPortal); + } + template VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle& input, const vtkm::cont::ArrayHandle& values, diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h index e3ad4ac6d..d1e0ac400 100644 --- a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h +++ b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h @@ -269,6 +269,170 @@ public: return true; } + //-------------------------------------------------------------------------- + // Count Set Bits + VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{}); + + std::atomic popCount; + popCount.store(0, std::memory_order_relaxed); + + using Functor = CountSetBitsFunctor; + Functor functor{ bitsPortal, popCount }; + + DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances()); + DerivedAlgorithm::Synchronize(); + + return static_cast(popCount.load(std::memory_order_seq_cst)); + } + + //-------------------------------------------------------------------------- + // Fill Bit Field (bool, resize) + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + if (numBits == 0) + { + bits.Shrink(0); + return; + } + + auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}); + + using WordType = + typename vtkm::cont::BitField::template ExecutionTypes::WordTypePreferred; + + using Functor = FillBitFieldFunctor; + Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } }; + + const vtkm::Id numWords = portal.template GetNumberOfWords(); + DerivedAlgorithm::Schedule(functor, numWords); + } + + //-------------------------------------------------------------------------- + // Fill Bit Field (bool) + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + const vtkm::Id numBits = bits.GetNumberOfBits(); + if (numBits == 0) + { + return; + } + + auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}); + + using WordType = + typename vtkm::cont::BitField::template ExecutionTypes::WordTypePreferred; + + using Functor = FillBitFieldFunctor; + Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } }; + + const vtkm::Id numWords = portal.template GetNumberOfWords(); + DerivedAlgorithm::Schedule(functor, numWords); + } + + //-------------------------------------------------------------------------- + // Fill Bit Field (mask, resize) + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits) + { + VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType{}, "Invalid word type."); + + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + if (numBits == 0) + { + bits.Shrink(0); + return; + } + + auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}); + + // If less than 32 bits, repeat the word until we get a 32 bit pattern. + // Using this for the pattern prevents races while writing small numbers + // to adjacent memory locations. + auto repWord = RepeatTo32BitsIfNeeded(word); + using RepWordType = decltype(repWord); + + using Functor = FillBitFieldFunctor; + Functor functor{ portal, repWord }; + + const vtkm::Id numWords = portal.template GetNumberOfWords(); + DerivedAlgorithm::Schedule(functor, numWords); + } + + //-------------------------------------------------------------------------- + // Fill Bit Field (mask) + template + VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word) + { + VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType{}, "Invalid word type."); + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + const vtkm::Id numBits = bits.GetNumberOfBits(); + if (numBits == 0) + { + return; + } + + auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{}); + + // If less than 32 bits, repeat the word until we get a 32 bit pattern. + // Using this for the pattern prevents races while writing small numbers + // to adjacent memory locations. + auto repWord = RepeatTo32BitsIfNeeded(word); + using RepWordType = decltype(repWord); + + using Functor = FillBitFieldFunctor; + Functor functor{ portal, repWord }; + + const vtkm::Id numWords = portal.template GetNumberOfWords(); + DerivedAlgorithm::Schedule(functor, numWords); + } + + //-------------------------------------------------------------------------- + // Fill ArrayHandle + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& handle, const T& value) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + + const vtkm::Id numValues = handle.GetNumberOfValues(); + if (numValues == 0) + { + return; + } + + auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{}); + FillArrayHandleFunctor functor{ portal, value }; + DerivedAlgorithm::Schedule(functor, numValues); + } + + //-------------------------------------------------------------------------- + // Fill ArrayHandle (resize) + template + VTKM_CONT static void Fill(vtkm::cont::ArrayHandle& handle, + const T& value, + const vtkm::Id numValues) + { + VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf); + if (numValues == 0) + { + handle.Shrink(0); + return; + } + + auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{}); + FillArrayHandleFunctor functor{ portal, value }; + DerivedAlgorithm::Schedule(functor, numValues); + } + //-------------------------------------------------------------------------- // Lower Bounds template diff --git a/vtkm/cont/internal/FunctorsGeneral.h b/vtkm/cont/internal/FunctorsGeneral.h index 895e74fcc..6c66c547d 100644 --- a/vtkm/cont/internal/FunctorsGeneral.h +++ b/vtkm/cont/internal/FunctorsGeneral.h @@ -494,6 +494,149 @@ struct CopyKernel void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {} }; +template +struct CountSetBitsFunctor : public vtkm::exec::FunctorBase +{ + using WordType = typename BitsPortal::WordTypePreferred; + + // This functor executes a number of instances, where each instance handles + // two cachelines worth of data. This reduces the number of atomic operations. + // Figure out how many words that is: + static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT; + static constexpr vtkm::Id WordsPerCacheLine = + CacheLineSize / static_cast(sizeof(WordType)); + static constexpr vtkm::Id CacheLinesPerInstance = 2; + static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine; + + VTKM_CONT + CountSetBitsFunctor(const BitsPortal& input, std::atomic& popCount) + : Input{ input } + , PopCount{ popCount } + , FinalWordIndex{ input.GetNumberOfWords() - 1 } + , FinalWordMask{ input.GetFinalWordMask() } + { + } + + VTKM_CONT vtkm::Id GetNumberOfInstances() const + { + const auto numWords = this->Input.GetNumberOfWords(); + return (numWords + WordsPerInstance - 1) / WordsPerInstance; + } + + VTKM_EXEC void operator()(vtkm::Id instanceIdx) const + { + const vtkm::Id numWords = this->Input.GetNumberOfWords(); + const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords); + const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords); + + if (wordStart != wordEnd) // range is valid + { + this->ExecuteRange(wordStart, wordEnd); + } + } + + VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); } + +private: + VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const + { +#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function: + // Count bits and allocate space for output: + vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd); + this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed); +#else + (void)wordStart; + (void)wordEnd; +#endif + } + + VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const + { + // Need to mask out trailing bits from the final word: + const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1); + + if (isFinalChunk) + { + wordEnd = this->FinalWordIndex; + } + + vtkm::Int32 tmp = 0; + for (vtkm::Id i = wordStart; i < wordEnd; ++i) + { + tmp += vtkm::CountSetBits(this->Input.GetWord(i)); + } + + if (isFinalChunk) + { + tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask); + } + + return static_cast(tmp); + } + + BitsPortal Input; + std::atomic& PopCount; + // Used to mask trailing bits the in last word. + vtkm::Id FinalWordIndex{ 0 }; + WordType FinalWordMask{ 0 }; +}; + +// For a given unsigned integer less than 32 bits, repeat its bits until we +// have a 32 bit pattern. This is used to make all fill patterns at least +// 32 bits in size, since concurrently writing to adjacent locations smaller +// than 32 bits may race on some platforms. +template = 4)>::type> +static constexpr VTKM_CONT WordType RepeatTo32BitsIfNeeded(WordType pattern) +{ // for 32 bits or more, just pass the type through. + return pattern; +} + +static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt16 pattern) +{ + return static_cast(pattern << 16 | pattern); +} + +static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt8 pattern) +{ + return RepeatTo32BitsIfNeeded(static_cast(pattern << 8 | pattern)); +} + +template +struct FillBitFieldFunctor : public vtkm::exec::FunctorBase +{ + VTKM_CONT + FillBitFieldFunctor(const BitsPortal& portal, WordType mask) + : Portal{ portal } + , Mask{ mask } + { + } + + VTKM_EXEC void operator()(vtkm::Id wordIdx) const { this->Portal.SetWord(wordIdx, this->Mask); } + +private: + BitsPortal Portal; + WordType Mask; +}; + +template +struct FillArrayHandleFunctor : public vtkm::exec::FunctorBase +{ + using ValueType = typename PortalType::ValueType; + + VTKM_CONT + FillArrayHandleFunctor(const PortalType& portal, ValueType value) + : Portal{ portal } + , Value{ value } + { + } + + VTKM_EXEC void operator()(vtkm::Id idx) const { this->Portal.Set(idx, this->Value); } + +private: + PortalType Portal; + ValueType Value; +}; + template struct LowerBoundsKernel { diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h index 43160250c..c64f0bfd2 100644 --- a/vtkm/cont/testing/TestingDeviceAdapter.h +++ b/vtkm/cont/testing/TestingDeviceAdapter.h @@ -2438,7 +2438,7 @@ private: auto testRepeatedMask = [&](WordType mask) { std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask - << std::endl; + << std::dec << std::endl; BitField bits; { @@ -2455,7 +2455,7 @@ private: auto testRandomMask = [&](WordType seed) { std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex - << seed << std::endl; + << seed << std::dec << std::endl; std::mt19937 mt{ seed }; std::uniform_int_distribution rng; @@ -2486,11 +2486,244 @@ private: testRandomMask(0xdeadbeef); } + static VTKM_CONT void TestCountSetBits() + { + using WordType = WordTypeDefault; + + // Test that everything works correctly with a partial word at the end. + static constexpr vtkm::Id BitsPerWord = static_cast(sizeof(WordType) * CHAR_BIT); + // +5 to get a partial word: + static constexpr vtkm::Id NumFullWords = 1024; + static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5; + static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord; + + auto verifyPopCount = [](const BitField& bits) { + vtkm::Id refPopCount = 0; + const vtkm::Id numBits = bits.GetNumberOfBits(); + auto portal = bits.GetPortalConstControl(); + for (vtkm::Id idx = 0; idx < numBits; ++idx) + { + if (portal.GetBit(idx)) + { + ++refPopCount; + } + } + + const vtkm::Id popCount = Algorithm::CountSetBits(bits); + + VTKM_TEST_ASSERT( + refPopCount == popCount, "CountSetBits returned ", popCount, ", expected ", refPopCount); + }; + + auto testRepeatedMask = [&](WordType mask) { + std::cout << "Testing CountSetBits with repeated word 0x" << std::hex << mask << std::dec + << std::endl; + + BitField bits; + { + bits.Allocate(NumBits); + auto fillPortal = bits.GetPortalControl(); + for (vtkm::Id i = 0; i < NumWords; ++i) + { + fillPortal.SetWord(i, mask); + } + } + + verifyPopCount(bits); + }; + + auto testRandomMask = [&](WordType seed) { + std::cout << "Testing CountSetBits with random sequence seeded with 0x" << std::hex << seed + << std::dec << std::endl; + + std::mt19937 mt{ seed }; + std::uniform_int_distribution rng; + + BitField bits; + { + bits.Allocate(NumBits); + auto fillPortal = bits.GetPortalControl(); + for (vtkm::Id i = 0; i < NumWords; ++i) + { + fillPortal.SetWord(i, static_cast(rng(mt))); + } + } + + verifyPopCount(bits); + }; + + testRepeatedMask(0x00000000); + testRepeatedMask(0xeeeeeeee); + testRepeatedMask(0xffffffff); + testRepeatedMask(0x1c0fd395); + testRepeatedMask(0xdeadbeef); + + testRandomMask(0x00000000); + testRandomMask(0xeeeeeeee); + testRandomMask(0xffffffff); + testRandomMask(0x1c0fd395); + testRandomMask(0xdeadbeef); + } + + template + static VTKM_CONT void TestFillBitFieldMask(WordType mask) + { + std::cout << "Testing Fill with " << (sizeof(WordType) * CHAR_BIT) << " bit mask: " << std::hex + << vtkm::UInt64{ mask } << std::dec << std::endl; + + // Test that everything works correctly with a partial word at the end. + static constexpr vtkm::Id BitsPerWord = static_cast(sizeof(WordType) * CHAR_BIT); + // +5 to get a partial word: + static constexpr vtkm::Id NumFullWords = 1024; + static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5; + static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord; + + vtkm::cont::BitField bits; + { + Algorithm::Fill(bits, mask, NumBits); + + vtkm::Id numBits = bits.GetNumberOfBits(); + VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits."); + vtkm::Id numWords = bits.GetNumberOfWords(); + VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words."); + + auto portal = bits.GetPortalConstControl(); + for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx) + { + VTKM_TEST_ASSERT(portal.GetWord(wordIdx) == mask, + "Incorrect word in result BitField; expected 0x", + std::hex, + vtkm::UInt64{ mask }, + ", got 0x", + vtkm::UInt64{ portal.GetWord(wordIdx) }, + std::dec, + " for word ", + wordIdx, + "/", + NumWords); + } + } + + // Now fill the BitField with the reversed mask to test the no-alloc + // overload: + { + WordType invWord = static_cast(~mask); + Algorithm::Fill(bits, invWord); + + vtkm::Id numBits = bits.GetNumberOfBits(); + VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits."); + vtkm::Id numWords = bits.GetNumberOfWords(); + VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words."); + + auto portal = bits.GetPortalConstControl(); + for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx) + { + VTKM_TEST_ASSERT(portal.GetWord(wordIdx) == invWord, + "Incorrect word in result BitField; expected 0x", + std::hex, + vtkm::UInt64{ invWord }, + ", got 0x", + vtkm::UInt64{ portal.GetWord(wordIdx) }, + std::dec, + " for word ", + wordIdx, + "/", + NumWords); + } + } + } + + static VTKM_CONT void TestFillBitFieldBool(bool value) + { + std::cout << "Testing Fill with bool: " << value << std::endl; + + // Test that everything works correctly with a partial word at the end. + // +5 to get a partial word: + static constexpr vtkm::Id NumBits = 1024 * 32 + 5; + + vtkm::cont::BitField bits; + { + Algorithm::Fill(bits, value, NumBits); + + vtkm::Id numBits = bits.GetNumberOfBits(); + VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits."); + + auto portal = bits.GetPortalConstControl(); + for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx) + { + VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == value, "Incorrect bit in result BitField."); + } + } + + // Now fill the BitField with the reversed mask to test the no-alloc + // overload: + { + Algorithm::Fill(bits, !value); + + vtkm::Id numBits = bits.GetNumberOfBits(); + VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits."); + + auto portal = bits.GetPortalConstControl(); + for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx) + { + VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == !value, "Incorrect bit in result BitField."); + } + } + } + + static VTKM_CONT void TestFillBitField() + { + TestFillBitFieldBool(true); + TestFillBitFieldBool(false); + TestFillBitFieldMask(vtkm::UInt8{ 0 }); + TestFillBitFieldMask(static_cast(~vtkm::UInt8{ 0 })); + TestFillBitFieldMask(vtkm::UInt8{ 0xab }); + TestFillBitFieldMask(vtkm::UInt8{ 0x4f }); + TestFillBitFieldMask(vtkm::UInt16{ 0 }); + TestFillBitFieldMask(static_cast(~vtkm::UInt16{ 0 })); + TestFillBitFieldMask(vtkm::UInt16{ 0xfade }); + TestFillBitFieldMask(vtkm::UInt16{ 0xbeef }); + TestFillBitFieldMask(vtkm::UInt32{ 0 }); + TestFillBitFieldMask(static_cast(~vtkm::UInt32{ 0 })); + TestFillBitFieldMask(vtkm::UInt32{ 0xfacecafe }); + TestFillBitFieldMask(vtkm::UInt32{ 0xbaddecaf }); + TestFillBitFieldMask(vtkm::UInt64{ 0 }); + TestFillBitFieldMask(static_cast(~vtkm::UInt64{ 0 })); + TestFillBitFieldMask(vtkm::UInt64{ 0xbaddefacedfacade }); + TestFillBitFieldMask(vtkm::UInt64{ 0xfeeddeadbeef2dad }); + } + + static VTKM_CONT void TestFillArrayHandle() + { + vtkm::cont::ArrayHandle handle; + Algorithm::Fill(handle, 867, ARRAY_SIZE); + + { + auto portal = handle.GetPortalConstControl(); + VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE); + for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i) + { + VTKM_TEST_ASSERT(portal.Get(i) == 867); + } + } + + Algorithm::Fill(handle, 5309); + { + auto portal = handle.GetPortalConstControl(); + VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE); + for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i) + { + VTKM_TEST_ASSERT(portal.Get(i) == 5309); + } + } + } + struct TestAll { VTKM_CONT void operator()() const { std::cout << "Doing DeviceAdapter tests" << std::endl; + TestArrayTransfer(); TestOutOfMemory(); TestTimer(); @@ -2541,6 +2774,10 @@ private: TestAtomicArray(); TestBitFieldToUnorderedSet(); + TestCountSetBits(); + TestFillBitField(); + + TestFillArrayHandle(); } };