mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-19 18:45:43 +00:00
Merge branch 'master' of https://gitlab.kitware.com/vtk/vtk-m into polyLinePathGeom
This commit is contained in:
commit
9132e75ac8
1767
CMake/FindMPI.cmake
Normal file
1767
CMake/FindMPI.cmake
Normal file
File diff suppressed because it is too large
Load Diff
24
CMake/VTKmMPI.cmake
Normal file
24
CMake/VTKmMPI.cmake
Normal file
@ -0,0 +1,24 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
##
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##============================================================================
|
||||
|
||||
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
|
||||
if(CMAKE_VERSION VERSION_LESS 3.15)
|
||||
#While CMake 3.10 introduced the new MPI module.
|
||||
#Fixes related to MPI+CUDA that VTK-m needs are
|
||||
#only found in CMake 3.15+.
|
||||
find_package(MPI REQUIRED MODULE)
|
||||
else()
|
||||
#clunky but we need to make sure we use the upstream module if it exists
|
||||
set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
|
||||
set(CMAKE_MODULE_PATH "")
|
||||
find_package(MPI MODULE)
|
||||
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
|
||||
endif()
|
||||
endif()
|
@ -12,6 +12,7 @@ include(CMakeParseArguments)
|
||||
|
||||
include(VTKmDeviceAdapters)
|
||||
include(VTKmCPUVectorization)
|
||||
include(VTKmMPI)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Utility to build a kit name from the current directory.
|
||||
|
@ -62,13 +62,13 @@ file(GENERATE
|
||||
OUTPUT "${${file_loc_var}}"
|
||||
CONTENT
|
||||
"
|
||||
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING \"\")
|
||||
set(CMAKE_PREFIX_PATH ${install_prefix} CACHE STRING \"\")
|
||||
set(CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH \"\")
|
||||
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING \"\")
|
||||
set(CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER} CACHE FILEPATH \"\")
|
||||
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} CACHE STRING \"\")
|
||||
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER} CACHE FILEPATH \"\")
|
||||
set(CMAKE_MAKE_PROGRAM \"${CMAKE_MAKE_PROGRAM}\" CACHE FILEPATH \"\")
|
||||
set(CMAKE_PREFIX_PATH \"${CMAKE_PREFIX_PATH};${install_prefix}/\" CACHE STRING \"\")
|
||||
set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"\")
|
||||
set(CMAKE_CXX_FLAGS \"$CACHE{CMAKE_CXX_FLAGS}\" CACHE STRING \"\")
|
||||
set(CMAKE_CUDA_COMPILER \"${CMAKE_CUDA_COMPILER}\" CACHE FILEPATH \"\")
|
||||
set(CMAKE_CUDA_FLAGS \"$CACHE{CMAKE_CUDA_FLAGS}\" CACHE STRING \"\")
|
||||
set(CMAKE_CUDA_HOST_COMPILER \"${CMAKE_CUDA_HOST_COMPILER}\" CACHE FILEPATH \"\")
|
||||
"
|
||||
)
|
||||
|
||||
@ -81,8 +81,34 @@ function(vtkm_test_against_install dir)
|
||||
set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/${name}/")
|
||||
set(build_dir "${VTKm_BINARY_DIR}/CMakeFiles/_tmp_build/test_${name}/")
|
||||
|
||||
set(build_config "${build_dir}/build_options.cmake")
|
||||
set(args )
|
||||
if(CMAKE_VERSION VERSION_LESS 3.13)
|
||||
#Before 3.13 the config file passing to cmake via ctest --build-options
|
||||
#was broken
|
||||
set(args
|
||||
-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}
|
||||
-DCMAKE_PREFIX_PATH:STRING=${install_prefix}
|
||||
-DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_CUDA_COMPILER:FILEPATH=${CMAKE_CUDA_COMPILER}
|
||||
-DCMAKE_CUDA_HOST_COMPILER:FILEPATH=${CMAKE_CUDA_HOST_COMPILER}
|
||||
-DCMAKE_CXX_FLAGS:STRING=$CACHE{CMAKE_CXX_FLAGS}
|
||||
-DCMAKE_CUDA_FLAGS:STRING=$CACHE{CMAKE_CUDA_FLAGS}
|
||||
)
|
||||
else()
|
||||
set(build_config "${build_dir}build_options.cmake")
|
||||
vtkm_generate_install_build_options(build_config)
|
||||
set(args -C ${build_config})
|
||||
endif()
|
||||
|
||||
if(WIN32 AND TARGET vtkm::tbb)
|
||||
#on windows we need to specify these as FindTBB won't
|
||||
#find the installed version just with the prefix path
|
||||
list(APPEND args
|
||||
-DTBB_LIBRARY_DEBUG:FILEPATH=${TBB_LIBRARY_DEBUG}
|
||||
-DTBB_LIBRARY_RELEASE:FILEPATH=${TBB_LIBRARY_RELEASE}
|
||||
-DTBB_INCLUDE_DIR:PATH=${TBB_INCLUDE_DIR}
|
||||
)
|
||||
endif()
|
||||
|
||||
#determine if the test is expected to compile or fail to build. We use
|
||||
#this information to built the test name to make it clear to the user
|
||||
@ -93,10 +119,13 @@ function(vtkm_test_against_install dir)
|
||||
|
||||
add_test(NAME ${build_name}
|
||||
COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
-C $<CONFIG>
|
||||
--build-and-test ${src_dir} ${build_dir}
|
||||
--build-generator ${CMAKE_GENERATOR}
|
||||
--build-makeprogram ${CMAKE_MAKE_PROGRAM}
|
||||
--build-options -C "${build_config}"
|
||||
--build-options
|
||||
${args}
|
||||
--no-warn-unused-cli
|
||||
)
|
||||
|
||||
set_tests_properties(${build_name} PROPERTIES LABELS ${test_label} )
|
||||
|
@ -182,14 +182,6 @@ check_type_size("long long" VTKm_SIZE_LONG_LONG BUILTIN_TYPES_ONLY)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Add subdirectories
|
||||
if(VTKm_ENABLE_MPI)
|
||||
# This `if` is temporary and will be removed once `diy` supports building
|
||||
# without MPI.
|
||||
if (NOT MPI_C_FOUND)
|
||||
find_package(MPI ${VTKm_FIND_PACKAGE_QUIETLY})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_subdirectory(vtkm)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
@ -241,6 +233,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
|
||||
install(
|
||||
FILES
|
||||
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
|
||||
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
|
||||
@ -253,6 +246,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
|
||||
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
|
||||
@ -315,6 +309,4 @@ endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build examples
|
||||
if(VTKm_ENABLE_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif(VTKm_ENABLE_EXAMPLES)
|
||||
|
@ -49,7 +49,6 @@ contents of these for details on the specifics of their respective
|
||||
licenses.
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - do not remove this line
|
||||
CMake/FindTBB.cmake
|
||||
CMake/FindGLEW.cmake
|
||||
Utilities
|
||||
vtkm/cont/tbb/internal/parallel_sort.h
|
||||
vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h
|
||||
|
@ -57,21 +57,24 @@ enum BenchmarkName
|
||||
BITFIELD_TO_UNORDERED_SET = 1 << 0,
|
||||
COPY = 1 << 1,
|
||||
COPY_IF = 1 << 2,
|
||||
LOWER_BOUNDS = 1 << 3,
|
||||
REDUCE = 1 << 4,
|
||||
REDUCE_BY_KEY = 1 << 5,
|
||||
SCAN_INCLUSIVE = 1 << 6,
|
||||
SCAN_EXCLUSIVE = 1 << 7,
|
||||
SORT = 1 << 8,
|
||||
SORT_BY_KEY = 1 << 9,
|
||||
STABLE_SORT_INDICES = 1 << 10,
|
||||
STABLE_SORT_INDICES_UNIQUE = 1 << 11,
|
||||
UNIQUE = 1 << 12,
|
||||
UPPER_BOUNDS = 1 << 13,
|
||||
COUNT_SET_BITS = 1 << 3,
|
||||
FILL = 1 << 4,
|
||||
LOWER_BOUNDS = 1 << 5,
|
||||
REDUCE = 1 << 6,
|
||||
REDUCE_BY_KEY = 1 << 7,
|
||||
SCAN_EXCLUSIVE = 1 << 8,
|
||||
SCAN_INCLUSIVE = 1 << 9,
|
||||
SORT = 1 << 10,
|
||||
SORT_BY_KEY = 1 << 11,
|
||||
STABLE_SORT_INDICES = 1 << 12,
|
||||
STABLE_SORT_INDICES_UNIQUE = 1 << 13,
|
||||
UNIQUE = 1 << 14,
|
||||
UPPER_BOUNDS = 1 << 15,
|
||||
|
||||
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY |
|
||||
SCAN_INCLUSIVE |
|
||||
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | COUNT_SET_BITS | FILL | LOWER_BOUNDS | REDUCE |
|
||||
REDUCE_BY_KEY |
|
||||
SCAN_EXCLUSIVE |
|
||||
SCAN_INCLUSIVE |
|
||||
SORT |
|
||||
SORT_BY_KEY |
|
||||
STABLE_SORT_INDICES |
|
||||
@ -84,28 +87,20 @@ enum BenchmarkName
|
||||
/// described below:
|
||||
struct BenchDevAlgoConfig
|
||||
{
|
||||
/// Benchmarks to run. Possible values:
|
||||
/// Copy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanInclusive,
|
||||
/// ScanExclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique,
|
||||
/// Unique, UpperBounds, or All. (Default: All).
|
||||
// Zero is for parsing, will change to 'all' in main if needed.
|
||||
/// Benchmarks to run. See BenchmarkName enum.
|
||||
int BenchmarkFlags{ 0 };
|
||||
|
||||
/// ValueTypes to test.
|
||||
/// CLI arg: "TypeList [Base|Extended]" (Base is default).
|
||||
/// CLI arg: "--base-typelist | --extended-typelist" (Base is default).
|
||||
bool ExtendedTypeList{ false };
|
||||
|
||||
/// Run benchmarks using the same number of bytes for all arrays.
|
||||
/// CLI arg: "FixBytes [n|off]" (n is the number of bytes, default: 2097152, ie. 2MiB)
|
||||
/// @note FixBytes and FixSizes are not mutually exclusive. If both are
|
||||
/// specified, both will run.
|
||||
/// CLI arg: "--array-size-bytes [n]" (n is the number of bytes, default: 2097152, ie. 2MiB)
|
||||
bool TestArraySizeBytes{ true };
|
||||
vtkm::UInt64 ArraySizeBytes{ 1 << 21 };
|
||||
|
||||
/// Run benchmarks using the same number of values for all arrays.
|
||||
/// CLI arg: "FixSizes [n|off]" (n is the number of values, default: off)
|
||||
/// @note FixBytes and FixSizes are not mutually exclusive. If both are
|
||||
/// specified, both will run.
|
||||
/// CLI arg: "--array-size-values [n]" (n is the number of values, default: off)
|
||||
bool TestArraySizeValues{ false };
|
||||
vtkm::UInt64 ArraySizeValues{ 1 << 21 };
|
||||
|
||||
@ -113,7 +108,7 @@ struct BenchDevAlgoConfig
|
||||
/// values (5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 75%, 100%
|
||||
/// unique). If false (default), the range is limited to 5%, 25%, 50%, 75%,
|
||||
/// 100%.
|
||||
/// CLI arg: "DetailedOutputRange" enables the extended range.
|
||||
/// CLI arg: "--more-output-range" enables the extended range.
|
||||
bool DetailedOutputRangeScaling{ false };
|
||||
|
||||
// Internal: The benchmarking code will set this depending on execution phase:
|
||||
@ -289,11 +284,11 @@ public:
|
||||
{
|
||||
if (wordIdx <= this->MaxMaskedWord && (wordIdx % this->Stride) == 0)
|
||||
{
|
||||
this->Portal.SetWord(wordIdx, this->Exemplar);
|
||||
this->Portal.SetWordAtomic(wordIdx, this->Exemplar);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->Portal.SetWord(wordIdx, static_cast<WordType>(0));
|
||||
this->Portal.SetWordAtomic(wordIdx, static_cast<WordType>(0));
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -317,8 +312,10 @@ public:
|
||||
stride = 1;
|
||||
}
|
||||
|
||||
vtkm::Id numBits = numWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
|
||||
|
||||
vtkm::cont::BitField bits;
|
||||
auto portal = bits.PrepareForOutput(numWords, DeviceAdapterTag{});
|
||||
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
using Functor = GenerateBitFieldFunctor<WordType, decltype(portal)>;
|
||||
|
||||
@ -533,6 +530,184 @@ private:
|
||||
VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75);
|
||||
VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100);
|
||||
|
||||
template <typename WordType, typename DeviceAdapter>
|
||||
struct BenchCountSetBits
|
||||
{
|
||||
vtkm::Id NumWords;
|
||||
vtkm::Id NumBits;
|
||||
WordType Exemplar;
|
||||
vtkm::Id Stride;
|
||||
vtkm::Float32 FillRatio;
|
||||
vtkm::Id MaxMaskedIndex;
|
||||
std::string Name;
|
||||
|
||||
vtkm::cont::BitField Bits;
|
||||
|
||||
// See GenerateBitField for details. fillRatio is used to compute
|
||||
// maxMaskedWord.
|
||||
VTKM_CONT
|
||||
BenchCountSetBits(WordType exemplar,
|
||||
vtkm::Id stride,
|
||||
vtkm::Float32 fillRatio,
|
||||
const std::string& name)
|
||||
: NumWords(Config.ComputeNumberOfWords<WordType>())
|
||||
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
|
||||
, Exemplar(exemplar)
|
||||
, Stride(stride)
|
||||
, FillRatio(fillRatio)
|
||||
, MaxMaskedIndex(this->NumWords / static_cast<vtkm::Id>(1. / this->FillRatio))
|
||||
, Name(name)
|
||||
, Bits(GenerateBitField<WordType, DeviceAdapter>(this->Exemplar,
|
||||
this->Stride,
|
||||
this->MaxMaskedIndex,
|
||||
this->NumWords))
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
Timer timer(DeviceAdapter{});
|
||||
timer.Start();
|
||||
Algorithm::CountSetBits(DeviceAdapter{}, this->Bits);
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
const vtkm::Id numFilledWords = this->MaxMaskedIndex / this->Stride;
|
||||
const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar);
|
||||
|
||||
std::stringstream description;
|
||||
description << "CountSetBits" << this->Name << " ( "
|
||||
<< "NumWords: " << this->NumWords << " "
|
||||
<< "Exemplar: " << std::hex << this->Exemplar << std::dec << " "
|
||||
<< "FillRatio: " << this->FillRatio << " "
|
||||
<< "Stride: " << this->Stride << " "
|
||||
<< "NumSetBits: " << numSetBits << " )";
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsNull, BenchCountSetBits, 0x00000000, 1, 0.f, "Null");
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsFull, BenchCountSetBits, 0xffffffff, 1, 1.f, "Full");
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsHalfWord, BenchCountSetBits, 0xffff0000, 1, 1.f, "HalfWord");
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsHalfField, BenchCountSetBits, 0xffffffff, 1, 0.5f, "HalfField");
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsAlternateWords,
|
||||
BenchCountSetBits,
|
||||
0xffffffff,
|
||||
2,
|
||||
1.f,
|
||||
"AlternateWords");
|
||||
VTKM_MAKE_BENCHMARK(CountSetBitsAlternateBits,
|
||||
BenchCountSetBits,
|
||||
0x55555555,
|
||||
1,
|
||||
1.f,
|
||||
"AlternateBits");
|
||||
|
||||
template <typename ValueType, typename DeviceAdapter>
|
||||
struct BenchFillArrayHandle
|
||||
{
|
||||
vtkm::cont::ArrayHandle<ValueType> Handle;
|
||||
|
||||
VTKM_CONT
|
||||
BenchFillArrayHandle() {}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
const vtkm::Id numVals = Config.ComputeSize<ValueType>();
|
||||
Timer timer(DeviceAdapter{});
|
||||
timer.Start();
|
||||
Algorithm::Fill(DeviceAdapter{}, this->Handle, TestValue(19, ValueType{}), numVals);
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
const vtkm::Id numVals = Config.ComputeSize<ValueType>();
|
||||
std::stringstream description;
|
||||
description << "Fill (ArrayHandle, " << numVals << " values)";
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
VTKM_MAKE_BENCHMARK(FillArrayHandle, BenchFillArrayHandle);
|
||||
|
||||
template <typename WordType, typename DeviceAdapter>
|
||||
struct BenchFillBitField
|
||||
{
|
||||
vtkm::Id NumWords;
|
||||
vtkm::Id NumBits;
|
||||
bool UseBool;
|
||||
WordType Pattern;
|
||||
std::string Name;
|
||||
|
||||
vtkm::cont::BitField Bits;
|
||||
|
||||
VTKM_CONT
|
||||
BenchFillBitField(bool useBool, WordType pattern, const std::string& name)
|
||||
: NumWords(Config.ComputeNumberOfWords<WordType>())
|
||||
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
|
||||
, UseBool(useBool)
|
||||
, Pattern(pattern)
|
||||
, Name(name)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Float64 operator()()
|
||||
{
|
||||
Timer timer(DeviceAdapter{});
|
||||
if (this->UseBool)
|
||||
{
|
||||
timer.Start();
|
||||
Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern != 0, this->NumBits);
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
else
|
||||
{
|
||||
timer.Start();
|
||||
Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern, this->NumBits);
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
std::string Description() const
|
||||
{
|
||||
std::stringstream description;
|
||||
description << "Fill (BitField)" << this->Name << " ( "
|
||||
<< "FillPattern: " << std::hex << this->Pattern << std::dec << " "
|
||||
<< "UseBool: " << this->UseBool << " "
|
||||
<< "NumBits: " << this->NumBits << " )";
|
||||
return description.str();
|
||||
}
|
||||
};
|
||||
VTKM_MAKE_BENCHMARK(FillBitFieldTrue, BenchFillBitField, true, 0x1, "True");
|
||||
VTKM_MAKE_BENCHMARK(FillBitFieldFalse, BenchFillBitField, true, 0x0, "False");
|
||||
VTKM_MAKE_BENCHMARK(FillBitField8Bit,
|
||||
BenchFillBitField,
|
||||
false,
|
||||
static_cast<vtkm::UInt8>(0xcc),
|
||||
"8Bit");
|
||||
VTKM_MAKE_BENCHMARK(FillBitField16Bit,
|
||||
BenchFillBitField,
|
||||
false,
|
||||
static_cast<vtkm::UInt16>(0xcccc),
|
||||
"16Bit");
|
||||
VTKM_MAKE_BENCHMARK(FillBitField32Bit,
|
||||
BenchFillBitField,
|
||||
false,
|
||||
static_cast<vtkm::UInt32>(0xcccccccc),
|
||||
"32Bit");
|
||||
VTKM_MAKE_BENCHMARK(FillBitField64Bit,
|
||||
BenchFillBitField,
|
||||
false,
|
||||
static_cast<vtkm::UInt64>(0xcccccccccccccccc),
|
||||
"64Bit");
|
||||
|
||||
template <typename Value, typename DeviceAdapter>
|
||||
struct BenchLowerBounds
|
||||
{
|
||||
@ -1154,17 +1329,46 @@ public:
|
||||
template <typename ValueTypes>
|
||||
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
|
||||
{
|
||||
using BitFieldWordTypes = vtkm::ListTagBase<vtkm::UInt32>;
|
||||
using UInt8Type = vtkm::ListTagBase<vtkm::UInt8>;
|
||||
using UInt16Type = vtkm::ListTagBase<vtkm::UInt16>;
|
||||
using UInt32Type = vtkm::ListTagBase<vtkm::UInt32>;
|
||||
using UInt64Type = vtkm::ListTagBase<vtkm::UInt64>;
|
||||
|
||||
// These need specific word types:
|
||||
if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n";
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, UInt32Type{}, id);
|
||||
}
|
||||
|
||||
if (Config.BenchmarkFlags & COUNT_SET_BITS)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking CountSetBits\n";
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsNull, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsFull, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsHalfWord, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsHalfField, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsAlternateWords, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(CountSetBitsAlternateBits, UInt32Type{}, id);
|
||||
}
|
||||
|
||||
if (Config.BenchmarkFlags & FILL)
|
||||
{
|
||||
std::cout << DIVIDER << "\nBenchmarking Fill (ArrayHandle)\n";
|
||||
VTKM_RUN_BENCHMARK(FillArrayHandle, ValueTypes{}, id);
|
||||
|
||||
std::cout << DIVIDER << "\nBenchmarking Fill (BitField)\n";
|
||||
VTKM_RUN_BENCHMARK(FillBitFieldTrue, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(FillBitFieldFalse, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(FillBitField8Bit, UInt8Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(FillBitField16Bit, UInt16Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(FillBitField32Bit, UInt32Type{}, id);
|
||||
VTKM_RUN_BENCHMARK(FillBitField64Bit, UInt64Type{}, id);
|
||||
}
|
||||
|
||||
if (Config.BenchmarkFlags & COPY)
|
||||
@ -1413,7 +1617,7 @@ struct Arg : vtkm::cont::internal::option::Arg
|
||||
const char* c = option.arg;
|
||||
while (argIsNum && (*c != '\0'))
|
||||
{
|
||||
argIsNum &= static_cast<bool>(std::isdigit(*c));
|
||||
argIsNum = argIsNum && static_cast<bool>(std::isdigit(*c));
|
||||
++c;
|
||||
}
|
||||
|
||||
@ -1524,7 +1728,8 @@ int main(int argc, char* argv[])
|
||||
"",
|
||||
"",
|
||||
Arg::None,
|
||||
"\tCopy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanExclusive, "
|
||||
"\tBitFieldToUnorderedSet, Copy, CopyIf, CountSetBits, FillBitField, "
|
||||
"LowerBounds, Reduce, ReduceByKey, ScanExclusive, "
|
||||
"ScanInclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique, "
|
||||
"Unique, UpperBounds" });
|
||||
usage.push_back(
|
||||
@ -1631,6 +1836,14 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::COPY_IF;
|
||||
}
|
||||
else if (arg == "countsetbits")
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::COUNT_SET_BITS;
|
||||
}
|
||||
else if (arg == "fill")
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::FILL;
|
||||
}
|
||||
else if (arg == "lowerbounds")
|
||||
{
|
||||
config.BenchmarkFlags |= vtkm::benchmarking::LOWER_BOUNDS;
|
||||
|
@ -10,8 +10,9 @@
|
||||
|
||||
#add the directory that contains the VTK-m config file to the cmake
|
||||
#path so that our examples can find VTK-m
|
||||
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
|
||||
|
||||
if(VTKm_ENABLE_EXAMPLES)
|
||||
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
|
||||
add_subdirectory(clipping)
|
||||
add_subdirectory(contour_tree)
|
||||
add_subdirectory(contour_tree_augmented)
|
||||
@ -30,8 +31,7 @@ add_subdirectory(rendering)
|
||||
add_subdirectory(streamline)
|
||||
add_subdirectory(temporal_advection)
|
||||
add_subdirectory(tetrahedra)
|
||||
# add_subdirectory(unified_memory)
|
||||
|
||||
endif()
|
||||
|
||||
if (VTKm_ENABLE_TESTING)
|
||||
# These need to be fast to build as they will
|
||||
|
@ -108,6 +108,32 @@ struct CopySubRangeFunctor
|
||||
}
|
||||
};
|
||||
|
||||
struct CountSetBitsFunctor
|
||||
{
|
||||
vtkm::Id PopCount{ 0 };
|
||||
|
||||
template <typename Device, typename... Args>
|
||||
VTKM_CONT bool operator()(Device, Args&&... args)
|
||||
{
|
||||
this->PopCount = vtkm::cont::DeviceAdapterAlgorithm<Device>::CountSetBits(
|
||||
PrepareArgForExec<Device>(std::forward<Args>(args))...);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct FillFunctor
|
||||
{
|
||||
vtkm::Id PopCount{ 0 };
|
||||
|
||||
template <typename Device, typename... Args>
|
||||
VTKM_CONT bool operator()(Device, Args&&... args)
|
||||
{
|
||||
vtkm::cont::DeviceAdapterAlgorithm<Device>::Fill(
|
||||
PrepareArgForExec<Device>(std::forward<Args>(args))...);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct LowerBoundsFunctor
|
||||
{
|
||||
|
||||
@ -489,6 +515,109 @@ struct Algorithm
|
||||
outputIndex);
|
||||
}
|
||||
|
||||
VTKM_CONT static vtkm::Id CountSetBits(vtkm::cont::DeviceAdapterId devId,
|
||||
const vtkm::cont::BitField& bits)
|
||||
{
|
||||
detail::CountSetBitsFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits);
|
||||
return functor.PopCount;
|
||||
}
|
||||
|
||||
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
|
||||
{
|
||||
return CountSetBits(vtkm::cont::DeviceAdapterTagAny{}, bits);
|
||||
}
|
||||
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::BitField& bits,
|
||||
bool value,
|
||||
vtkm::Id numBits)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value, numBits);
|
||||
}
|
||||
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits)
|
||||
{
|
||||
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value, numBits);
|
||||
}
|
||||
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::BitField& bits,
|
||||
bool value)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value);
|
||||
}
|
||||
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value)
|
||||
{
|
||||
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value);
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::BitField& bits,
|
||||
WordType word,
|
||||
vtkm::Id numBits)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word, numBits);
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits)
|
||||
{
|
||||
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, word, numBits);
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::BitField& bits,
|
||||
WordType word)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word);
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word)
|
||||
{
|
||||
FillBitField(vtkm::cont::DeviceAdapterTagAny{}, bits, word);
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::ArrayHandle<T, S>& handle,
|
||||
const T& value)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value);
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle, const T& value)
|
||||
{
|
||||
Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value);
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
|
||||
vtkm::cont::ArrayHandle<T, S>& handle,
|
||||
const T& value,
|
||||
const vtkm::Id numValues)
|
||||
{
|
||||
detail::FillFunctor functor;
|
||||
vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value, numValues);
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle,
|
||||
const T& value,
|
||||
const vtkm::Id numValues)
|
||||
{
|
||||
Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value, numValues);
|
||||
}
|
||||
|
||||
template <typename T, class CIn, class CVal, class COut>
|
||||
VTKM_CONT static void LowerBounds(vtkm::cont::DeviceAdapterId devId,
|
||||
|
@ -119,6 +119,35 @@ struct DeviceAdapterAlgorithm
|
||||
vtkm::cont::ArrayHandle<U, COut>& output,
|
||||
vtkm::Id outputIndex = 0);
|
||||
|
||||
/// \brief Returns the total number of "1" bits in BitField.
|
||||
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits);
|
||||
|
||||
/// \brief Fill the BitField with a specific pattern of bits.
|
||||
/// For boolean values, all bits are set to 1 if value is true, or 0 if value
|
||||
/// is false.
|
||||
/// For word masks, the word type must be an unsigned integral type, which
|
||||
/// will be stamped across the BitField.
|
||||
/// If numBits is provided, the BitField is resized appropriately.
|
||||
/// @{
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits);
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value);
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits);
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, bool value);
|
||||
/// @}
|
||||
|
||||
/// Fill @a array with @a value. If @a numValues is specified, the array will
|
||||
/// be resized.
|
||||
/// @{
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& array, const T& value);
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& array,
|
||||
const T& value,
|
||||
const vtkm::Id numValues);
|
||||
/// @}
|
||||
|
||||
/// \brief Output is the first index in input for each item in values that wouldn't alter the ordering of input
|
||||
///
|
||||
/// LowerBounds is a vectorized search. From each value in \c values it finds
|
||||
|
@ -439,6 +439,95 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename BitsPortal, typename GlobalPopCountType>
|
||||
struct CountSetBitsFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same<GlobalPopCountType, vtkm::Int32>::value ||
|
||||
std::is_same<GlobalPopCountType, vtkm::UInt32>::value ||
|
||||
std::is_same<GlobalPopCountType, vtkm::UInt64>::value),
|
||||
"Unsupported GlobalPopCountType. Must support CUDA atomicAdd.");
|
||||
|
||||
//Using typename BitsPortal::WordTypePreferred causes dependent type errors using GCC 4.8.5
|
||||
//which is the GCC required compiler for CUDA 9.2 on summit/power9
|
||||
using Word = typename vtkm::cont::internal::AtomicInterfaceExecution<
|
||||
DeviceAdapterTagCuda>::WordTypePreferred;
|
||||
|
||||
VTKM_CONT
|
||||
CountSetBitsFunctor(const BitsPortal& portal, GlobalPopCountType* globalPopCount)
|
||||
: Portal{ portal }
|
||||
, GlobalPopCount{ globalPopCount }
|
||||
, FinalWordIndex{ portal.GetNumberOfWords() - 1 }
|
||||
, FinalWordMask{ portal.GetFinalWordMask() }
|
||||
{
|
||||
}
|
||||
|
||||
~CountSetBitsFunctor() {}
|
||||
|
||||
VTKM_CONT void Initialize()
|
||||
{
|
||||
assert(this->GlobalPopCount != nullptr);
|
||||
VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType)));
|
||||
}
|
||||
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||
__device__ void operator()(vtkm::Id wordIdx) const
|
||||
{
|
||||
Word word = this->Portal.GetWord(wordIdx);
|
||||
|
||||
// The last word may be partial -- mask out trailing bits if needed.
|
||||
const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 };
|
||||
|
||||
word &= mask;
|
||||
|
||||
if (word != 0)
|
||||
{
|
||||
this->LocalPopCount = vtkm::CountSetBits(word);
|
||||
this->Reduce();
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::Id Finalize() const
|
||||
{
|
||||
assert(this->GlobalPopCount != nullptr);
|
||||
GlobalPopCountType result;
|
||||
VTKM_CUDA_CALL(cudaMemcpy(
|
||||
&result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost));
|
||||
return static_cast<vtkm::Id>(result);
|
||||
}
|
||||
|
||||
private:
|
||||
// Every thread with a non-zero local popcount calls this function, which
|
||||
// computes the total popcount for the coalesced threads and atomically
|
||||
// increasing the global popcount.
|
||||
VTKM_SUPPRESS_EXEC_WARNINGS
|
||||
__device__ void Reduce() const
|
||||
{
|
||||
const auto activeLanes = cooperative_groups::coalesced_threads();
|
||||
const int activeRank = activeLanes.thread_rank();
|
||||
const int activeSize = activeLanes.size();
|
||||
|
||||
// Reduction value:
|
||||
vtkm::Int32 rVal = this->LocalPopCount;
|
||||
for (int delta = 1; delta < activeSize; delta *= 2)
|
||||
{
|
||||
rVal += activeLanes.shfl_down(rVal, delta);
|
||||
}
|
||||
|
||||
if (activeRank == 0)
|
||||
{
|
||||
atomicAdd(this->GlobalPopCount, static_cast<GlobalPopCountType>(rVal));
|
||||
}
|
||||
}
|
||||
|
||||
const BitsPortal Portal;
|
||||
GlobalPopCountType* GlobalPopCount;
|
||||
mutable vtkm::Int32 LocalPopCount{ 0 };
|
||||
// Used to mask trailing bits the in last word.
|
||||
vtkm::Id FinalWordIndex{ 0 };
|
||||
Word FinalWordMask{ 0 };
|
||||
};
|
||||
|
||||
template <class InputPortal, class ValuesPortal, class OutputPortal>
|
||||
VTKM_CONT static void LowerBoundsPortal(const InputPortal& input,
|
||||
const ValuesPortal& values,
|
||||
@ -959,6 +1048,21 @@ private:
|
||||
return functor.Finalize();
|
||||
}
|
||||
|
||||
template <typename GlobalPopCountType, typename BitsPortal>
|
||||
VTKM_CONT static vtkm::Id CountSetBitsPortal(const BitsPortal& bits)
|
||||
{
|
||||
using Functor = CountSetBitsFunctor<BitsPortal, GlobalPopCountType>;
|
||||
|
||||
// RAII for the global atomic counter.
|
||||
auto globalCount = cuda::internal::make_CudaUniquePtr<GlobalPopCountType>(1);
|
||||
Functor functor{ bits, globalCount.get() };
|
||||
|
||||
functor.Initialize();
|
||||
Schedule(functor, bits.GetNumberOfWords());
|
||||
Synchronize(); // Ensure kernel is done before checking final atomic count
|
||||
return functor.Finalize();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
public:
|
||||
@ -1095,6 +1199,14 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTagCuda{});
|
||||
// Use a uint64 for accumulator, as atomicAdd does not support signed int64.
|
||||
return CountSetBitsPortal<vtkm::UInt64>(bitsPortal);
|
||||
}
|
||||
|
||||
template <typename T, class SIn, class SVal, class SOut>
|
||||
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<T, SIn>& input,
|
||||
const vtkm::cont::ArrayHandle<T, SVal>& values,
|
||||
|
@ -269,6 +269,170 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Count Set Bits
|
||||
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{});
|
||||
|
||||
std::atomic<vtkm::UInt64> popCount;
|
||||
popCount.store(0, std::memory_order_relaxed);
|
||||
|
||||
using Functor = CountSetBitsFunctor<decltype(bitsPortal)>;
|
||||
Functor functor{ bitsPortal, popCount };
|
||||
|
||||
DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances());
|
||||
DerivedAlgorithm::Synchronize();
|
||||
|
||||
return static_cast<vtkm::Id>(popCount.load(std::memory_order_seq_cst));
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill Bit Field (bool, resize)
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
if (numBits == 0)
|
||||
{
|
||||
bits.Shrink(0);
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
using WordType =
|
||||
typename vtkm::cont::BitField::template ExecutionTypes<DeviceAdapterTag>::WordTypePreferred;
|
||||
|
||||
using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
|
||||
Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
|
||||
|
||||
const vtkm::Id numWords = portal.template GetNumberOfWords<WordType>();
|
||||
DerivedAlgorithm::Schedule(functor, numWords);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill Bit Field (bool)
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
const vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
if (numBits == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
using WordType =
|
||||
typename vtkm::cont::BitField::template ExecutionTypes<DeviceAdapterTag>::WordTypePreferred;
|
||||
|
||||
using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
|
||||
Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
|
||||
|
||||
const vtkm::Id numWords = portal.template GetNumberOfWords<WordType>();
|
||||
DerivedAlgorithm::Schedule(functor, numWords);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill Bit Field (mask, resize)
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits)
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType<WordType>{}, "Invalid word type.");
|
||||
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
if (numBits == 0)
|
||||
{
|
||||
bits.Shrink(0);
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
// If less than 32 bits, repeat the word until we get a 32 bit pattern.
|
||||
// Using this for the pattern prevents races while writing small numbers
|
||||
// to adjacent memory locations.
|
||||
auto repWord = RepeatTo32BitsIfNeeded(word);
|
||||
using RepWordType = decltype(repWord);
|
||||
|
||||
using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
|
||||
Functor functor{ portal, repWord };
|
||||
|
||||
const vtkm::Id numWords = portal.template GetNumberOfWords<RepWordType>();
|
||||
DerivedAlgorithm::Schedule(functor, numWords);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill Bit Field (mask)
|
||||
template <typename WordType>
|
||||
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word)
|
||||
{
|
||||
VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType<WordType>{}, "Invalid word type.");
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
const vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
if (numBits == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
|
||||
|
||||
// If less than 32 bits, repeat the word until we get a 32 bit pattern.
|
||||
// Using this for the pattern prevents races while writing small numbers
|
||||
// to adjacent memory locations.
|
||||
auto repWord = RepeatTo32BitsIfNeeded(word);
|
||||
using RepWordType = decltype(repWord);
|
||||
|
||||
using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
|
||||
Functor functor{ portal, repWord };
|
||||
|
||||
const vtkm::Id numWords = portal.template GetNumberOfWords<RepWordType>();
|
||||
DerivedAlgorithm::Schedule(functor, numWords);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill ArrayHandle
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle, const T& value)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
|
||||
const vtkm::Id numValues = handle.GetNumberOfValues();
|
||||
if (numValues == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{});
|
||||
FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
|
||||
DerivedAlgorithm::Schedule(functor, numValues);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fill ArrayHandle (resize)
|
||||
template <typename T, typename S>
|
||||
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle,
|
||||
const T& value,
|
||||
const vtkm::Id numValues)
|
||||
{
|
||||
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
|
||||
if (numValues == 0)
|
||||
{
|
||||
handle.Shrink(0);
|
||||
return;
|
||||
}
|
||||
|
||||
auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{});
|
||||
FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
|
||||
DerivedAlgorithm::Schedule(functor, numValues);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Lower Bounds
|
||||
template <typename T, class CIn, class CVal, class COut>
|
||||
|
@ -494,6 +494,149 @@ struct CopyKernel
|
||||
void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {}
|
||||
};
|
||||
|
||||
template <typename BitsPortal>
|
||||
struct CountSetBitsFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
using WordType = typename BitsPortal::WordTypePreferred;
|
||||
|
||||
// This functor executes a number of instances, where each instance handles
|
||||
// two cachelines worth of data. This reduces the number of atomic operations.
|
||||
// Figure out how many words that is:
|
||||
static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT;
|
||||
static constexpr vtkm::Id WordsPerCacheLine =
|
||||
CacheLineSize / static_cast<vtkm::Id>(sizeof(WordType));
|
||||
static constexpr vtkm::Id CacheLinesPerInstance = 2;
|
||||
static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine;
|
||||
|
||||
VTKM_CONT
|
||||
CountSetBitsFunctor(const BitsPortal& input, std::atomic<vtkm::UInt64>& popCount)
|
||||
: Input{ input }
|
||||
, PopCount{ popCount }
|
||||
, FinalWordIndex{ input.GetNumberOfWords() - 1 }
|
||||
, FinalWordMask{ input.GetFinalWordMask() }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::Id GetNumberOfInstances() const
|
||||
{
|
||||
const auto numWords = this->Input.GetNumberOfWords();
|
||||
return (numWords + WordsPerInstance - 1) / WordsPerInstance;
|
||||
}
|
||||
|
||||
VTKM_EXEC void operator()(vtkm::Id instanceIdx) const
|
||||
{
|
||||
const vtkm::Id numWords = this->Input.GetNumberOfWords();
|
||||
const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords);
|
||||
const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords);
|
||||
|
||||
if (wordStart != wordEnd) // range is valid
|
||||
{
|
||||
this->ExecuteRange(wordStart, wordEnd);
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); }
|
||||
|
||||
private:
|
||||
VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const
|
||||
{
|
||||
#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function:
|
||||
// Count bits and allocate space for output:
|
||||
vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd);
|
||||
this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed);
|
||||
#else
|
||||
(void)wordStart;
|
||||
(void)wordEnd;
|
||||
#endif
|
||||
}
|
||||
|
||||
VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const
|
||||
{
|
||||
// Need to mask out trailing bits from the final word:
|
||||
const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
wordEnd = this->FinalWordIndex;
|
||||
}
|
||||
|
||||
vtkm::Int32 tmp = 0;
|
||||
for (vtkm::Id i = wordStart; i < wordEnd; ++i)
|
||||
{
|
||||
tmp += vtkm::CountSetBits(this->Input.GetWord(i));
|
||||
}
|
||||
|
||||
if (isFinalChunk)
|
||||
{
|
||||
tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask);
|
||||
}
|
||||
|
||||
return static_cast<vtkm::UInt64>(tmp);
|
||||
}
|
||||
|
||||
BitsPortal Input;
|
||||
std::atomic<vtkm::UInt64>& PopCount;
|
||||
// Used to mask trailing bits the in last word.
|
||||
vtkm::Id FinalWordIndex{ 0 };
|
||||
WordType FinalWordMask{ 0 };
|
||||
};
|
||||
|
||||
// For a given unsigned integer less than 32 bits, repeat its bits until we
|
||||
// have a 32 bit pattern. This is used to make all fill patterns at least
|
||||
// 32 bits in size, since concurrently writing to adjacent locations smaller
|
||||
// than 32 bits may race on some platforms.
|
||||
template <typename WordType, typename = typename std::enable_if<(sizeof(WordType) >= 4)>::type>
|
||||
static constexpr VTKM_CONT WordType RepeatTo32BitsIfNeeded(WordType pattern)
|
||||
{ // for 32 bits or more, just pass the type through.
|
||||
return pattern;
|
||||
}
|
||||
|
||||
static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt16 pattern)
|
||||
{
|
||||
return static_cast<vtkm::UInt32>(pattern << 16 | pattern);
|
||||
}
|
||||
|
||||
static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt8 pattern)
|
||||
{
|
||||
return RepeatTo32BitsIfNeeded(static_cast<vtkm::UInt16>(pattern << 8 | pattern));
|
||||
}
|
||||
|
||||
template <typename BitsPortal, typename WordType>
|
||||
struct FillBitFieldFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
VTKM_CONT
|
||||
FillBitFieldFunctor(const BitsPortal& portal, WordType mask)
|
||||
: Portal{ portal }
|
||||
, Mask{ mask }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC void operator()(vtkm::Id wordIdx) const { this->Portal.SetWord(wordIdx, this->Mask); }
|
||||
|
||||
private:
|
||||
BitsPortal Portal;
|
||||
WordType Mask;
|
||||
};
|
||||
|
||||
template <typename PortalType>
|
||||
struct FillArrayHandleFunctor : public vtkm::exec::FunctorBase
|
||||
{
|
||||
using ValueType = typename PortalType::ValueType;
|
||||
|
||||
VTKM_CONT
|
||||
FillArrayHandleFunctor(const PortalType& portal, ValueType value)
|
||||
: Portal{ portal }
|
||||
, Value{ value }
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC void operator()(vtkm::Id idx) const { this->Portal.Set(idx, this->Value); }
|
||||
|
||||
private:
|
||||
PortalType Portal;
|
||||
ValueType Value;
|
||||
};
|
||||
|
||||
template <class InputPortalType, class ValuesPortalType, class OutputPortalType>
|
||||
struct LowerBoundsKernel
|
||||
{
|
||||
|
@ -2438,7 +2438,7 @@ private:
|
||||
|
||||
auto testRepeatedMask = [&](WordType mask) {
|
||||
std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask
|
||||
<< std::endl;
|
||||
<< std::dec << std::endl;
|
||||
|
||||
BitField bits;
|
||||
{
|
||||
@ -2455,7 +2455,7 @@ private:
|
||||
|
||||
auto testRandomMask = [&](WordType seed) {
|
||||
std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex
|
||||
<< seed << std::endl;
|
||||
<< seed << std::dec << std::endl;
|
||||
|
||||
std::mt19937 mt{ seed };
|
||||
std::uniform_int_distribution<std::mt19937::result_type> rng;
|
||||
@ -2486,11 +2486,244 @@ private:
|
||||
testRandomMask(0xdeadbeef);
|
||||
}
|
||||
|
||||
static VTKM_CONT void TestCountSetBits()
|
||||
{
|
||||
using WordType = WordTypeDefault;
|
||||
|
||||
// Test that everything works correctly with a partial word at the end.
|
||||
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
|
||||
// +5 to get a partial word:
|
||||
static constexpr vtkm::Id NumFullWords = 1024;
|
||||
static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5;
|
||||
static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;
|
||||
|
||||
auto verifyPopCount = [](const BitField& bits) {
|
||||
vtkm::Id refPopCount = 0;
|
||||
const vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
auto portal = bits.GetPortalConstControl();
|
||||
for (vtkm::Id idx = 0; idx < numBits; ++idx)
|
||||
{
|
||||
if (portal.GetBit(idx))
|
||||
{
|
||||
++refPopCount;
|
||||
}
|
||||
}
|
||||
|
||||
const vtkm::Id popCount = Algorithm::CountSetBits(bits);
|
||||
|
||||
VTKM_TEST_ASSERT(
|
||||
refPopCount == popCount, "CountSetBits returned ", popCount, ", expected ", refPopCount);
|
||||
};
|
||||
|
||||
auto testRepeatedMask = [&](WordType mask) {
|
||||
std::cout << "Testing CountSetBits with repeated word 0x" << std::hex << mask << std::dec
|
||||
<< std::endl;
|
||||
|
||||
BitField bits;
|
||||
{
|
||||
bits.Allocate(NumBits);
|
||||
auto fillPortal = bits.GetPortalControl();
|
||||
for (vtkm::Id i = 0; i < NumWords; ++i)
|
||||
{
|
||||
fillPortal.SetWord(i, mask);
|
||||
}
|
||||
}
|
||||
|
||||
verifyPopCount(bits);
|
||||
};
|
||||
|
||||
auto testRandomMask = [&](WordType seed) {
|
||||
std::cout << "Testing CountSetBits with random sequence seeded with 0x" << std::hex << seed
|
||||
<< std::dec << std::endl;
|
||||
|
||||
std::mt19937 mt{ seed };
|
||||
std::uniform_int_distribution<std::mt19937::result_type> rng;
|
||||
|
||||
BitField bits;
|
||||
{
|
||||
bits.Allocate(NumBits);
|
||||
auto fillPortal = bits.GetPortalControl();
|
||||
for (vtkm::Id i = 0; i < NumWords; ++i)
|
||||
{
|
||||
fillPortal.SetWord(i, static_cast<WordType>(rng(mt)));
|
||||
}
|
||||
}
|
||||
|
||||
verifyPopCount(bits);
|
||||
};
|
||||
|
||||
testRepeatedMask(0x00000000);
|
||||
testRepeatedMask(0xeeeeeeee);
|
||||
testRepeatedMask(0xffffffff);
|
||||
testRepeatedMask(0x1c0fd395);
|
||||
testRepeatedMask(0xdeadbeef);
|
||||
|
||||
testRandomMask(0x00000000);
|
||||
testRandomMask(0xeeeeeeee);
|
||||
testRandomMask(0xffffffff);
|
||||
testRandomMask(0x1c0fd395);
|
||||
testRandomMask(0xdeadbeef);
|
||||
}
|
||||
|
||||
template <typename WordType>
|
||||
static VTKM_CONT void TestFillBitFieldMask(WordType mask)
|
||||
{
|
||||
std::cout << "Testing Fill with " << (sizeof(WordType) * CHAR_BIT) << " bit mask: " << std::hex
|
||||
<< vtkm::UInt64{ mask } << std::dec << std::endl;
|
||||
|
||||
// Test that everything works correctly with a partial word at the end.
|
||||
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
|
||||
// +5 to get a partial word:
|
||||
static constexpr vtkm::Id NumFullWords = 1024;
|
||||
static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5;
|
||||
static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;
|
||||
|
||||
vtkm::cont::BitField bits;
|
||||
{
|
||||
Algorithm::Fill(bits, mask, NumBits);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
|
||||
vtkm::Id numWords = bits.GetNumberOfWords<WordType>();
|
||||
VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words.");
|
||||
|
||||
auto portal = bits.GetPortalConstControl();
|
||||
for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.GetWord<WordType>(wordIdx) == mask,
|
||||
"Incorrect word in result BitField; expected 0x",
|
||||
std::hex,
|
||||
vtkm::UInt64{ mask },
|
||||
", got 0x",
|
||||
vtkm::UInt64{ portal.GetWord<WordType>(wordIdx) },
|
||||
std::dec,
|
||||
" for word ",
|
||||
wordIdx,
|
||||
"/",
|
||||
NumWords);
|
||||
}
|
||||
}
|
||||
|
||||
// Now fill the BitField with the reversed mask to test the no-alloc
|
||||
// overload:
|
||||
{
|
||||
WordType invWord = static_cast<WordType>(~mask);
|
||||
Algorithm::Fill(bits, invWord);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
|
||||
vtkm::Id numWords = bits.GetNumberOfWords<WordType>();
|
||||
VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words.");
|
||||
|
||||
auto portal = bits.GetPortalConstControl();
|
||||
for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.GetWord<WordType>(wordIdx) == invWord,
|
||||
"Incorrect word in result BitField; expected 0x",
|
||||
std::hex,
|
||||
vtkm::UInt64{ invWord },
|
||||
", got 0x",
|
||||
vtkm::UInt64{ portal.GetWord<WordType>(wordIdx) },
|
||||
std::dec,
|
||||
" for word ",
|
||||
wordIdx,
|
||||
"/",
|
||||
NumWords);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static VTKM_CONT void TestFillBitFieldBool(bool value)
|
||||
{
|
||||
std::cout << "Testing Fill with bool: " << value << std::endl;
|
||||
|
||||
// Test that everything works correctly with a partial word at the end.
|
||||
// +5 to get a partial word:
|
||||
static constexpr vtkm::Id NumBits = 1024 * 32 + 5;
|
||||
|
||||
vtkm::cont::BitField bits;
|
||||
{
|
||||
Algorithm::Fill(bits, value, NumBits);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
|
||||
|
||||
auto portal = bits.GetPortalConstControl();
|
||||
for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == value, "Incorrect bit in result BitField.");
|
||||
}
|
||||
}
|
||||
|
||||
// Now fill the BitField with the reversed mask to test the no-alloc
|
||||
// overload:
|
||||
{
|
||||
Algorithm::Fill(bits, !value);
|
||||
|
||||
vtkm::Id numBits = bits.GetNumberOfBits();
|
||||
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
|
||||
|
||||
auto portal = bits.GetPortalConstControl();
|
||||
for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == !value, "Incorrect bit in result BitField.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static VTKM_CONT void TestFillBitField()
|
||||
{
|
||||
TestFillBitFieldBool(true);
|
||||
TestFillBitFieldBool(false);
|
||||
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0 });
|
||||
TestFillBitFieldMask<vtkm::UInt8>(static_cast<vtkm::UInt8>(~vtkm::UInt8{ 0 }));
|
||||
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0xab });
|
||||
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0x4f });
|
||||
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0 });
|
||||
TestFillBitFieldMask<vtkm::UInt16>(static_cast<vtkm::UInt16>(~vtkm::UInt16{ 0 }));
|
||||
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0xfade });
|
||||
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0xbeef });
|
||||
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0 });
|
||||
TestFillBitFieldMask<vtkm::UInt32>(static_cast<vtkm::UInt32>(~vtkm::UInt32{ 0 }));
|
||||
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0xfacecafe });
|
||||
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0xbaddecaf });
|
||||
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0 });
|
||||
TestFillBitFieldMask<vtkm::UInt64>(static_cast<vtkm::UInt64>(~vtkm::UInt64{ 0 }));
|
||||
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0xbaddefacedfacade });
|
||||
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0xfeeddeadbeef2dad });
|
||||
}
|
||||
|
||||
static VTKM_CONT void TestFillArrayHandle()
|
||||
{
|
||||
vtkm::cont::ArrayHandle<vtkm::Int32> handle;
|
||||
Algorithm::Fill(handle, 867, ARRAY_SIZE);
|
||||
|
||||
{
|
||||
auto portal = handle.GetPortalConstControl();
|
||||
VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE);
|
||||
for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.Get(i) == 867);
|
||||
}
|
||||
}
|
||||
|
||||
Algorithm::Fill(handle, 5309);
|
||||
{
|
||||
auto portal = handle.GetPortalConstControl();
|
||||
VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE);
|
||||
for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i)
|
||||
{
|
||||
VTKM_TEST_ASSERT(portal.Get(i) == 5309);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct TestAll
|
||||
{
|
||||
VTKM_CONT void operator()() const
|
||||
{
|
||||
std::cout << "Doing DeviceAdapter tests" << std::endl;
|
||||
|
||||
TestArrayTransfer();
|
||||
TestOutOfMemory();
|
||||
TestTimer();
|
||||
@ -2541,6 +2774,10 @@ private:
|
||||
TestAtomicArray();
|
||||
|
||||
TestBitFieldToUnorderedSet();
|
||||
TestCountSetBits();
|
||||
TestFillBitField();
|
||||
|
||||
TestFillArrayHandle();
|
||||
}
|
||||
};
|
||||
|
||||
|
18
vtkm/thirdparty/diy/CMakeLists.txt
vendored
18
vtkm/thirdparty/diy/CMakeLists.txt
vendored
@ -24,23 +24,7 @@ target_include_directories(vtkm_diy INTERFACE
|
||||
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)
|
||||
|
||||
if(VTKm_ENABLE_MPI)
|
||||
set(arg)
|
||||
foreach(apath IN LISTS MPI_C_INCLUDE_PATH MPI_CXX_INCLUDE_PATH)
|
||||
list(APPEND arg $<BUILD_INTERFACE:${apath}>)
|
||||
endforeach()
|
||||
list(REMOVE_DUPLICATES arg)
|
||||
target_include_directories(vtkm_diy INTERFACE ${arg})
|
||||
target_link_libraries(vtkm_diy INTERFACE
|
||||
$<BUILD_INTERFACE:${MPI_C_LIBRARIES}>
|
||||
$<BUILD_INTERFACE:${MPI_CXX_LIBRARIES}>)
|
||||
if(MPI_C_COMPILE_DEFINITIONS)
|
||||
target_compile_definitions(vtkm_diy INTERFACE
|
||||
$<$<COMPILE_LANGUAGE:C>:${MPI_C_COMPILE_DEFINITIONS}>)
|
||||
endif()
|
||||
if(MPI_CXX_COMPILE_DEFNITIONS)
|
||||
target_compile_definitions(vtkm_diy INTERFACE
|
||||
$<$<COMPILE_LANGUAGE:CXX>:${MPI_CXX_COMPILE_DEFNITIONS>)
|
||||
endif()
|
||||
target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
|
||||
endif()
|
||||
|
||||
install(TARGETS vtkm_diy
|
||||
|
Loading…
Reference in New Issue
Block a user