Merge branch 'master' of gitlab.kitware.com:m-kim/vtk-m into advdatamodel

This commit is contained in:
Mark Kim 2019-06-26 19:37:47 -04:00
commit 8dbb1c4de3
15 changed files with 2923 additions and 100 deletions

1767
CMake/FindMPI.cmake Normal file

File diff suppressed because it is too large Load Diff

24
CMake/VTKmMPI.cmake Normal file

@ -0,0 +1,24 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
if(CMAKE_VERSION VERSION_LESS 3.15)
#While CMake 3.10 introduced the new MPI module.
#Fixes related to MPI+CUDA that VTK-m needs are
#only found in CMake 3.15+.
find_package(MPI REQUIRED MODULE)
else()
#clunky but we need to make sure we use the upstream module if it exists
set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "")
find_package(MPI MODULE)
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
endif()
endif()

@ -12,6 +12,7 @@ include(CMakeParseArguments)
include(VTKmDeviceAdapters)
include(VTKmCPUVectorization)
include(VTKmMPI)
#-----------------------------------------------------------------------------
# Utility to build a kit name from the current directory.

@ -62,13 +62,13 @@ file(GENERATE
OUTPUT "${${file_loc_var}}"
CONTENT
"
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING \"\")
set(CMAKE_PREFIX_PATH ${install_prefix} CACHE STRING \"\")
set(CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING \"\")
set(CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} CACHE STRING \"\")
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_MAKE_PROGRAM \"${CMAKE_MAKE_PROGRAM}\" CACHE FILEPATH \"\")
set(CMAKE_PREFIX_PATH \"${CMAKE_PREFIX_PATH};${install_prefix}/\" CACHE STRING \"\")
set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"\")
set(CMAKE_CXX_FLAGS \"$CACHE{CMAKE_CXX_FLAGS}\" CACHE STRING \"\")
set(CMAKE_CUDA_COMPILER \"${CMAKE_CUDA_COMPILER}\" CACHE FILEPATH \"\")
set(CMAKE_CUDA_FLAGS \"$CACHE{CMAKE_CUDA_FLAGS}\" CACHE STRING \"\")
set(CMAKE_CUDA_HOST_COMPILER \"${CMAKE_CUDA_HOST_COMPILER}\" CACHE FILEPATH \"\")
"
)
@ -81,8 +81,34 @@ function(vtkm_test_against_install dir)
set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/${name}/")
set(build_dir "${VTKm_BINARY_DIR}/CMakeFiles/_tmp_build/test_${name}/")
set(build_config "${build_dir}/build_options.cmake")
vtkm_generate_install_build_options(build_config)
set(args )
if(CMAKE_VERSION VERSION_LESS 3.13)
#Before 3.13 the config file passing to cmake via ctest --build-options
#was broken
set(args
-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}
-DCMAKE_PREFIX_PATH:STRING=${install_prefix}
-DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
-DCMAKE_CUDA_COMPILER:FILEPATH=${CMAKE_CUDA_COMPILER}
-DCMAKE_CUDA_HOST_COMPILER:FILEPATH=${CMAKE_CUDA_HOST_COMPILER}
-DCMAKE_CXX_FLAGS:STRING=$CACHE{CMAKE_CXX_FLAGS}
-DCMAKE_CUDA_FLAGS:STRING=$CACHE{CMAKE_CUDA_FLAGS}
)
else()
set(build_config "${build_dir}build_options.cmake")
vtkm_generate_install_build_options(build_config)
set(args -C ${build_config})
endif()
if(WIN32 AND TARGET vtkm::tbb)
#on windows we need to specify these as FindTBB won't
#find the installed version just with the prefix path
list(APPEND args
-DTBB_LIBRARY_DEBUG:FILEPATH=${TBB_LIBRARY_DEBUG}
-DTBB_LIBRARY_RELEASE:FILEPATH=${TBB_LIBRARY_RELEASE}
-DTBB_INCLUDE_DIR:PATH=${TBB_INCLUDE_DIR}
)
endif()
#determine if the test is expected to compile or fail to build. We use
#this information to built the test name to make it clear to the user
@ -93,10 +119,13 @@ function(vtkm_test_against_install dir)
add_test(NAME ${build_name}
COMMAND ${CMAKE_CTEST_COMMAND}
-C $<CONFIG>
--build-and-test ${src_dir} ${build_dir}
--build-generator ${CMAKE_GENERATOR}
--build-makeprogram ${CMAKE_MAKE_PROGRAM}
--build-options -C "${build_config}"
--build-options
${args}
--no-warn-unused-cli
)
set_tests_properties(${build_name} PROPERTIES LABELS ${test_label} )

@ -182,14 +182,6 @@ check_type_size("long long" VTKm_SIZE_LONG_LONG BUILTIN_TYPES_ONLY)
#-----------------------------------------------------------------------------
# Add subdirectories
if(VTKm_ENABLE_MPI)
# This `if` is temporary and will be removed once `diy` supports building
# without MPI.
if (NOT MPI_C_FOUND)
find_package(MPI ${VTKm_FIND_PACKAGE_QUIETLY})
endif()
endif()
add_subdirectory(vtkm)
#-----------------------------------------------------------------------------
@ -241,6 +233,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(
FILES
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -253,6 +246,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -315,6 +309,4 @@ endif()
#-----------------------------------------------------------------------------
# Build examples
if(VTKm_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif(VTKm_ENABLE_EXAMPLES)
add_subdirectory(examples)

@ -49,7 +49,6 @@ contents of these for details on the specifics of their respective
licenses.
- - - - - - - - - - - - - - - - - - - - - - - - do not remove this line
CMake/FindTBB.cmake
CMake/FindGLEW.cmake
Utilities
vtkm/cont/tbb/internal/parallel_sort.h
vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h

@ -57,21 +57,24 @@ enum BenchmarkName
BITFIELD_TO_UNORDERED_SET = 1 << 0,
COPY = 1 << 1,
COPY_IF = 1 << 2,
LOWER_BOUNDS = 1 << 3,
REDUCE = 1 << 4,
REDUCE_BY_KEY = 1 << 5,
SCAN_INCLUSIVE = 1 << 6,
SCAN_EXCLUSIVE = 1 << 7,
SORT = 1 << 8,
SORT_BY_KEY = 1 << 9,
STABLE_SORT_INDICES = 1 << 10,
STABLE_SORT_INDICES_UNIQUE = 1 << 11,
UNIQUE = 1 << 12,
UPPER_BOUNDS = 1 << 13,
COUNT_SET_BITS = 1 << 3,
FILL = 1 << 4,
LOWER_BOUNDS = 1 << 5,
REDUCE = 1 << 6,
REDUCE_BY_KEY = 1 << 7,
SCAN_EXCLUSIVE = 1 << 8,
SCAN_INCLUSIVE = 1 << 9,
SORT = 1 << 10,
SORT_BY_KEY = 1 << 11,
STABLE_SORT_INDICES = 1 << 12,
STABLE_SORT_INDICES_UNIQUE = 1 << 13,
UNIQUE = 1 << 14,
UPPER_BOUNDS = 1 << 15,
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY |
SCAN_INCLUSIVE |
ALL = BITFIELD_TO_UNORDERED_SET | COPY | COPY_IF | COUNT_SET_BITS | FILL | LOWER_BOUNDS | REDUCE |
REDUCE_BY_KEY |
SCAN_EXCLUSIVE |
SCAN_INCLUSIVE |
SORT |
SORT_BY_KEY |
STABLE_SORT_INDICES |
@ -84,28 +87,20 @@ enum BenchmarkName
/// described below:
struct BenchDevAlgoConfig
{
/// Benchmarks to run. Possible values:
/// Copy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanInclusive,
/// ScanExclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique,
/// Unique, UpperBounds, or All. (Default: All).
// Zero is for parsing, will change to 'all' in main if needed.
/// Benchmarks to run. See BenchmarkName enum.
int BenchmarkFlags{ 0 };
/// ValueTypes to test.
/// CLI arg: "TypeList [Base|Extended]" (Base is default).
/// CLI arg: "--base-typelist | --extended-typelist" (Base is default).
bool ExtendedTypeList{ false };
/// Run benchmarks using the same number of bytes for all arrays.
/// CLI arg: "FixBytes [n|off]" (n is the number of bytes, default: 2097152, ie. 2MiB)
/// @note FixBytes and FixSizes are not mutually exclusive. If both are
/// specified, both will run.
/// CLI arg: "--array-size-bytes [n]" (n is the number of bytes, default: 2097152, ie. 2MiB)
bool TestArraySizeBytes{ true };
vtkm::UInt64 ArraySizeBytes{ 1 << 21 };
/// Run benchmarks using the same number of values for all arrays.
/// CLI arg: "FixSizes [n|off]" (n is the number of values, default: off)
/// @note FixBytes and FixSizes are not mutually exclusive. If both are
/// specified, both will run.
/// CLI arg: "--array-size-values [n]" (n is the number of values, default: off)
bool TestArraySizeValues{ false };
vtkm::UInt64 ArraySizeValues{ 1 << 21 };
@ -113,7 +108,7 @@ struct BenchDevAlgoConfig
/// values (5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 75%, 100%
/// unique). If false (default), the range is limited to 5%, 25%, 50%, 75%,
/// 100%.
/// CLI arg: "DetailedOutputRange" enables the extended range.
/// CLI arg: "--more-output-range" enables the extended range.
bool DetailedOutputRangeScaling{ false };
// Internal: The benchmarking code will set this depending on execution phase:
@ -289,11 +284,11 @@ public:
{
if (wordIdx <= this->MaxMaskedWord && (wordIdx % this->Stride) == 0)
{
this->Portal.SetWord(wordIdx, this->Exemplar);
this->Portal.SetWordAtomic(wordIdx, this->Exemplar);
}
else
{
this->Portal.SetWord(wordIdx, static_cast<WordType>(0));
this->Portal.SetWordAtomic(wordIdx, static_cast<WordType>(0));
}
}
};
@ -317,8 +312,10 @@ public:
stride = 1;
}
vtkm::Id numBits = numWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
vtkm::cont::BitField bits;
auto portal = bits.PrepareForOutput(numWords, DeviceAdapterTag{});
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
using Functor = GenerateBitFieldFunctor<WordType, decltype(portal)>;
@ -533,6 +530,184 @@ private:
VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75);
VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100);
template <typename WordType, typename DeviceAdapter>
struct BenchCountSetBits
{
vtkm::Id NumWords;
vtkm::Id NumBits;
WordType Exemplar;
vtkm::Id Stride;
vtkm::Float32 FillRatio;
vtkm::Id MaxMaskedIndex;
std::string Name;
vtkm::cont::BitField Bits;
// See GenerateBitField for details. fillRatio is used to compute
// maxMaskedWord.
VTKM_CONT
BenchCountSetBits(WordType exemplar,
vtkm::Id stride,
vtkm::Float32 fillRatio,
const std::string& name)
: NumWords(Config.ComputeNumberOfWords<WordType>())
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
, Exemplar(exemplar)
, Stride(stride)
, FillRatio(fillRatio)
, MaxMaskedIndex(this->NumWords / static_cast<vtkm::Id>(1. / this->FillRatio))
, Name(name)
, Bits(GenerateBitField<WordType, DeviceAdapter>(this->Exemplar,
this->Stride,
this->MaxMaskedIndex,
this->NumWords))
{
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer(DeviceAdapter{});
timer.Start();
Algorithm::CountSetBits(DeviceAdapter{}, this->Bits);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
const vtkm::Id numFilledWords = this->MaxMaskedIndex / this->Stride;
const vtkm::Id numSetBits = numFilledWords * vtkm::CountSetBits(this->Exemplar);
std::stringstream description;
description << "CountSetBits" << this->Name << " ( "
<< "NumWords: " << this->NumWords << " "
<< "Exemplar: " << std::hex << this->Exemplar << std::dec << " "
<< "FillRatio: " << this->FillRatio << " "
<< "Stride: " << this->Stride << " "
<< "NumSetBits: " << numSetBits << " )";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(CountSetBitsNull, BenchCountSetBits, 0x00000000, 1, 0.f, "Null");
VTKM_MAKE_BENCHMARK(CountSetBitsFull, BenchCountSetBits, 0xffffffff, 1, 1.f, "Full");
VTKM_MAKE_BENCHMARK(CountSetBitsHalfWord, BenchCountSetBits, 0xffff0000, 1, 1.f, "HalfWord");
VTKM_MAKE_BENCHMARK(CountSetBitsHalfField, BenchCountSetBits, 0xffffffff, 1, 0.5f, "HalfField");
VTKM_MAKE_BENCHMARK(CountSetBitsAlternateWords,
BenchCountSetBits,
0xffffffff,
2,
1.f,
"AlternateWords");
VTKM_MAKE_BENCHMARK(CountSetBitsAlternateBits,
BenchCountSetBits,
0x55555555,
1,
1.f,
"AlternateBits");
template <typename ValueType, typename DeviceAdapter>
struct BenchFillArrayHandle
{
vtkm::cont::ArrayHandle<ValueType> Handle;
VTKM_CONT
BenchFillArrayHandle() {}
VTKM_CONT
vtkm::Float64 operator()()
{
const vtkm::Id numVals = Config.ComputeSize<ValueType>();
Timer timer(DeviceAdapter{});
timer.Start();
Algorithm::Fill(DeviceAdapter{}, this->Handle, TestValue(19, ValueType{}), numVals);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
const vtkm::Id numVals = Config.ComputeSize<ValueType>();
std::stringstream description;
description << "Fill (ArrayHandle, " << numVals << " values)";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(FillArrayHandle, BenchFillArrayHandle);
template <typename WordType, typename DeviceAdapter>
struct BenchFillBitField
{
vtkm::Id NumWords;
vtkm::Id NumBits;
bool UseBool;
WordType Pattern;
std::string Name;
vtkm::cont::BitField Bits;
VTKM_CONT
BenchFillBitField(bool useBool, WordType pattern, const std::string& name)
: NumWords(Config.ComputeNumberOfWords<WordType>())
, NumBits(this->NumWords * static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT))
, UseBool(useBool)
, Pattern(pattern)
, Name(name)
{
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer(DeviceAdapter{});
if (this->UseBool)
{
timer.Start();
Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern != 0, this->NumBits);
return timer.GetElapsedTime();
}
else
{
timer.Start();
Algorithm::Fill(DeviceAdapter{}, this->Bits, this->Pattern, this->NumBits);
return timer.GetElapsedTime();
}
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Fill (BitField)" << this->Name << " ( "
<< "FillPattern: " << std::hex << this->Pattern << std::dec << " "
<< "UseBool: " << this->UseBool << " "
<< "NumBits: " << this->NumBits << " )";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(FillBitFieldTrue, BenchFillBitField, true, 0x1, "True");
VTKM_MAKE_BENCHMARK(FillBitFieldFalse, BenchFillBitField, true, 0x0, "False");
VTKM_MAKE_BENCHMARK(FillBitField8Bit,
BenchFillBitField,
false,
static_cast<vtkm::UInt8>(0xcc),
"8Bit");
VTKM_MAKE_BENCHMARK(FillBitField16Bit,
BenchFillBitField,
false,
static_cast<vtkm::UInt16>(0xcccc),
"16Bit");
VTKM_MAKE_BENCHMARK(FillBitField32Bit,
BenchFillBitField,
false,
static_cast<vtkm::UInt32>(0xcccccccc),
"32Bit");
VTKM_MAKE_BENCHMARK(FillBitField64Bit,
BenchFillBitField,
false,
static_cast<vtkm::UInt64>(0xcccccccccccccccc),
"64Bit");
template <typename Value, typename DeviceAdapter>
struct BenchLowerBounds
{
@ -1154,17 +1329,46 @@ public:
template <typename ValueTypes>
static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id)
{
using BitFieldWordTypes = vtkm::ListTagBase<vtkm::UInt32>;
using UInt8Type = vtkm::ListTagBase<vtkm::UInt8>;
using UInt16Type = vtkm::ListTagBase<vtkm::UInt16>;
using UInt32Type = vtkm::ListTagBase<vtkm::UInt32>;
using UInt64Type = vtkm::ListTagBase<vtkm::UInt64>;
// These need specific word types:
if (Config.BenchmarkFlags & BITFIELD_TO_UNORDERED_SET)
{
std::cout << DIVIDER << "\nBenchmarking BitFieldToUnorderedSet\n";
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, BitFieldWordTypes{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetNull, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetFull, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfWord, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetHalfField, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateWords, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(BitFieldToUnorderedSetAlternateBits, UInt32Type{}, id);
}
if (Config.BenchmarkFlags & COUNT_SET_BITS)
{
std::cout << DIVIDER << "\nBenchmarking CountSetBits\n";
VTKM_RUN_BENCHMARK(CountSetBitsNull, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(CountSetBitsFull, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(CountSetBitsHalfWord, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(CountSetBitsHalfField, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(CountSetBitsAlternateWords, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(CountSetBitsAlternateBits, UInt32Type{}, id);
}
if (Config.BenchmarkFlags & FILL)
{
std::cout << DIVIDER << "\nBenchmarking Fill (ArrayHandle)\n";
VTKM_RUN_BENCHMARK(FillArrayHandle, ValueTypes{}, id);
std::cout << DIVIDER << "\nBenchmarking Fill (BitField)\n";
VTKM_RUN_BENCHMARK(FillBitFieldTrue, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(FillBitFieldFalse, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(FillBitField8Bit, UInt8Type{}, id);
VTKM_RUN_BENCHMARK(FillBitField16Bit, UInt16Type{}, id);
VTKM_RUN_BENCHMARK(FillBitField32Bit, UInt32Type{}, id);
VTKM_RUN_BENCHMARK(FillBitField64Bit, UInt64Type{}, id);
}
if (Config.BenchmarkFlags & COPY)
@ -1413,7 +1617,7 @@ struct Arg : vtkm::cont::internal::option::Arg
const char* c = option.arg;
while (argIsNum && (*c != '\0'))
{
argIsNum &= static_cast<bool>(std::isdigit(*c));
argIsNum = argIsNum && static_cast<bool>(std::isdigit(*c));
++c;
}
@ -1524,7 +1728,8 @@ int main(int argc, char* argv[])
"",
"",
Arg::None,
"\tCopy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanExclusive, "
"\tBitFieldToUnorderedSet, Copy, CopyIf, CountSetBits, FillBitField, "
"LowerBounds, Reduce, ReduceByKey, ScanExclusive, "
"ScanInclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique, "
"Unique, UpperBounds" });
usage.push_back(
@ -1631,6 +1836,14 @@ int main(int argc, char* argv[])
{
config.BenchmarkFlags |= vtkm::benchmarking::COPY_IF;
}
else if (arg == "countsetbits")
{
config.BenchmarkFlags |= vtkm::benchmarking::COUNT_SET_BITS;
}
else if (arg == "fill")
{
config.BenchmarkFlags |= vtkm::benchmarking::FILL;
}
else if (arg == "lowerbounds")
{
config.BenchmarkFlags |= vtkm::benchmarking::LOWER_BOUNDS;

@ -10,28 +10,28 @@
#add the directory that contains the VTK-m config file to the cmake
#path so that our examples can find VTK-m
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
add_subdirectory(clipping)
add_subdirectory(contour_tree)
add_subdirectory(contour_tree_augmented)
add_subdirectory(cosmotools)
add_subdirectory(demo)
add_subdirectory(game_of_life)
add_subdirectory(hello_world)
add_subdirectory(histogram)
add_subdirectory(isosurface)
add_subdirectory(lagrangian)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)
add_subdirectory(particle_advection)
add_subdirectory(redistribute_points)
add_subdirectory(rendering)
add_subdirectory(streamline)
add_subdirectory(temporal_advection)
add_subdirectory(tetrahedra)
# add_subdirectory(unified_memory)
if(VTKm_ENABLE_EXAMPLES)
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
add_subdirectory(clipping)
add_subdirectory(contour_tree)
add_subdirectory(contour_tree_augmented)
add_subdirectory(cosmotools)
add_subdirectory(demo)
add_subdirectory(game_of_life)
add_subdirectory(hello_world)
add_subdirectory(histogram)
add_subdirectory(isosurface)
add_subdirectory(lagrangian)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)
add_subdirectory(particle_advection)
add_subdirectory(redistribute_points)
add_subdirectory(rendering)
add_subdirectory(streamline)
add_subdirectory(temporal_advection)
add_subdirectory(tetrahedra)
endif()
if (VTKm_ENABLE_TESTING)
# These need to be fast to build as they will

@ -108,6 +108,32 @@ struct CopySubRangeFunctor
}
};
struct CountSetBitsFunctor
{
vtkm::Id PopCount{ 0 };
template <typename Device, typename... Args>
VTKM_CONT bool operator()(Device, Args&&... args)
{
this->PopCount = vtkm::cont::DeviceAdapterAlgorithm<Device>::CountSetBits(
PrepareArgForExec<Device>(std::forward<Args>(args))...);
return true;
}
};
struct FillFunctor
{
vtkm::Id PopCount{ 0 };
template <typename Device, typename... Args>
VTKM_CONT bool operator()(Device, Args&&... args)
{
vtkm::cont::DeviceAdapterAlgorithm<Device>::Fill(
PrepareArgForExec<Device>(std::forward<Args>(args))...);
return true;
}
};
struct LowerBoundsFunctor
{
@ -489,6 +515,109 @@ struct Algorithm
outputIndex);
}
VTKM_CONT static vtkm::Id CountSetBits(vtkm::cont::DeviceAdapterId devId,
const vtkm::cont::BitField& bits)
{
detail::CountSetBitsFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits);
return functor.PopCount;
}
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
{
return CountSetBits(vtkm::cont::DeviceAdapterTagAny{}, bits);
}
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::BitField& bits,
bool value,
vtkm::Id numBits)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value, numBits);
}
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits)
{
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value, numBits);
}
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::BitField& bits,
bool value)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, value);
}
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value)
{
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, value);
}
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::BitField& bits,
WordType word,
vtkm::Id numBits)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word, numBits);
}
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits)
{
Fill(vtkm::cont::DeviceAdapterTagAny{}, bits, word, numBits);
}
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::BitField& bits,
WordType word)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, bits, word);
}
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word)
{
FillBitField(vtkm::cont::DeviceAdapterTagAny{}, bits, word);
}
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::ArrayHandle<T, S>& handle,
const T& value)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value);
}
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle, const T& value)
{
Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value);
}
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::DeviceAdapterId devId,
vtkm::cont::ArrayHandle<T, S>& handle,
const T& value,
const vtkm::Id numValues)
{
detail::FillFunctor functor;
vtkm::cont::TryExecuteOnDevice(devId, functor, handle, value, numValues);
}
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle,
const T& value,
const vtkm::Id numValues)
{
Fill(vtkm::cont::DeviceAdapterTagAny{}, handle, value, numValues);
}
template <typename T, class CIn, class CVal, class COut>
VTKM_CONT static void LowerBounds(vtkm::cont::DeviceAdapterId devId,

@ -119,6 +119,35 @@ struct DeviceAdapterAlgorithm
vtkm::cont::ArrayHandle<U, COut>& output,
vtkm::Id outputIndex = 0);
/// \brief Returns the total number of "1" bits in BitField.
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits);
/// \brief Fill the BitField with a specific pattern of bits.
/// For boolean values, all bits are set to 1 if value is true, or 0 if value
/// is false.
/// For word masks, the word type must be an unsigned integral type, which
/// will be stamped across the BitField.
/// If numBits is provided, the BitField is resized appropriately.
/// @{
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits);
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value);
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits);
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, bool value);
/// @}
/// Fill @a array with @a value. If @a numValues is specified, the array will
/// be resized.
/// @{
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& array, const T& value);
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& array,
const T& value,
const vtkm::Id numValues);
/// @}
/// \brief Output is the first index in input for each item in values that wouldn't alter the ordering of input
///
/// LowerBounds is a vectorized search. From each value in \c values it finds

@ -439,6 +439,95 @@ private:
}
}
template <typename BitsPortal, typename GlobalPopCountType>
struct CountSetBitsFunctor : public vtkm::exec::FunctorBase
{
VTKM_STATIC_ASSERT_MSG(VTKM_PASS_COMMAS(std::is_same<GlobalPopCountType, vtkm::Int32>::value ||
std::is_same<GlobalPopCountType, vtkm::UInt32>::value ||
std::is_same<GlobalPopCountType, vtkm::UInt64>::value),
"Unsupported GlobalPopCountType. Must support CUDA atomicAdd.");
//Using typename BitsPortal::WordTypePreferred causes dependent type errors using GCC 4.8.5
//which is the GCC required compiler for CUDA 9.2 on summit/power9
using Word = typename vtkm::cont::internal::AtomicInterfaceExecution<
DeviceAdapterTagCuda>::WordTypePreferred;
VTKM_CONT
CountSetBitsFunctor(const BitsPortal& portal, GlobalPopCountType* globalPopCount)
: Portal{ portal }
, GlobalPopCount{ globalPopCount }
, FinalWordIndex{ portal.GetNumberOfWords() - 1 }
, FinalWordMask{ portal.GetFinalWordMask() }
{
}
~CountSetBitsFunctor() {}
VTKM_CONT void Initialize()
{
assert(this->GlobalPopCount != nullptr);
VTKM_CUDA_CALL(cudaMemset(this->GlobalPopCount, 0, sizeof(GlobalPopCountType)));
}
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ void operator()(vtkm::Id wordIdx) const
{
Word word = this->Portal.GetWord(wordIdx);
// The last word may be partial -- mask out trailing bits if needed.
const Word mask = wordIdx == this->FinalWordIndex ? this->FinalWordMask : ~Word{ 0 };
word &= mask;
if (word != 0)
{
this->LocalPopCount = vtkm::CountSetBits(word);
this->Reduce();
}
}
VTKM_CONT vtkm::Id Finalize() const
{
assert(this->GlobalPopCount != nullptr);
GlobalPopCountType result;
VTKM_CUDA_CALL(cudaMemcpy(
&result, this->GlobalPopCount, sizeof(GlobalPopCountType), cudaMemcpyDeviceToHost));
return static_cast<vtkm::Id>(result);
}
private:
// Every thread with a non-zero local popcount calls this function, which
// computes the total popcount for the coalesced threads and atomically
// increasing the global popcount.
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ void Reduce() const
{
const auto activeLanes = cooperative_groups::coalesced_threads();
const int activeRank = activeLanes.thread_rank();
const int activeSize = activeLanes.size();
// Reduction value:
vtkm::Int32 rVal = this->LocalPopCount;
for (int delta = 1; delta < activeSize; delta *= 2)
{
rVal += activeLanes.shfl_down(rVal, delta);
}
if (activeRank == 0)
{
atomicAdd(this->GlobalPopCount, static_cast<GlobalPopCountType>(rVal));
}
}
const BitsPortal Portal;
GlobalPopCountType* GlobalPopCount;
mutable vtkm::Int32 LocalPopCount{ 0 };
// Used to mask trailing bits the in last word.
vtkm::Id FinalWordIndex{ 0 };
Word FinalWordMask{ 0 };
};
template <class InputPortal, class ValuesPortal, class OutputPortal>
VTKM_CONT static void LowerBoundsPortal(const InputPortal& input,
const ValuesPortal& values,
@ -959,6 +1048,21 @@ private:
return functor.Finalize();
}
template <typename GlobalPopCountType, typename BitsPortal>
VTKM_CONT static vtkm::Id CountSetBitsPortal(const BitsPortal& bits)
{
using Functor = CountSetBitsFunctor<BitsPortal, GlobalPopCountType>;
// RAII for the global atomic counter.
auto globalCount = cuda::internal::make_CudaUniquePtr<GlobalPopCountType>(1);
Functor functor{ bits, globalCount.get() };
functor.Initialize();
Schedule(functor, bits.GetNumberOfWords());
Synchronize(); // Ensure kernel is done before checking final atomic count
return functor.Finalize();
}
//-----------------------------------------------------------------------------
public:
@ -1095,6 +1199,14 @@ public:
return true;
}
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTagCuda{});
// Use a uint64 for accumulator, as atomicAdd does not support signed int64.
return CountSetBitsPortal<vtkm::UInt64>(bitsPortal);
}
template <typename T, class SIn, class SVal, class SOut>
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<T, SIn>& input,
const vtkm::cont::ArrayHandle<T, SVal>& values,

@ -269,6 +269,170 @@ public:
return true;
}
//--------------------------------------------------------------------------
// Count Set Bits
VTKM_CONT static vtkm::Id CountSetBits(const vtkm::cont::BitField& bits)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
auto bitsPortal = bits.PrepareForInput(DeviceAdapterTag{});
std::atomic<vtkm::UInt64> popCount;
popCount.store(0, std::memory_order_relaxed);
using Functor = CountSetBitsFunctor<decltype(bitsPortal)>;
Functor functor{ bitsPortal, popCount };
DerivedAlgorithm::Schedule(functor, functor.GetNumberOfInstances());
DerivedAlgorithm::Synchronize();
return static_cast<vtkm::Id>(popCount.load(std::memory_order_seq_cst));
}
//--------------------------------------------------------------------------
// Fill Bit Field (bool, resize)
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value, vtkm::Id numBits)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
if (numBits == 0)
{
bits.Shrink(0);
return;
}
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
using WordType =
typename vtkm::cont::BitField::template ExecutionTypes<DeviceAdapterTag>::WordTypePreferred;
using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
const vtkm::Id numWords = portal.template GetNumberOfWords<WordType>();
DerivedAlgorithm::Schedule(functor, numWords);
}
//--------------------------------------------------------------------------
// Fill Bit Field (bool)
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, bool value)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
const vtkm::Id numBits = bits.GetNumberOfBits();
if (numBits == 0)
{
return;
}
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
using WordType =
typename vtkm::cont::BitField::template ExecutionTypes<DeviceAdapterTag>::WordTypePreferred;
using Functor = FillBitFieldFunctor<decltype(portal), WordType>;
Functor functor{ portal, value ? ~WordType{ 0 } : WordType{ 0 } };
const vtkm::Id numWords = portal.template GetNumberOfWords<WordType>();
DerivedAlgorithm::Schedule(functor, numWords);
}
//--------------------------------------------------------------------------
// Fill Bit Field (mask, resize)
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word, vtkm::Id numBits)
{
VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType<WordType>{}, "Invalid word type.");
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
if (numBits == 0)
{
bits.Shrink(0);
return;
}
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
// If less than 32 bits, repeat the word until we get a 32 bit pattern.
// Using this for the pattern prevents races while writing small numbers
// to adjacent memory locations.
auto repWord = RepeatTo32BitsIfNeeded(word);
using RepWordType = decltype(repWord);
using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
Functor functor{ portal, repWord };
const vtkm::Id numWords = portal.template GetNumberOfWords<RepWordType>();
DerivedAlgorithm::Schedule(functor, numWords);
}
//--------------------------------------------------------------------------
// Fill Bit Field (mask)
template <typename WordType>
VTKM_CONT static void Fill(vtkm::cont::BitField& bits, WordType word)
{
VTKM_STATIC_ASSERT_MSG(vtkm::cont::BitField::IsValidWordType<WordType>{}, "Invalid word type.");
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
const vtkm::Id numBits = bits.GetNumberOfBits();
if (numBits == 0)
{
return;
}
auto portal = bits.PrepareForOutput(numBits, DeviceAdapterTag{});
// If less than 32 bits, repeat the word until we get a 32 bit pattern.
// Using this for the pattern prevents races while writing small numbers
// to adjacent memory locations.
auto repWord = RepeatTo32BitsIfNeeded(word);
using RepWordType = decltype(repWord);
using Functor = FillBitFieldFunctor<decltype(portal), RepWordType>;
Functor functor{ portal, repWord };
const vtkm::Id numWords = portal.template GetNumberOfWords<RepWordType>();
DerivedAlgorithm::Schedule(functor, numWords);
}
//--------------------------------------------------------------------------
// Fill ArrayHandle
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle, const T& value)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
const vtkm::Id numValues = handle.GetNumberOfValues();
if (numValues == 0)
{
return;
}
auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{});
FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
DerivedAlgorithm::Schedule(functor, numValues);
}
//--------------------------------------------------------------------------
// Fill ArrayHandle (resize)
template <typename T, typename S>
VTKM_CONT static void Fill(vtkm::cont::ArrayHandle<T, S>& handle,
const T& value,
const vtkm::Id numValues)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
if (numValues == 0)
{
handle.Shrink(0);
return;
}
auto portal = handle.PrepareForOutput(numValues, DeviceAdapterTag{});
FillArrayHandleFunctor<decltype(portal)> functor{ portal, value };
DerivedAlgorithm::Schedule(functor, numValues);
}
//--------------------------------------------------------------------------
// Lower Bounds
template <typename T, class CIn, class CVal, class COut>

@ -494,6 +494,149 @@ struct CopyKernel
void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {}
};
template <typename BitsPortal>
struct CountSetBitsFunctor : public vtkm::exec::FunctorBase
{
using WordType = typename BitsPortal::WordTypePreferred;
// This functor executes a number of instances, where each instance handles
// two cachelines worth of data. This reduces the number of atomic operations.
// Figure out how many words that is:
static constexpr vtkm::Id CacheLineSize = VTKM_ALLOCATION_ALIGNMENT;
static constexpr vtkm::Id WordsPerCacheLine =
CacheLineSize / static_cast<vtkm::Id>(sizeof(WordType));
static constexpr vtkm::Id CacheLinesPerInstance = 2;
static constexpr vtkm::Id WordsPerInstance = CacheLinesPerInstance * WordsPerCacheLine;
VTKM_CONT
CountSetBitsFunctor(const BitsPortal& input, std::atomic<vtkm::UInt64>& popCount)
: Input{ input }
, PopCount{ popCount }
, FinalWordIndex{ input.GetNumberOfWords() - 1 }
, FinalWordMask{ input.GetFinalWordMask() }
{
}
VTKM_CONT vtkm::Id GetNumberOfInstances() const
{
const auto numWords = this->Input.GetNumberOfWords();
return (numWords + WordsPerInstance - 1) / WordsPerInstance;
}
VTKM_EXEC void operator()(vtkm::Id instanceIdx) const
{
const vtkm::Id numWords = this->Input.GetNumberOfWords();
const vtkm::Id wordStart = vtkm::Min(instanceIdx * WordsPerInstance, numWords);
const vtkm::Id wordEnd = vtkm::Min(wordStart + WordsPerInstance, numWords);
if (wordStart != wordEnd) // range is valid
{
this->ExecuteRange(wordStart, wordEnd);
}
}
VTKM_CONT vtkm::UInt64 GetPopCount() const { return PopCount.load(std::memory_order_relaxed); }
private:
VTKM_EXEC void ExecuteRange(vtkm::Id wordStart, vtkm::Id wordEnd) const
{
#ifndef VTKM_CUDA_DEVICE_PASS // for std::atomic call from VTKM_EXEC function:
// Count bits and allocate space for output:
vtkm::UInt64 chunkBits = this->CountChunkBits(wordStart, wordEnd);
this->PopCount.fetch_add(chunkBits, std::memory_order_relaxed);
#else
(void)wordStart;
(void)wordEnd;
#endif
}
VTKM_EXEC vtkm::UInt64 CountChunkBits(vtkm::Id wordStart, vtkm::Id wordEnd) const
{
// Need to mask out trailing bits from the final word:
const bool isFinalChunk = wordEnd == (this->FinalWordIndex + 1);
if (isFinalChunk)
{
wordEnd = this->FinalWordIndex;
}
vtkm::Int32 tmp = 0;
for (vtkm::Id i = wordStart; i < wordEnd; ++i)
{
tmp += vtkm::CountSetBits(this->Input.GetWord(i));
}
if (isFinalChunk)
{
tmp += vtkm::CountSetBits(this->Input.GetWord(this->FinalWordIndex) & this->FinalWordMask);
}
return static_cast<vtkm::UInt64>(tmp);
}
BitsPortal Input;
std::atomic<vtkm::UInt64>& PopCount;
// Used to mask trailing bits the in last word.
vtkm::Id FinalWordIndex{ 0 };
WordType FinalWordMask{ 0 };
};
// For a given unsigned integer less than 32 bits, repeat its bits until we
// have a 32 bit pattern. This is used to make all fill patterns at least
// 32 bits in size, since concurrently writing to adjacent locations smaller
// than 32 bits may race on some platforms.
template <typename WordType, typename = typename std::enable_if<(sizeof(WordType) >= 4)>::type>
static constexpr VTKM_CONT WordType RepeatTo32BitsIfNeeded(WordType pattern)
{ // for 32 bits or more, just pass the type through.
return pattern;
}
static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt16 pattern)
{
return static_cast<vtkm::UInt32>(pattern << 16 | pattern);
}
static inline constexpr VTKM_CONT vtkm::UInt32 RepeatTo32BitsIfNeeded(vtkm::UInt8 pattern)
{
return RepeatTo32BitsIfNeeded(static_cast<vtkm::UInt16>(pattern << 8 | pattern));
}
template <typename BitsPortal, typename WordType>
struct FillBitFieldFunctor : public vtkm::exec::FunctorBase
{
VTKM_CONT
FillBitFieldFunctor(const BitsPortal& portal, WordType mask)
: Portal{ portal }
, Mask{ mask }
{
}
VTKM_EXEC void operator()(vtkm::Id wordIdx) const { this->Portal.SetWord(wordIdx, this->Mask); }
private:
BitsPortal Portal;
WordType Mask;
};
template <typename PortalType>
struct FillArrayHandleFunctor : public vtkm::exec::FunctorBase
{
using ValueType = typename PortalType::ValueType;
VTKM_CONT
FillArrayHandleFunctor(const PortalType& portal, ValueType value)
: Portal{ portal }
, Value{ value }
{
}
VTKM_EXEC void operator()(vtkm::Id idx) const { this->Portal.Set(idx, this->Value); }
private:
PortalType Portal;
ValueType Value;
};
template <class InputPortalType, class ValuesPortalType, class OutputPortalType>
struct LowerBoundsKernel
{

@ -2438,7 +2438,7 @@ private:
auto testRepeatedMask = [&](WordType mask) {
std::cout << "Testing BitFieldToUnorderedSet with repeated 32-bit word 0x" << std::hex << mask
<< std::endl;
<< std::dec << std::endl;
BitField bits;
{
@ -2455,7 +2455,7 @@ private:
auto testRandomMask = [&](WordType seed) {
std::cout << "Testing BitFieldToUnorderedSet with random sequence seeded with 0x" << std::hex
<< seed << std::endl;
<< seed << std::dec << std::endl;
std::mt19937 mt{ seed };
std::uniform_int_distribution<std::mt19937::result_type> rng;
@ -2486,11 +2486,244 @@ private:
testRandomMask(0xdeadbeef);
}
static VTKM_CONT void TestCountSetBits()
{
using WordType = WordTypeDefault;
// Test that everything works correctly with a partial word at the end.
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
// +5 to get a partial word:
static constexpr vtkm::Id NumFullWords = 1024;
static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5;
static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;
auto verifyPopCount = [](const BitField& bits) {
vtkm::Id refPopCount = 0;
const vtkm::Id numBits = bits.GetNumberOfBits();
auto portal = bits.GetPortalConstControl();
for (vtkm::Id idx = 0; idx < numBits; ++idx)
{
if (portal.GetBit(idx))
{
++refPopCount;
}
}
const vtkm::Id popCount = Algorithm::CountSetBits(bits);
VTKM_TEST_ASSERT(
refPopCount == popCount, "CountSetBits returned ", popCount, ", expected ", refPopCount);
};
auto testRepeatedMask = [&](WordType mask) {
std::cout << "Testing CountSetBits with repeated word 0x" << std::hex << mask << std::dec
<< std::endl;
BitField bits;
{
bits.Allocate(NumBits);
auto fillPortal = bits.GetPortalControl();
for (vtkm::Id i = 0; i < NumWords; ++i)
{
fillPortal.SetWord(i, mask);
}
}
verifyPopCount(bits);
};
auto testRandomMask = [&](WordType seed) {
std::cout << "Testing CountSetBits with random sequence seeded with 0x" << std::hex << seed
<< std::dec << std::endl;
std::mt19937 mt{ seed };
std::uniform_int_distribution<std::mt19937::result_type> rng;
BitField bits;
{
bits.Allocate(NumBits);
auto fillPortal = bits.GetPortalControl();
for (vtkm::Id i = 0; i < NumWords; ++i)
{
fillPortal.SetWord(i, static_cast<WordType>(rng(mt)));
}
}
verifyPopCount(bits);
};
testRepeatedMask(0x00000000);
testRepeatedMask(0xeeeeeeee);
testRepeatedMask(0xffffffff);
testRepeatedMask(0x1c0fd395);
testRepeatedMask(0xdeadbeef);
testRandomMask(0x00000000);
testRandomMask(0xeeeeeeee);
testRandomMask(0xffffffff);
testRandomMask(0x1c0fd395);
testRandomMask(0xdeadbeef);
}
template <typename WordType>
static VTKM_CONT void TestFillBitFieldMask(WordType mask)
{
std::cout << "Testing Fill with " << (sizeof(WordType) * CHAR_BIT) << " bit mask: " << std::hex
<< vtkm::UInt64{ mask } << std::dec << std::endl;
// Test that everything works correctly with a partial word at the end.
static constexpr vtkm::Id BitsPerWord = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
// +5 to get a partial word:
static constexpr vtkm::Id NumFullWords = 1024;
static constexpr vtkm::Id NumBits = NumFullWords * BitsPerWord + 5;
static constexpr vtkm::Id NumWords = (NumBits + BitsPerWord - 1) / BitsPerWord;
vtkm::cont::BitField bits;
{
Algorithm::Fill(bits, mask, NumBits);
vtkm::Id numBits = bits.GetNumberOfBits();
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
vtkm::Id numWords = bits.GetNumberOfWords<WordType>();
VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words.");
auto portal = bits.GetPortalConstControl();
for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx)
{
VTKM_TEST_ASSERT(portal.GetWord<WordType>(wordIdx) == mask,
"Incorrect word in result BitField; expected 0x",
std::hex,
vtkm::UInt64{ mask },
", got 0x",
vtkm::UInt64{ portal.GetWord<WordType>(wordIdx) },
std::dec,
" for word ",
wordIdx,
"/",
NumWords);
}
}
// Now fill the BitField with the reversed mask to test the no-alloc
// overload:
{
WordType invWord = static_cast<WordType>(~mask);
Algorithm::Fill(bits, invWord);
vtkm::Id numBits = bits.GetNumberOfBits();
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
vtkm::Id numWords = bits.GetNumberOfWords<WordType>();
VTKM_TEST_ASSERT(numWords == NumWords, "Unexpected number of words.");
auto portal = bits.GetPortalConstControl();
for (vtkm::Id wordIdx = 0; wordIdx < NumWords; ++wordIdx)
{
VTKM_TEST_ASSERT(portal.GetWord<WordType>(wordIdx) == invWord,
"Incorrect word in result BitField; expected 0x",
std::hex,
vtkm::UInt64{ invWord },
", got 0x",
vtkm::UInt64{ portal.GetWord<WordType>(wordIdx) },
std::dec,
" for word ",
wordIdx,
"/",
NumWords);
}
}
}
static VTKM_CONT void TestFillBitFieldBool(bool value)
{
std::cout << "Testing Fill with bool: " << value << std::endl;
// Test that everything works correctly with a partial word at the end.
// +5 to get a partial word:
static constexpr vtkm::Id NumBits = 1024 * 32 + 5;
vtkm::cont::BitField bits;
{
Algorithm::Fill(bits, value, NumBits);
vtkm::Id numBits = bits.GetNumberOfBits();
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
auto portal = bits.GetPortalConstControl();
for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx)
{
VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == value, "Incorrect bit in result BitField.");
}
}
// Now fill the BitField with the reversed mask to test the no-alloc
// overload:
{
Algorithm::Fill(bits, !value);
vtkm::Id numBits = bits.GetNumberOfBits();
VTKM_TEST_ASSERT(numBits == NumBits, "Unexpected number of bits.");
auto portal = bits.GetPortalConstControl();
for (vtkm::Id bitIdx = 0; bitIdx < NumBits; ++bitIdx)
{
VTKM_TEST_ASSERT(portal.GetBit(bitIdx) == !value, "Incorrect bit in result BitField.");
}
}
}
static VTKM_CONT void TestFillBitField()
{
TestFillBitFieldBool(true);
TestFillBitFieldBool(false);
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0 });
TestFillBitFieldMask<vtkm::UInt8>(static_cast<vtkm::UInt8>(~vtkm::UInt8{ 0 }));
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0xab });
TestFillBitFieldMask<vtkm::UInt8>(vtkm::UInt8{ 0x4f });
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0 });
TestFillBitFieldMask<vtkm::UInt16>(static_cast<vtkm::UInt16>(~vtkm::UInt16{ 0 }));
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0xfade });
TestFillBitFieldMask<vtkm::UInt16>(vtkm::UInt16{ 0xbeef });
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0 });
TestFillBitFieldMask<vtkm::UInt32>(static_cast<vtkm::UInt32>(~vtkm::UInt32{ 0 }));
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0xfacecafe });
TestFillBitFieldMask<vtkm::UInt32>(vtkm::UInt32{ 0xbaddecaf });
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0 });
TestFillBitFieldMask<vtkm::UInt64>(static_cast<vtkm::UInt64>(~vtkm::UInt64{ 0 }));
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0xbaddefacedfacade });
TestFillBitFieldMask<vtkm::UInt64>(vtkm::UInt64{ 0xfeeddeadbeef2dad });
}
static VTKM_CONT void TestFillArrayHandle()
{
vtkm::cont::ArrayHandle<vtkm::Int32> handle;
Algorithm::Fill(handle, 867, ARRAY_SIZE);
{
auto portal = handle.GetPortalConstControl();
VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE);
for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i)
{
VTKM_TEST_ASSERT(portal.Get(i) == 867);
}
}
Algorithm::Fill(handle, 5309);
{
auto portal = handle.GetPortalConstControl();
VTKM_TEST_ASSERT(portal.GetNumberOfValues() == ARRAY_SIZE);
for (vtkm::Id i = 0; i < ARRAY_SIZE; ++i)
{
VTKM_TEST_ASSERT(portal.Get(i) == 5309);
}
}
}
struct TestAll
{
VTKM_CONT void operator()() const
{
std::cout << "Doing DeviceAdapter tests" << std::endl;
TestArrayTransfer();
TestOutOfMemory();
TestTimer();
@ -2541,6 +2774,10 @@ private:
TestAtomicArray();
TestBitFieldToUnorderedSet();
TestCountSetBits();
TestFillBitField();
TestFillArrayHandle();
}
};

@ -24,23 +24,7 @@ target_include_directories(vtkm_diy INTERFACE
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)
if(VTKm_ENABLE_MPI)
set(arg)
foreach(apath IN LISTS MPI_C_INCLUDE_PATH MPI_CXX_INCLUDE_PATH)
list(APPEND arg $<BUILD_INTERFACE:${apath}>)
endforeach()
list(REMOVE_DUPLICATES arg)
target_include_directories(vtkm_diy INTERFACE ${arg})
target_link_libraries(vtkm_diy INTERFACE
$<BUILD_INTERFACE:${MPI_C_LIBRARIES}>
$<BUILD_INTERFACE:${MPI_CXX_LIBRARIES}>)
if(MPI_C_COMPILE_DEFINITIONS)
target_compile_definitions(vtkm_diy INTERFACE
$<$<COMPILE_LANGUAGE:C>:${MPI_C_COMPILE_DEFINITIONS}>)
endif()
if(MPI_CXX_COMPILE_DEFNITIONS)
target_compile_definitions(vtkm_diy INTERFACE
$<$<COMPILE_LANGUAGE:CXX>:${MPI_CXX_COMPILE_DEFNITIONS>)
endif()
target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
endif()
install(TARGETS vtkm_diy