Merge branch 'master' of https://gitlab.kitware.com/vtk/vtk-m into temporalparticleadvection

This commit is contained in:
ayenpure 2018-05-15 11:46:26 -07:00
commit eb2ed4f275
76 changed files with 2043 additions and 468 deletions

@ -44,9 +44,23 @@ endif()
#-----------------------------------------------------------------------------
# vtkm_compiler_flags is used by all the vtkm targets
# vtkm_compiler_flags is used by all the vtkm targets and consumers of VTK-m
# The flags on vtkm_compiler_flags are needed when using/building vtk-m
add_library(vtkm_compiler_flags INTERFACE)
# When building libraries/tests that are part of the VTK-m repository
# inherit the properties from vtkm_developer_flags and vtkm_vectorization_flags.
# The flags are intended only for VTK-m itself and are not needed by consumers.
# We will export vtkm_vectorization_flags in general so consumers can enable
# vectorization if they so desire
if (VTKm_ENABLE_DEVELOPER_FLAGS)
target_link_libraries(vtkm_compiler_flags
INTERFACE $<BUILD_INTERFACE:vtkm_developer_flags>)
endif()
target_link_libraries(vtkm_compiler_flags
INTERFACE $<BUILD_INTERFACE:vtkm_vectorization_flags>)
# setup that we need C++11 support
if(CMAKE_VERSION VERSION_LESS 3.8)
target_compile_features(vtkm_compiler_flags INTERFACE cxx_nullptr)
@ -56,14 +70,9 @@ endif()
# Enable large object support so we can have 2^32 addressable sections
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_VERSION VERSION_LESS 3.11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=\"/bigobj\"")
else()
target_compile_options(vtkm_compiler_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:/bigobj>>)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/bigobj">>)
endif()
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:/bigobj>)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/bigobj">)
endif()
endif()
@ -74,29 +83,22 @@ target_include_directories(vtkm_compiler_flags INTERFACE
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}>
)
# Additional warnings just for Clang 3.5+, and AppleClang 7+ we specify
# for all build types, since these failures to vectorize are not limited
# to developer builds
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.4)
target_compile_options(vtkm_compiler_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:-Wno-pass-failed>>)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.99)
target_compile_options(vtkm_compiler_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:-Wno-pass-failed>>)
endif()
# When building libraries/tests that are part of the VTK-m repository
# inherit the properties from vtkm_developer_flags
target_link_libraries(vtkm_compiler_flags
INTERFACE $<BUILD_INTERFACE:vtkm_developer_flags vtkm_vectorization_flags>)
#-----------------------------------------------------------------------------
# vtkm_developer_flags is used ONLY BY libraries that are built as part of this
# repository
add_library(vtkm_developer_flags INTERFACE)
target_link_libraries(vtkm_developer_flags INTERFACE vtkm_compiler_flags)
# Additional warnings just for Clang 3.5+, and AppleClang 7+
# about failures to vectorize.
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.4)
target_compile_options(vtkm_developer_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:-Wno-pass-failed>>)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.99)
target_compile_options(vtkm_developer_flags INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:-Wno-pass-failed>>)
endif()
if(VTKM_COMPILER_IS_MSVC)
target_compile_definitions(vtkm_developer_flags INTERFACE "_SCL_SECURE_NO_WARNINGS"
"_CRT_SECURE_NO_WARNINGS")
@ -125,11 +127,22 @@ elseif(VTKM_COMPILER_IS_ICC)
elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
set(cxx_flags -Wall -Wno-long-long -Wcast-align -Wconversion -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused-parameter -fno-common)
set(cuda_flags -Xcudafe=--display_error_number -Xcompiler=-Wall,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-local-typedefs,-Wno-unused-function,-Wno-long-long,-Wcast-align,-Wconversion,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-Wunused-parameter,-fno-common)
target_compile_options(vtkm_compiler_flags
#GCC 5 and 6 don't properly handle strict-overflow suppression through pragmas.
#Instead of suppressing around the location of the strict-overflow you
#have to suppress around the entry point, or in VTK-m's case the worklet
#invocation site. This is incredibly tedious and has been fixed in GCC 7.
#
if(VTKM_COMPILER_IS_GNU AND
(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.99) AND
(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.99) )
list(APPEND cxx_flags -Wno-strict-overflow)
endif()
target_compile_options(vtkm_developer_flags
INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>>
)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags
target_compile_options(vtkm_developer_flags
INTERFACE $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>>
)
endif()

@ -66,13 +66,9 @@ if(VTKm_ENABLE_CUDA AND NOT TARGET vtkm::cuda)
add_library(vtkm::cuda UNKNOWN IMPORTED GLOBAL)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND CMAKE_VERSION VERSION_LESS 3.11)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
else()
set_target_properties(vtkm::cuda PROPERTIES
INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
)
endif()
# We can't have this location/lib empty, so we provide a location that is
# valid and will have no effect on compilation
@ -115,6 +111,8 @@ else()
# for all major virtual architectures, guaranteeing that the code will run
# anywhere.
#
# The option 'none' is provided so that when being built as part of another
# project, its own custom flags can be used.
#
# 1 - native
# - Uses system introspection to determine compile flags
@ -135,21 +133,19 @@ else()
# - Uses: --generate-code=arch=compute_50,code=sm_50
# - Uses: --generate-code=arch=compute_60,code=sm_60
# - Uses: --generate-code=arch=compute_70,code=sm_70
# 8 - none
#
#specify the property
set(VTKm_CUDA_Architecture "native" CACHE STRING "Which GPU Architecture(s) to compile for")
set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta all)
set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta all none)
#detect what the property is set to
if(VTKm_CUDA_Architecture STREQUAL "native")
if(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT)
#Use the cached value
# replace any semicolons with an empty space as CMAKE_CUDA_FLAGS is
# a string not a list and this could be cached from when it was a list
string(REPLACE ";" " " run_output "${VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${run_output}")
set(arch_flags ${VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT})
else()
#run execute_process to do auto_detection
@ -174,11 +170,7 @@ else()
string(FIND "${run_output}" "--generate-code" position)
string(SUBSTRING "${run_output}" ${position} -1 run_output)
# replace any semicolons with an empty space as CMAKE_CUDA_FLAGS is
# a string not a list
string(REPLACE ";" " " run_output "${run_output}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${run_output}")
set(arch_flags ${run_output})
set(VTKM_CUDA_NATIVE_EXE_PROCESS_RAN_OUTPUT ${run_output} CACHE INTERNAL
"device type(s) for cuda[native]")
else()
@ -190,22 +182,27 @@ else()
#since when we are native we can fail, and fall back to "kepler" these have
#to happen after, and separately of the native check
if(VTKm_CUDA_Architecture STREQUAL "fermi")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_20,code=sm_20")
set(arch_flags --generate-code=arch=compute_20,code=sm_20)
elseif(VTKm_CUDA_Architecture STREQUAL "kepler")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_30,code=sm_30")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_35,code=sm_35")
set(arch_flags --generate-code=arch=compute_30,code=sm_30
--generate-code=arch=compute_35,code=sm_35)
elseif(VTKm_CUDA_Architecture STREQUAL "maxwell")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_50,code=sm_50")
set(arch_flags --generate-code=arch=compute_50,code=sm_50)
elseif(VTKm_CUDA_Architecture STREQUAL "pascal")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_60,code=sm_60")
set(arch_flags --generate-code=arch=compute_60,code=sm_60)
elseif(VTKm_CUDA_Architecture STREQUAL "volta")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_70,code=sm_70")
set(arch_flags --generate-code=arch=compute_70,code=sm_70)
elseif(VTKm_CUDA_Architecture STREQUAL "all")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_30,code=sm_30")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_35,code=sm_35")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_50,code=sm_50")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_60,code=sm_60")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --generate-code=arch=compute_70,code=sm_70")
set(arch_flags --generate-code=arch=compute_30,code=sm_30
--generate-code=arch=compute_35,code=sm_35
--generate-code=arch=compute_50,code=sm_50
--generate-code=arch=compute_60,code=sm_60
--generate-code=arch=compute_70,code=sm_70)
endif()
string(REPLACE ";" " " arch_flags "${arch_flags}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
set_target_properties(vtkm::cuda PROPERTIES VTKm_CUDA_Architecture_Flags "${arch_flags}")
endif()

@ -414,9 +414,8 @@ function(vtkm_unit_tests)
set_property(TARGET ${test_prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
target_link_libraries(${test_prog} PRIVATE vtkm_cont ${VTKm_UT_LIBRARIES})
if(VTKm_UT_NO_TESTS)
return()
if(backend)
target_compile_definitions(${test_prog} PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_${backend}")
endif()
#determine the timeout for all the tests based on the backend. CUDA tests

@ -22,11 +22,16 @@
# - Support for target_sources
# - Support for usage requirements
#
# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX or
# CMake 3.10 on windows.
# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
# We require CMake 3.11 on windows as the $<COMPILE_LANGUAGE:> generator
# expression is not supported on older versions.
cmake_minimum_required(VERSION 3.3)
project (VTKm)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
endif()
# Update module path
set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${VTKm_CMAKE_MODULE_PATH})
@ -112,10 +117,16 @@ vtkm_option(VTKm_USE_DEFAULT_SYMBOL_VISIBILITY "Don't explicitly hide symbols fr
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
# This flag can be used to prevent VTK-m from exporting its warning flags in its
# build interface. This is useful when building VTK-m as a thirdparty library
# and the warnings are too strict for the parent project.
vtkm_option(VTKm_ENABLE_DEVELOPER_FLAGS "Enable compiler flags that are usefull while developing VTK-m" ON)
mark_as_advanced(
VTKm_NO_ASSERT
VTKm_INSTALL_ONLY_LIBRARIES
VTKm_USE_DEFAULT_SYMBOL_VISIBILITY
VTKm_ENABLE_DEVELOPER_FLAGS
)
#-----------------------------------------------------------------------------

@ -21,4 +21,22 @@
list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
".*warning: ignoring loop annotation.*"
".*warning: Included by graph for.*not generated, too many nodes. Consider increasing DOT_GRAPH_MAX_NODES."
# disable doxygen warning about potential recursion.
".*warning: Detected potential recursive class relation between class vtkm::exec::internal::ArrayPortalTransform"
# disable doxygen warning about not generating graph
".*warning: Included by graph for"
# disable doxygen warnings from CONTRIBUTING.md, CodingConventions.md.
# these files are really intended for Gitlab, hence we don't want to use
# doxygen tags in them.
"CONTRIBUTING.md.*warning"
"CodingConventions.md.*warning"
# disable PTX warning about recursive functions. These look like they can't be silenced
# without disabling all PTX warnings, so hide them on the dashboard.
# We explicitly only suppress specific worklets so we can see when new recursive
# worklets are added
"ptxas warning : Stack size for entry function.*NearestNeighborSearch3DWorklet.*"
)

@ -72,7 +72,7 @@ int main(int argc, char* argv[])
std::cout << "using: " << argv[1] << " as MarchingCubes input file" << std::endl;
vtkm::io::reader::VTKDataSetReader reader(argv[1]);
inputData = reader.ReadDataSet();
isovalue = atof(argv[2]);
isovalue = static_cast<vtkm::Float32>(atof(argv[2]));
fieldName = "SCALARS:pointvar";
}

@ -94,10 +94,10 @@ struct UpdateLifeState : public vtkm::worklet::WorkletPointNeighborhood3x3x3
// Any live cell with two or three live neighbors lives on to the next generation.
// Any live cell with more than three live neighbors dies, as if by overcrowding.
// Any dead cell with exactly three live neighbors becomes a live cell, as if by reproduction.
vtkm::UInt8 current = prevstate.Get(0, 0, 0);
vtkm::UInt8 count = prevstate.Get(-1, -1, 0) + prevstate.Get(-1, 0, 0) +
prevstate.Get(-1, 1, 0) + prevstate.Get(0, -1, 0) + prevstate.Get(0, 1, 0) +
prevstate.Get(1, -1, 0) + prevstate.Get(1, 0, 0) + prevstate.Get(1, 1, 0);
auto current = prevstate.Get(0, 0, 0);
auto count = prevstate.Get(-1, -1, 0) + prevstate.Get(-1, 0, 0) + prevstate.Get(-1, 1, 0) +
prevstate.Get(0, -1, 0) + prevstate.Get(0, 1, 0) + prevstate.Get(1, -1, 0) +
prevstate.Get(1, 0, 0) + prevstate.Get(1, 1, 0);
if (current == 1 && (count == 2 || count == 3))
{
@ -113,8 +113,8 @@ struct UpdateLifeState : public vtkm::worklet::WorkletPointNeighborhood3x3x3
}
color[0] = 0;
color[1] = state * (100 + (count * 32));
color[2] = (state && !current) ? (100 + (count * 32)) : 0;
color[1] = static_cast<vtkm::UInt8>(state * (100 + (count * 32)));
color[2] = (state && !current) ? static_cast<vtkm::UInt8>(100 + (count * 32)) : 0;
color[3] = 255; //alpha channel
}
};

@ -84,12 +84,12 @@ public:
// now reduce across ranks using MPI.
// converting to std::vector
std::vector<vtkm::Id> send_buf(numBins);
std::vector<vtkm::Id> send_buf(static_cast<std::size_t>(numBins));
std::copy(vtkm::cont::ArrayPortalToIteratorBegin(local.GetPortalConstControl()),
vtkm::cont::ArrayPortalToIteratorEnd(local.GetPortalConstControl()),
send_buf.begin());
std::vector<vtkm::Id> recv_buf(numBins);
std::vector<vtkm::Id> recv_buf(static_cast<std::size_t>(numBins));
MPI_Reduce(&send_buf[0],
&recv_buf[0],
static_cast<int>(numBins),

@ -23,15 +23,28 @@ cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
project(MultiBackend CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
find_package(VTKm REQUIRED)
find_package(Threads REQUIRED QUIET)
set(headers
IOGenerator.h
MultiDeviceGradient.h
TaskQueue.h
)
set(device_srcs
MultiDeviceGradient.cxx
)
set(srcs
MultiBackend.cxx)
IOGenerator.cxx
MultiBackend.cxx
)
if(TARGET vtkm::cuda)
vtkm_compile_as_cuda(cuda_srcs ${srcs})
set(srcs ${cuda_srcs})
vtkm_compile_as_cuda(cuda_srcs ${device_srcs})
set(device_srcs ${cuda_srcs})
endif()
add_executable(MultiBackend ${srcs})
target_link_libraries(MultiBackend PRIVATE vtkm_cont)
add_executable(MultiBackend ${device_srcs} ${srcs} ${headers})
target_link_libraries(MultiBackend PRIVATE vtkm_cont Threads::Threads)

@ -0,0 +1,111 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include "IOGenerator.h"
#include <vtkm/Math.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/DataSetFieldAdd.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <chrono>
#include <random>
struct WaveField : public vtkm::worklet::WorkletMapField
{
typedef void ControlSignature(FieldIn<Vec3>, FieldOut<Vec3>);
typedef void ExecutionSignature(_1, _2);
template <typename T>
VTKM_EXEC void operator()(const vtkm::Vec<T, 3>& input, vtkm::Vec<T, 3>& output) const
{
output[0] = input[0];
output[1] = 0.25f * vtkm::Sin(input[0]) * vtkm::Cos(input[2]);
output[2] = input[2];
}
};
/// Create a uniform dataset with the requested dimensions and attach a point
/// field named "vec_field" to it. The field is produced by running the
/// WaveField worklet over the dataset's coordinate system using the serial
/// device adapter.
vtkm::cont::DataSet make_test3DImageData(int xdim, int ydim, int zdim)
{
  using SerialWaveDispatcher =
    vtkm::worklet::DispatcherMapField<WaveField, vtkm::cont::DeviceAdapterTagSerial>;

  const vtkm::Id3 dimensions{ xdim, ydim, zdim };
  vtkm::cont::DataSet dataset = vtkm::cont::DataSetBuilderUniform::Create(dimensions);

  // Evaluate the wave function at every coordinate of the dataset.
  vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::Float32, 3>> waveValues;
  SerialWaveDispatcher waveDispatcher;
  waveDispatcher.Invoke(dataset.GetCoordinateSystem(), waveValues);

  vtkm::cont::DataSetFieldAdd::AddPointField(dataset, "vec_field", waveValues);
  return dataset;
}
//=================================================================
/// Producer loop: builds `numberOfTasks` multi-blocks out of a small pool of
/// differently sized datasets, pushes each one onto `queue`, and finally
/// shuts the queue down to signal that no more work will arrive.
void io_generator(TaskQueue<vtkm::cont::MultiBlock>& queue, std::size_t numberOfTasks)
{
  // Pool of source datasets of increasing size. Mixing them produces uneven
  // work, which shows off the vtk-m filter work distribution.
  std::vector<vtkm::cont::DataSet> pool;
  pool.push_back(make_test3DImageData(128, 128, 128));
  pool.push_back(make_test3DImageData(256, 256, 128));
  pool.push_back(make_test3DImageData(512, 512, 128));

  // Default-constructed engine, so the generated sequence is repeatable.
  std::mt19937 engine;
  // uniform_int_distribution works on a closed interval, so both bounds
  // are values that can actually be drawn.
  std::uniform_int_distribution<vtkm::Id> pickBlockCount(6, 32);
  std::uniform_int_distribution<std::size_t> pickPoolIndex(0, pool.size() - 1);

  for (std::size_t task = 0; task != numberOfTasks; ++task)
  {
    // Decide how many blocks this multi-block gets ...
    const vtkm::Id blockCount = pickBlockCount(engine);
    vtkm::cont::MultiBlock multiblock(blockCount);

    // ... and fill it with randomly chosen datasets from the pool.
    vtkm::Id added = 0;
    while (added < blockCount)
    {
      const std::size_t choice = pickPoolIndex(engine);
      multiblock.AddBlock(pool[choice]);
      ++added;
    }
    std::cout << "adding multi-block with " << multiblock.GetNumberOfBlocks() << " blocks"
              << std::endl;

    // Hand the multi-block over to the queue. std::move makes it explicit
    // that this thread must not touch the object after the call.
    queue.push(std::move(multiblock));

    // A pause could be inserted here to imitate data streaming in; it is
    // currently disabled.
  }

  // Let the consumers know that no further work will be submitted.
  queue.shutdown();
  std::cout << "io_generator finished" << std::endl;
}

@ -0,0 +1,30 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_examples_multibackend_IOWorker_h
#define vtk_m_examples_multibackend_IOWorker_h
#include "TaskQueue.h"
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/MultiBlock.h>
// Builds a uniform dataset from the three given dimensions and returns it.
vtkm::cont::DataSet make_test3DImageData(int xdim, int ydim, int zdim);
// Producer entry point: pushes `numberOfTasks` multi-blocks onto `queue`.
// The queue is taken by reference because it is shared with the consumer.
void io_generator(TaskQueue<vtkm::cont::MultiBlock>& queue, std::size_t numberOfTasks);
#endif

@ -17,118 +17,96 @@
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <iostream>
#include <thread>
#include <vtkm/Math.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/MultiBlock.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/WorkletMapField.h>
#include "IOGenerator.h"
#include "MultiDeviceGradient.h"
#include "TaskQueue.h"
#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
using FloatVec3 = vtkm::Vec<vtkm::Float32, 3>;
using Uint8Vec4 = vtkm::Vec<vtkm::UInt8, 4>;
struct GenerateSurfaceWorklet : public vtkm::worklet::WorkletMapField
{
vtkm::Float32 t;
GenerateSurfaceWorklet(vtkm::Float32 st)
: t(st)
{
}
typedef void ControlSignature(FieldIn<>, FieldOut<>, FieldOut<>);
typedef void ExecutionSignature(_1, _2, _3);
template <typename T>
VTKM_EXEC void operator()(const vtkm::Vec<T, 3>& input,
vtkm::Vec<T, 3>& output,
vtkm::Vec<vtkm::UInt8, 4>& color) const
{
output[0] = input[0];
output[1] = 0.25f * vtkm::Sin(input[0] * 10.f + t) * vtkm::Cos(input[2] * 10.f + t);
output[2] = input[2];
color[0] = 0;
color[1] = static_cast<vtkm::UInt8>(160 + (96 * vtkm::Sin(input[0] * 10.f + t)));
color[2] = static_cast<vtkm::UInt8>(160 + (96 * vtkm::Cos(input[2] * 5.f + t)));
color[3] = 255;
}
};
struct RunGenerateSurfaceWorklet
{
template <typename DeviceAdapterTag>
bool operator()(DeviceAdapterTag) const
{
//At this point we know we have runtime support
using DeviceTraits = vtkm::cont::DeviceAdapterTraits<DeviceAdapterTag>;
using DispatcherType =
vtkm::worklet::DispatcherMapField<GenerateSurfaceWorklet, DeviceAdapterTag>;
std::cout << "Running a worklet on device adapter: " << DeviceTraits::GetName() << std::endl;
GenerateSurfaceWorklet worklet(0.05f);
DispatcherType(worklet).Invoke(this->In, this->Out, this->Color);
return true;
}
vtkm::cont::ArrayHandle<FloatVec3> In;
vtkm::cont::ArrayHandle<FloatVec3> Out;
vtkm::cont::ArrayHandle<Uint8Vec4> Color;
};
template <typename T>
std::vector<vtkm::Vec<T, 3>> make_testData(int size)
{
std::vector<vtkm::Vec<T, 3>> data;
data.reserve(static_cast<std::size_t>(size * size));
for (int i = 0; i < size; ++i)
{
for (int j = 0; j < size; ++j)
{
data.push_back(vtkm::Vec<T, 3>(
2.f * static_cast<T>(i / size) - 1.f, 0.f, 2.f * static_cast<T>(j / size) - 1.f));
}
}
return data;
}
//This is the list of devices to compile in support for. The order of the
//devices determines the runtime preference.
struct DevicesToTry : vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagCuda,
vtkm::cont::DeviceAdapterTagTBB,
vtkm::cont::DeviceAdapterTagSerial>
{
};
//This demo shows off using vtk-m in multiple threads in two different ways.
//
//At a high level we have 2 primary threads, an IO thread and a Worker thread
//The IO thread will generate all data using the vtk-m serial device, and
//will post this data to a worker queue as a vtk-m multiblock.
//The Worker thread will pull down these vtk-m multiblock data and run a
//vtk-m filter on the multiblock.
//The vtk-m filter it runs will itself have a worker pool which it will
//distribute work to. The number of workers is based on what device adapters
//are enabled but uses the following logic:
// - If TBB is enabled construct a single TBB worker
// - If CUDA is enabled construct 4 workers for each GPU on the machine
//
//Unfortunately due to some thread unsafe logic in VTK-m it is currently not
//possible to have CUDA and TBB workers at the same time. So the class will
//choose CUDA over TBB when possible.
//Once the thread unsafe logic is fixed, on a machine that has a single CPU
//and a single GPU we should expect to have 2 primary 'main loop'
//threads, and 5 threads for heavy 'task' work.
void multiblock_processing(TaskQueue<vtkm::cont::MultiBlock>& queue);
int main(int, char**)
{
std::vector<FloatVec3> data = make_testData<vtkm::Float32>(1024);
//Step 1. Construct the two primary 'main loops'. The threads
//share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
TaskQueue<vtkm::cont::MultiBlock> queue;
std::thread io(io_generator, std::ref(queue), 12);
std::thread worker(multiblock_processing, std::ref(queue));
//make array handles for the data
// TryExecutes takes a functor and a list of devices. It then tries to run
// the functor for each device (in the order given in the list) until the
// execution succeeds. This allows you to compile in support for multiple
// devices which have runtime requirements ( GPU / HW Accelerator ) and
// correctly choose the best device at runtime.
//
// The functor parentheses operator should take exactly one argument, which is
// the DeviceAdapterTag to use. The functor should return true if the execution
// succeeds.
//
// This function also optionally takes a vtkm::cont::RuntimeDeviceTracker, which
// will monitor for certain failures across calls to TryExecute and skip trying
// devices with a history of failure.
RunGenerateSurfaceWorklet task;
task.In = vtkm::cont::make_ArrayHandle(data);
vtkm::cont::TryExecute(task, DevicesToTry());
//Step N. Wait for the work to finish
io.join();
worker.join();
return 0;
}
//=================================================================
/// Consumer loop: pulls multi-blocks off `queue` until the queue reports that
/// it has no more tasks, runs the multi-device gradient filter on each of
/// them, and checks that every resulting block carries a point field named
/// "Gradients".
void multiblock_processing(TaskQueue<vtkm::cont::MultiBlock>& queue)
{
  //Step 1. Construct the gradient filter outside the work loop
  //so that we can reuse the thread pool it constructs
  MultiDeviceGradient gradient;
  gradient.SetComputePointGradient(true);

  while (queue.hasTasks())
  {
    //Step 2. Grab the next multi-block, skipping any that are empty
    //as empty ones can be returned when the queue is about
    //to say it has no work
    vtkm::cont::MultiBlock mb = queue.pop();
    if (mb.GetNumberOfBlocks() == 0)
    {
      continue;
    }

    //Step 3. Get the first field name from the multi-block
    std::string fieldName = mb.GetBlock(0).GetField(0).GetName();

    //Step 4. Run a multi device gradient
    gradient.SetActiveField(fieldName);
    vtkm::cont::MultiBlock result = gradient.Execute(mb);
    std::cout << "finished processing a multi-block" << std::endl;

    //Step 5. Verify each block has a "Gradients" field
    for (auto&& block : result)
    {
      try
      {
        const auto& field = block.GetField("Gradients", vtkm::cont::Field::ASSOC_POINTS);
        (void)field;
      }
      // Catch by const reference: catching a class-type exception by value
      // copies it and would slice anything derived from ErrorBadValue.
      catch (const vtkm::cont::ErrorBadValue&)
      {
        std::cerr << "gradient filter failed!" << std::endl;
        // Stop checking the remaining blocks of this result; the outer
        // loop still continues with the next multi-block.
        break;
      }
    }
  }

  std::cout << "multiblock_processing finished" << std::endl;
}

@ -0,0 +1,28 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Define the marker macro BEFORE including the header: the header only
// declares the `extern template` for PrepareForExecution when this macro is
// not defined, and this translation unit is the one that provides the
// explicit instantiation below.
#define vtk_m_examples_multibackend_MultiDeviceGradient_cxx
#include "MultiDeviceGradient.h"
// NOTE(review): presumably carries the template definitions needed for the
// explicit instantiation — confirm against MultiDeviceGradient.hxx.
#include "MultiDeviceGradient.hxx"
// Explicit instantiation of PrepareForExecution for the default policy.
template vtkm::cont::MultiBlock MultiDeviceGradient::PrepareForExecution<
vtkm::filter::PolicyDefault>(const vtkm::cont::MultiBlock&,
const vtkm::filter::PolicyBase<vtkm::filter::PolicyDefault>&);

@ -0,0 +1,94 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_examples_multibackend_MultiDeviceGradient_h
#define vtk_m_examples_multibackend_MultiDeviceGradient_h
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/FilterTraits.h>
#include "TaskQueue.h"
// Include what this header uses directly instead of relying on transitive
// includes: std::function (RuntimeTaskQueue), std::thread and std::vector
// (the MultiDeviceGradient worker pool).
#include <functional>
#include <thread>
#include <vector>
// Queue of deferred tasks. Each task is a callable that receives the
// RuntimeDeviceTracker of the worker thread that ends up executing it.
using RuntimeTaskQueue = TaskQueue<std::function<void(const vtkm::cont::RuntimeDeviceTracker&)>>;
/// \brief Construct a MultiDeviceGradient for a given multiblock dataset
///
/// The Policy used with MultiDeviceGradient must include the TBB and CUDA
/// backends.
///
/// The filter owns a task queue and a vector of worker threads (see the
/// private members below); blocks are submitted to the queue and processed
/// by those workers.
class MultiDeviceGradient : public vtkm::filter::FilterField<MultiDeviceGradient>
{
public:
// Construct a MultiDeviceGradient and its worker pool.
// The definition is not part of this header.
VTKM_CONT
MultiDeviceGradient();
// Needed so that we can shut down the worker pool properly.
VTKM_CONT
~MultiDeviceGradient();
/// When this flag is on, the gradient filter will provide point-based
/// gradients, which are significantly more costly since for each point we
/// need to compute the gradient of each cell that uses it.
/// NOTE(review): the default is expected to be off, but the initial value is
/// set by the constructor, which is not visible here — confirm.
void SetComputePointGradient(bool enable) { ComputePointGradient = enable; }
/// Returns the current value of the point-gradient flag.
bool GetComputePointGradient() const { return ComputePointGradient; }
/// Will submit each block to a work queue that the threads will
/// pull work from.
template <typename DerivedPolicy>
VTKM_CONT vtkm::cont::MultiBlock PrepareForExecution(
const vtkm::cont::MultiBlock&,
const vtkm::filter::PolicyBase<DerivedPolicy>&);
private:
// Whether point gradients are requested.
bool ComputePointGradient;
// Task queue shared with the workers. Declared before Workers, so it is
// constructed first and destroyed last.
RuntimeTaskQueue Queue;
// Worker threads that pull tasks from Queue.
std::vector<std::thread> Workers;
};
namespace vtkm
{
namespace filter
{
/// FilterTraits specialization for MultiDeviceGradient: declares the list of
/// field value types the filter accepts as input.
template <>
class FilterTraits<MultiDeviceGradient>
{
public:
// Accepted input types: 32/64-bit floating point scalars and the matching
// 3-component vectors.
struct TypeListTagGradientInputs : vtkm::ListTagBase<vtkm::Float32,
vtkm::Float64,
vtkm::Vec<vtkm::Float32, 3>,
vtkm::Vec<vtkm::Float64, 3>>
{
};
// Alias exposing the accepted type list under the name InputFieldTypeList.
using InputFieldTypeList = TypeListTagGradientInputs;
};
}
} // namespace vtkm::filter
// Unless the including translation unit has defined the _cxx marker macro,
// declare the default-policy PrepareForExecution as an explicit instantiation
// declaration, so it is not implicitly instantiated by every includer. The
// translation unit that defines the macro is expected to provide the
// explicit instantiation itself.
#ifndef vtk_m_examples_multibackend_MultiDeviceGradient_cxx
extern template vtkm::cont::MultiBlock MultiDeviceGradient::PrepareForExecution<
vtkm::filter::PolicyDefault>(const vtkm::cont::MultiBlock&,
const vtkm::filter::PolicyBase<vtkm::filter::PolicyDefault>&);
#endif
#endif

@ -0,0 +1,231 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/filter/Gradient.h>
namespace
{
// Returns the number of devices reported by cudaGetDeviceCount. The result is
// 0 when VTKM_ENABLE_CUDA is not defined or when the query does not return
// cudaSuccess.
vtkm::Id deterine_cuda_gpu_count()
{
#if defined(VTKM_ENABLE_CUDA)
  int deviceTotal = 0;
  if (cudaGetDeviceCount(&deviceTotal) == cudaSuccess)
  {
    return static_cast<vtkm::Id>(deviceTotal);
  }
#endif
  return 0;
}
// Worker loop for a thread that executes queued tasks on the TBB backend.
//
// queue: the shared task queue; the loop runs until queue.hasTasks() reports
//        false (i.e. the queue was shut down and has been drained).
void process_block_tbb(RuntimeTaskQueue& queue)
{
  //Step 1. Create a tracker for this thread that is forced to TBB.
  //The tracker is handed to every task so that any vtkm::filters used by
  //the task operate only on TBB
  vtkm::cont::RuntimeDeviceTracker tracker;
  tracker.ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});

  while (queue.hasTasks())
  {
    //Step 2. Get the task to run on TBB
    auto task = queue.pop();

    //Step 3. pop() hands back an empty task when it was woken up because the
    //queue is shutting down and nothing is left to do. Such a wake up is not
    //a processed task, so it must not be reported via completedTask();
    //otherwise the queue's outstanding task counter is decremented for work
    //that was never submitted.
    if (task == nullptr)
    {
      continue;
    }

    //Step 4. Run the task on TBB
    task(tracker);

    //Step 5. Notify the queue that we finished processing this task
    queue.completedTask();
    std::cout << "finished a block on tbb (" << std::this_thread::get_id() << ")" << std::endl;
  }
}
// Worker loop for a thread that executes queued tasks on the CUDA backend.
//
// queue: the shared task queue; the loop runs until queue.hasTasks() reports
//        false (i.e. the queue was shut down and has been drained).
// gpuId: device index passed to cudaSetDevice for this thread. Unused when
//        VTKM_ENABLE_CUDA is not defined.
void process_block_cuda(RuntimeTaskQueue& queue, int gpuId)
{
  //Step 1. Create a tracker for this thread that is forced to cuda.
  //The tracker is handed to every task so that any vtkm::filters used by
  //the task operate only on cuda
  vtkm::cont::RuntimeDeviceTracker tracker;
#if defined(VTKM_ENABLE_CUDA)
  const cudaError_t status = cudaSetDevice(gpuId);
  if (status != cudaSuccess)
  {
    //Keep the worker alive (so queued work is still drained) but make the
    //failure visible instead of silently dropping the error code
    std::cerr << "unable to select cuda device " << gpuId << ": " << cudaGetErrorString(status)
              << std::endl;
  }
  tracker.ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
#endif
  (void)gpuId;

  while (queue.hasTasks())
  {
    //Step 2. Get the task to run on cuda
    auto task = queue.pop();

    //Step 3. pop() hands back an empty task when it was woken up because the
    //queue is shutting down and nothing is left to do. Such a wake up is not
    //a processed task, so it must not be reported via completedTask();
    //otherwise the queue's outstanding task counter is decremented for work
    //that was never submitted.
    if (task == nullptr)
    {
      continue;
    }

    //Step 4. Run the task on cuda
    task(tracker);

    //Step 5. Notify the queue that we finished processing this task
    queue.completedTask();
    std::cout << "finished a block on cuda (" << std::this_thread::get_id() << ")" << std::endl;
  }
}
} //namespace
//-----------------------------------------------------------------------------
// Creates the filter and launches the worker threads that will service the
// shared task queue.
VTKM_CONT MultiDeviceGradient::MultiDeviceGradient()
  : ComputePointGradient(false)
  , Queue()
  , Workers()
{
  //Step 1. Ask a runtime tracker which backends are usable
  vtkm::cont::RuntimeDeviceTracker tracker;
  const bool runOnTbb = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagTBB{});
  const bool runOnCuda = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagCuda{});

  //The virtual implementation currently has issues in a multi-threaded
  //environment: either only cuda can be used, or only the SMP backends
  //( Serial, TBB, OpenMP ). That is why the branches below are exclusive.
  //Once that is resolved this example can enable CUDA + TBB together.
  //
  //NOTE(review): if neither branch launches a worker (for instance cuda is
  //reported usable but the device count comes back as 0) nothing in this
  //file will consume the queue -- confirm this cannot happen in practice.

  if (runOnCuda)
  {
    //Step 2. Launch the cuda workers. Every thread shares the one queue
    //object, so it has to be passed explicitly by reference (std::ref)
    std::cout << "adding cuda workers" << std::endl;
    const vtkm::Id gpu_count = deterine_cuda_gpu_count();
    for (vtkm::Id gpu = 0; gpu < gpu_count; ++gpu)
    {
      //The number of workers per GPU is purely arbitrary currently, but in
      //general we want several of them so compute and transfer can overlap
      for (int worker = 0; worker < 4; ++worker)
      {
        this->Workers.emplace_back(process_block_cuda, std::ref(this->Queue), gpu);
      }
    }
  }
  else if (runOnTbb)
  {
    //Step 3. Launch a single tbb worker, again sharing the queue by
    //reference
    std::cout << "adding a tbb worker" << std::endl;
    this->Workers.emplace_back(process_block_tbb, std::ref(this->Queue));
  }
}
//-----------------------------------------------------------------------------
// Tells the queue to shut down and then waits for every worker thread to
// leave its processing loop.
VTKM_CONT MultiDeviceGradient::~MultiDeviceGradient()
{
  //wake up the workers so they can observe the shutdown request
  this->Queue.shutdown();

  //block until each worker thread has finished
  for (auto worker = this->Workers.begin(); worker != this->Workers.end(); ++worker)
  {
    worker->join();
  }
}
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// Runs vtkm::filter::Gradient on every block of `mb` by posting one task per
// block to the worker queue, and returns a MultiBlock holding the per-block
// results at the same block indices.
//
// mb:     the input blocks. An empty MultiBlock yields an empty result.
// policy: the policy forwarded to Gradient::Execute for every block.
template <typename DerivedPolicy>
inline VTKM_CONT vtkm::cont::MultiBlock MultiDeviceGradient::PrepareForExecution(
  const vtkm::cont::MultiBlock& mb,
  const vtkm::filter::PolicyBase<DerivedPolicy>& policy)
{
  //Step 1. Reset the queue state. This has to happen for each execute as we
  //want to support the same filter being used for multiple inputs
  this->Queue.reset();

  //Step 2. Construct the multi-block we are going to fill, with one
  //default-constructed DataSet per input block so that workers can replace
  //them by index
  vtkm::cont::MultiBlock output;
  output.AddBlocks(std::vector<vtkm::cont::DataSet>(mb.GetNumberOfBlocks()));
  vtkm::cont::MultiBlock* outPtr = &output;

  //Nothing to do for an empty input. Without this guard the code below
  //would dereference mb.cbegin() even though it equals mb.cend().
  auto block = mb.cbegin();
  if (block == mb.cend())
  {
    return output;
  }

  //Step 3. Construct the filter we want to run on each block
  vtkm::filter::Gradient gradient;
  gradient.SetComputePointGradient(this->GetComputePointGradient());
  gradient.SetActiveField(this->GetActiveFieldName());

  //Helper that posts the work for a single block. The task captures
  //everything it needs by value (the filter, the policy, the input block,
  //the output pointer and the block index) as it runs on a worker thread.
  auto submitBlock = [&](vtkm::Id blockIndex, const vtkm::cont::DataSet& input) {
    this->Queue.push( //build a lambda that is the work to do
      [=](const vtkm::cont::RuntimeDeviceTracker& tracker) {
        //make a per thread copy of the filter
        //and give it the device tracker
        vtkm::filter::Gradient perThreadGrad = gradient;
        perThreadGrad.SetRuntimeDeviceTracker(tracker);

        vtkm::cont::DataSet result = perThreadGrad.Execute(input, policy);
        outPtr->ReplaceBlock(blockIndex, result);
      });
  };

  //Step 3b. Post 1 block up as work and block until it is
  //complete. This is needed as currently constructing the virtual
  //Point Coordinates is not thread safe.
  submitBlock(0, *block);
  this->Queue.waitForAllTasksToComplete();
  ++block;

  //Step 4. Post every remaining block without waiting in between, so that
  //multiple workers can execute in a non-blocking manner
  vtkm::Id index = 1;
  for (; block != mb.cend(); ++block)
  {
    submitBlock(index, *block);
    index++;
  }

  // Step 5. Wait on all workers to finish
  this->Queue.waitForAllTasksToComplete();

  return output;
}

@ -0,0 +1,151 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_examples_multibackend_TaskQueue_h
#define vtk_m_examples_multibackend_TaskQueue_h
#include <vtkm/cont/MultiBlock.h>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <utility>
/// A small mutex/condition-variable protected work queue shared between one
/// producer and any number of worker threads.
///
/// Besides the queued items, the queue tracks how many pushed items have not
/// yet been reported as finished (see completedTask()), which is what
/// waitForAllTasksToComplete() blocks on.
///
/// T must be default constructible and movable; a default constructed T is
/// what pop() returns when it is woken up by shutdown() with nothing queued.
template <typename T>
class TaskQueue
{
public:
  TaskQueue() = default;

  /// Clears the shutdown request and zeroes the outstanding task counter.
  /// Items that are still queued are left in place.
  void reset()
  {
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      this->ShutdownOnceTasksCompleted = false;
      this->TaskCount = 0;
    }
    this->CV.notify_all();
  }

  /// Requests shutdown: sleeping pop() calls wake up, and hasTasks() starts
  /// reporting false as soon as the queue is empty.
  void shutdown()
  {
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      this->ShutdownOnceTasksCompleted = true;
    }
    this->CV.notify_all();
  }

  /// Always true until shutdown() has been requested (the producer may still
  /// add work). After that, true only while items remain queued.
  bool hasTasks()
  {
    std::unique_lock<std::mutex> lock(this->Lock);
    return !this->ShutdownOnceTasksCompleted || !this->Queue.empty();
  }

  /// Adds a task to the queue and counts it as outstanding.
  void push(T&& item)
  {
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      //item is a named rvalue reference and therefore an lvalue here;
      //without std::move the task would be copied into the queue
      this->Queue.push(std::move(item));
      this->TaskCount++;
    } //unlock before we notify so we don't deadlock
    this->CV.notify_all();
  }

  /// Removes and returns the next task, sleeping while the queue is empty.
  /// Once shutdown() has been requested this no longer sleeps and returns a
  /// default constructed T when nothing is queued.
  T pop()
  {
    T item;
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      //sleep until there is work, or until we are shutting down (in which
      //case we always have to wake up)
      this->CV.wait(lock, [this] {
        return this->ShutdownOnceTasksCompleted || !this->Queue.empty();
      });

      //When shutting down the wait above does not guarantee that the queue
      //is non-empty, so make sure we have something to pop
      if (!this->Queue.empty())
      {
        //take the job; the queue's copy is discarded right after, so move
        item = std::move(this->Queue.front());
        this->Queue.pop();
      }
    } //unlock before we notify so we don't deadlock
    this->CV.notify_all();
    return item;
  }

  /// Reports that a task obtained from pop() has been fully processed.
  void completedTask()
  {
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      this->TaskCount--;
    } //unlock before we notify so we don't deadlock
    this->CV.notify_all();
  }

  /// Blocks until every pushed task has been reported as finished.
  /// For this to work, each thread must call completedTask() after it has
  /// processed a task it obtained from pop().
  void waitForAllTasksToComplete()
  {
    {
      std::unique_lock<std::mutex> lock(this->Lock);
      this->CV.wait(lock, [this] { return this->TaskCount == 0; });
    }
    this->CV.notify_all();
  }

private:
  std::mutex Lock;
  std::queue<T> Queue;
  std::condition_variable CV;
  //number of pushed tasks not yet reported through completedTask()
  int TaskCount = 0;
  bool ShutdownOnceTasksCompleted = false;

  //don't want copies of this
  TaskQueue(const TaskQueue& rhs) = delete;
  TaskQueue& operator=(const TaskQueue& rhs) = delete;
  TaskQueue(TaskQueue&& rhs) = delete;
  TaskQueue& operator=(TaskQueue&& rhs) = delete;
};
#endif

@ -129,7 +129,10 @@ public:
typedef _2 ExecutionSignature(_1, WorkIndex);
VTKM_EXEC
vtkm::Float32 operator()(vtkm::Int64 x, vtkm::Id& index) const { return (vtkm::Sin(1.0 * x)); }
vtkm::Float32 operator()(vtkm::Int64 x, vtkm::Id&) const
{
return (vtkm::Sin(static_cast<vtkm::Float32>(x)));
}
};
}
}

@ -78,7 +78,7 @@ set(headers
ErrorFilterExecution.h
ErrorExecution.h
ErrorInternal.h
ExecutionObjectFactoryBase.h
ExecutionObjectBase.h
Field.h
FieldRangeCompute.h
FieldRangeGlobalCompute.h

@ -610,7 +610,7 @@ public:
ArrayPortalConst<vtkm::Id> CellIds;
};
struct TwoLevelUniformGridExecutionObjectFactory : public vtkm::cont::ExecutionObjectFactoryBase
struct TwoLevelUniformGridExecutionObjectFactory : public vtkm::cont::ExecutionObjectBase
{
template <typename DeviceAdapter>
VTKM_CONT TwoLevelUniformGridExecution<DeviceAdapter> PrepareForExecution(

@ -128,7 +128,7 @@ private:
std::string coordNm,
std::string cellNm)
{
vtkm::Id dims[3];
vtkm::Id dims[3] = { 1, 1, 1 };
int ndims = 0;
for (int i = 0; i < 3; ++i)
{

@ -17,23 +17,23 @@
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_ExecutionObjectFactoryBase_h
#define vtk_m_cont_ExecutionObjectFactoryBase_h
#ifndef vtk_m_cont_ExecutionObjectBase_h
#define vtk_m_cont_ExecutionObjectBase_h
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
/// Base \c ExecutionObjectFactoryBase for execution objects to inherit from so that
/// Base \c ExecutionObjectBase for execution objects to inherit from so that
/// you can use an arbitrary object as a parameter in an execution environment
/// function. Any method you want to use on the execution side must have the
/// VTKM_EXEC modifier.
/// \tparam Device
class ExecutionObjectFactoryBase
class ExecutionObjectBase
{
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_ExecutionObjectFactoryBase_h
#endif //vtk_m_cont_ExecutionObjectBase_h

@ -173,7 +173,8 @@ public:
/// \param coords An ArrayHandle of x, y, z coordinates of input points.
/// \param device Tag for selecting device adapter
template <typename DeviceAdapter>
void Build(const vtkm::cont::ArrayHandle<vtkm::Vec<T, 3>>& coords, DeviceAdapter)
void Build(const vtkm::cont::ArrayHandle<vtkm::Vec<T, 3>>& coords,
DeviceAdapter vtkmNotUsed(device))
{
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
// generate unique id for each input point

@ -24,7 +24,7 @@
#include <vtkm/cont/arg/Transport.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
namespace vtkm
@ -48,10 +48,10 @@ struct Transport<vtkm::cont::arg::TransportTagExecObject, ContObjectType, Device
{
// If you get a compile error here, it means you tried to use an object that is not an execution
// object as an argument that is expected to be one. All execution objects are expected to
// inherit from vtkm::exec::ExecutionObjectFactoryBase.
// inherit from vtkm::cont::ExecutionObjectBase.
VTKM_STATIC_ASSERT_MSG(
(std::is_base_of<vtkm::cont::ExecutionObjectFactoryBase, ContObjectType>::value),
"All execution objects are expected to inherit from vtkm::exec::ExecutionObjectFactoryBase");
(std::is_base_of<vtkm::cont::ExecutionObjectBase, ContObjectType>::value),
"All execution objects are expected to inherit from vtkm::cont::ExecutionObjectBase");
using ExecObjectType = decltype(std::declval<ContObjectType>().PrepareForExecution(Device()));
template <typename InputDomainType>

@ -24,7 +24,7 @@
#include <vtkm/cont/arg/TypeCheck.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <type_traits>
@ -36,7 +36,7 @@ namespace arg
{
/// The ExecObject type check passes for any object that inherits from \c
/// ExecutionObjectFactoryBase. This is supposed to signify that the object can be
/// ExecutionObjectBase. This is supposed to signify that the object can be
/// used in the execution environment although there is no way to verify that.
///
struct TypeCheckTagExecObject
@ -46,8 +46,7 @@ struct TypeCheckTagExecObject
template <typename Type>
struct TypeCheck<TypeCheckTagExecObject, Type>
{
static constexpr bool value =
std::is_base_of<vtkm::cont::ExecutionObjectFactoryBase, Type>::value;
static constexpr bool value = std::is_base_of<vtkm::cont::ExecutionObjectBase, Type>::value;
};
}
}

@ -24,7 +24,7 @@
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/cont/testing/Testing.h>
@ -39,7 +39,7 @@ struct ExecutionObject
vtkm::Int32 Number;
};
struct TestExecutionObject : public vtkm::cont::ExecutionObjectFactoryBase
struct TestExecutionObject : public vtkm::cont::ExecutionObjectBase
{
vtkm::Int32 Number;

@ -27,7 +27,7 @@
namespace
{
struct TestExecutionObject : vtkm::cont::ExecutionObjectFactoryBase
struct TestExecutionObject : vtkm::cont::ExecutionObjectBase
{
};
struct TestNotExecutionObject

@ -24,7 +24,9 @@ set(headers
CudaAllocator.h
DeviceAdapterAlgorithmCuda.h
DeviceAdapterAlgorithmThrust.h
DeviceAdapterRuntimeDetectorCuda.h
DeviceAdapterTagCuda.h
DeviceAdapterTimerImplementationCuda.h
ExecutionArrayInterfaceBasicCuda.h
MakeThrustIterator.h
TaskTuner.h
@ -41,7 +43,9 @@ endif()
target_sources(vtkm_cont PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/ArrayManagerExecutionCuda.cu
${CMAKE_CURRENT_SOURCE_DIR}/ExecutionArrayInterfaceBasicCuda.cu
${CMAKE_CURRENT_SOURCE_DIR}/CudaAllocator.cu
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmThrust.cu
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterRuntimeDetectorCuda.cu
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterTimerImplementationCuda.cu
${CMAKE_CURRENT_SOURCE_DIR}/ExecutionArrayInterfaceBasicCuda.cu
)

@ -31,29 +31,14 @@
// Here are the actual implementation of the algorithms.
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h>
// Here are the implementations of device adapter specific classes
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTimerImplementationCuda.h>
#include <vtkm/exec/cuda/internal/TaskStrided.h>
#include <cuda.h>
namespace vtkm
{
namespace cont
{
namespace cuda
{
namespace internal
{
static __global__ void DetermineIfValidCudaDevice()
{
//used only to see if we can launch kernels. It is possible to have a
//CUDA capable device, but still fail to have CUDA support.
}
}
}
}
}
namespace vtkm
{
namespace cont
@ -71,129 +56,6 @@ struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>
}
};
/// CUDA contains its own high resolution timer.
///
template <>
class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>
{
public:
VTKM_CONT DeviceAdapterTimerImplementation()
{
VTKM_CUDA_CALL(cudaEventCreate(&this->StartEvent));
VTKM_CUDA_CALL(cudaEventCreate(&this->EndEvent));
this->Reset();
}
VTKM_CONT ~DeviceAdapterTimerImplementation()
{
// These aren't wrapped in VTKM_CUDA_CALL because we can't throw errors
// from destructors. We're relying on cudaGetLastError in the
// VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR catching any issues from these calls
// later.
cudaEventDestroy(this->StartEvent);
cudaEventDestroy(this->EndEvent);
}
VTKM_CONT void Reset()
{
VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, cudaStreamPerThread));
VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
}
VTKM_CONT vtkm::Float64 GetElapsedTime()
{
VTKM_CUDA_CALL(cudaEventRecord(this->EndEvent, cudaStreamPerThread));
VTKM_CUDA_CALL(cudaEventSynchronize(this->EndEvent));
float elapsedTimeMilliseconds;
VTKM_CUDA_CALL(
cudaEventElapsedTime(&elapsedTimeMilliseconds, this->StartEvent, this->EndEvent));
return static_cast<vtkm::Float64>(0.001f * elapsedTimeMilliseconds);
}
private:
// Copying CUDA events is problematic.
DeviceAdapterTimerImplementation(
const DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>&) = delete;
void operator=(const DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>&) =
delete;
cudaEvent_t StartEvent;
cudaEvent_t EndEvent;
};
/// \brief Class providing a CUDA runtime support detector.
///
/// The class provide the actual implementation used by
/// vtkm::cont::RuntimeDeviceInformation for the CUDA backend.
///
/// We will verify at runtime that the machine has at least one CUDA
/// capable device, and said device is from the 'fermi' (SM_20) generation
/// or newer.
///
template <>
class DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>
{
public:
VTKM_CONT DeviceAdapterRuntimeDetector()
: NumberOfDevices(0)
, HighestArchSupported(0)
{
static bool deviceQueryInit = false;
static int numDevices = 0;
static int archVersion = 0;
if (!deviceQueryInit)
{
deviceQueryInit = true;
//first query for the number of devices
VTKM_CUDA_CALL(cudaGetDeviceCount(&numDevices));
for (vtkm::Int32 i = 0; i < numDevices; i++)
{
cudaDeviceProp prop;
VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion);
}
//Make sure we can actually launch a kernel. This could fail for any
//of the following reasons:
//
// 1. cudaErrorInsufficientDriver, caused by out of data drives
// 2. cudaErrorDevicesUnavailable, caused by another process locking the
// device or somebody disabling cuda support on the device
// 3. cudaErrorNoKernelImageForDevice we built for a compute version
// greater than the device we are running on
// Most likely others that I don't even know about
vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
if (cudaSuccess != cudaGetLastError())
{
numDevices = 0;
archVersion = 0;
}
}
this->NumberOfDevices = numDevices;
this->HighestArchSupported = archVersion;
}
/// Returns true if the given device adapter is supported on the current
/// machine.
///
/// Only returns true if we have at-least one CUDA capable device of SM_20 or
/// greater ( fermi ).
///
VTKM_CONT bool Exists() const
{
//
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
}
private:
vtkm::Int32 NumberOfDevices;
vtkm::Int32 HighestArchSupported;
};
/// CUDA contains its own atomic operations
///
template <typename T>

@ -32,11 +32,17 @@ namespace cuda
namespace internal
{
VTKM_CONT_EXPORT int getNumSMs(int dId)
VTKM_CONT_EXPORT vtkm::UInt32 getNumSMs(int dId)
{
std::size_t index = 0;
if (dId > 0)
{
index = static_cast<size_t>(dId);
}
//check
static bool lookupBuilt = false;
static std::vector<int> numSMs;
static std::vector<vtkm::UInt32> numSMs;
if (!lookupBuilt)
{
@ -53,11 +59,11 @@ VTKM_CONT_EXPORT int getNumSMs(int dId)
{ //get the number of sm's per deviceId
VTKM_CUDA_CALL(
cudaDeviceGetAttribute(&numberOfSMs, cudaDevAttrMultiProcessorCount, deviceId));
numSMs.push_back(numberOfSMs);
numSMs.push_back(static_cast<vtkm::UInt32>(numberOfSMs));
}
lookupBuilt = true;
}
return numSMs[dId];
return numSMs[index];
}
// we use cuda pinned memory to reduce the amount of synchronization
@ -107,8 +113,8 @@ char* DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::SetupError
template <>
void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
int& grids,
int& blocks,
vtkm::UInt32& grids,
vtkm::UInt32& blocks,
vtkm::Id size)
{
(void)size;
@ -120,7 +126,7 @@ void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAnd
template <>
void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
int& grids,
vtkm::UInt32& grids,
dim3& blocks,
const dim3& size)
{

@ -1084,10 +1084,10 @@ public:
static char* SetupErrorBuffer(vtkm::exec::cuda::internal::TaskStrided& functor);
VTKM_CONT_EXPORT
static void GetGridsAndBlocks(int& grid, int& blocks, vtkm::Id size);
static void GetGridsAndBlocks(vtkm::UInt32& grid, vtkm::UInt32& blocks, vtkm::Id size);
VTKM_CONT_EXPORT
static void GetGridsAndBlocks(int& grid, dim3& blocks, const dim3& size);
static void GetGridsAndBlocks(vtkm::UInt32& grid, dim3& blocks, const dim3& size);
public:
template <typename WType, typename IType>
@ -1102,7 +1102,7 @@ public:
}
char* hostErrorPtr = SetupErrorBuffer(functor);
int grids, blocks;
vtkm::UInt32 grids, blocks;
GetGridsAndBlocks(grids, blocks, numInstances);
TaskStrided1DLaunch<<<grids, blocks, 0, cudaStreamPerThread>>>(functor, numInstances);
@ -1140,7 +1140,7 @@ public:
static_cast<vtkm::UInt32>(rangeMax[1]),
static_cast<vtkm::UInt32>(rangeMax[2]));
int grids;
vtkm::UInt32 grids;
dim3 blocks;
GetGridsAndBlocks(grids, blocks, ranges);

@ -0,0 +1,100 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/Math.h>
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <cuda.h>
namespace vtkm
{
namespace cont
{
namespace cuda
{
namespace internal
{
// Intentionally empty kernel with internal linkage. It is launched by the
// runtime detector in this file purely as a probe: whether the launch
// reports an error is the only thing that matters.
static __global__ void DetermineIfValidCudaDevice()
{
//used only to see if we can launch kernels. It is possible to have a
//CUDA capable device, but still fail to have CUDA support.
}
}
}
}
}
namespace vtkm
{
namespace cont
{
// Queries the CUDA runtime for the device count and the highest compute
// capability (encoded as major * 10 + minor) and stores both on this
// instance. The query itself is only performed the first time a detector is
// constructed; the results are cached in function-local statics and copied
// into every later instance.
//
// NOTE(review): the static cache is read and written without any visible
// synchronization, and deviceQueryInit is set before the query has finished.
// Confirm whether detectors can be constructed concurrently from several
// threads.
DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRuntimeDetector()
: NumberOfDevices(0)
, HighestArchSupported(0)
{
// Cached results shared by all instances
static bool deviceQueryInit = false;
static int numDevices = 0;
static int archVersion = 0;
if (!deviceQueryInit)
{
deviceQueryInit = true;
//first query for the number of devices
VTKM_CUDA_CALL(cudaGetDeviceCount(&numDevices));
// Track the highest compute capability seen across all devices
for (vtkm::Int32 i = 0; i < numDevices; i++)
{
cudaDeviceProp prop;
VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion);
}
//Make sure we can actually launch a kernel. This could fail for any
//of the following reasons:
//
// 1. cudaErrorInsufficientDriver, caused by out-of-date drivers
// 2. cudaErrorDevicesUnavailable, caused by another process locking the
// device or somebody disabling cuda support on the device
// 3. cudaErrorNoKernelImageForDevice we built for a compute version
// greater than the device we are running on
// Most likely others that I don't even know about
vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
// A failed probe launch is treated as "no usable CUDA device at all"
if (cudaSuccess != cudaGetLastError())
{
numDevices = 0;
archVersion = 0;
}
}
this->NumberOfDevices = numDevices;
this->HighestArchSupported = archVersion;
}
// True when at least one device was detected and the highest detected
// architecture value is 20 (SM_20) or greater.
bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
{
  const bool hasDevice = this->NumberOfDevices > 0;
  const bool archIsSupported = this->HighestArchSupported >= 20;
  return hasDevice && archIsSupported;
}
}
} // namespace vtkm::cont

@ -0,0 +1,67 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_cuda_internal_DeviceAdapterRuntimeDetectorCuda_h
#define vtk_m_cont_cuda_internal_DeviceAdapterRuntimeDetectorCuda_h
#include <vtkm/cont/vtkm_cont_export.h>
#include <vtkm/Types.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
namespace vtkm
{
namespace cont
{
/// \brief Class providing a CUDA runtime support detector.
///
/// The class provide the actual implementation used by
/// vtkm::cont::RuntimeDeviceInformation for the CUDA backend.
///
/// We will verify at runtime that the machine has at least one CUDA
/// capable device, and said device is from the 'fermi' (SM_20) generation
/// or newer.
///
template <>
class VTKM_CONT_EXPORT DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>
{
public:
// Declared here and defined out of line in the matching source file.
VTKM_CONT DeviceAdapterRuntimeDetector();
/// Returns true if the given device adapter is supported on the current
/// machine.
///
/// Only returns true if we have at-least one CUDA capable device of SM_20 or
/// greater ( fermi ).
///
VTKM_CONT bool Exists() const;
private:
// Number of CUDA devices found by the constructor.
vtkm::Int32 NumberOfDevices;
// Highest compute capability found by the constructor; Exists() compares it
// against 20.
vtkm::Int32 HighestArchSupported;
};
}
} // namespace vtkm::cont
#endif

@ -0,0 +1,66 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/cuda/internal/DeviceAdapterTimerImplementationCuda.h>
#include <vtkm/Types.h>
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <cuda.h>
namespace vtkm
{
namespace cont
{
// Creates the start and end CUDA events owned by this timer and then calls
// Reset(), so a freshly constructed timer starts out with its start event
// recorded.
DeviceAdapterTimerImplementation<
vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterTimerImplementation()
{
VTKM_CUDA_CALL(cudaEventCreate(&this->StartEvent));
VTKM_CUDA_CALL(cudaEventCreate(&this->EndEvent));
this->Reset();
}
// Destroys the two CUDA events created by the constructor. The return codes
// of the destroy calls are deliberately not inspected here (see below).
DeviceAdapterTimerImplementation<
vtkm::cont::DeviceAdapterTagCuda>::~DeviceAdapterTimerImplementation()
{
// These aren't wrapped in VTKM_CUDA_CALL because we can't throw errors
// from destructors. We're relying on cudaGetLastError in the
// VTKM_CUDA_CHECK_ASYNCHRONOUS_ERROR catching any issues from these calls
// later.
cudaEventDestroy(this->StartEvent);
cudaEventDestroy(this->EndEvent);
}
// Restarts the timer: records the start event on cudaStreamPerThread and
// waits for that event before returning.
void DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::Reset()
{
VTKM_CUDA_CALL(cudaEventRecord(this->StartEvent, cudaStreamPerThread));
VTKM_CUDA_CALL(cudaEventSynchronize(this->StartEvent));
}
// Returns the time, in seconds, between the start event (recorded by the
// constructor or the last Reset()) and an end event that is recorded and
// waited on by this call. The timer is not reset, so repeated calls keep
// measuring from the same start event.
vtkm::Float64 DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>::GetElapsedTime()
{
  VTKM_CUDA_CALL(cudaEventRecord(this->EndEvent, cudaStreamPerThread));
  VTKM_CUDA_CALL(cudaEventSynchronize(this->EndEvent));

  // Initialized so the value is well defined even if the query below were to
  // leave it untouched
  float elapsedTimeMilliseconds = 0.0f;
  VTKM_CUDA_CALL(cudaEventElapsedTime(&elapsedTimeMilliseconds, this->StartEvent, this->EndEvent));

  // Widen to Float64 *before* scaling from milliseconds to seconds. Scaling
  // in single precision first (0.001f * ms) adds a float rounding step that
  // the wider return type cannot recover.
  return 0.001 * static_cast<vtkm::Float64>(elapsedTimeMilliseconds);
}
}
} // namespace vtkm::cont

@ -0,0 +1,69 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_cuda_internal_DeviceAdapterRuntimeDetectorCuda_h
#define vtk_m_cont_cuda_internal_DeviceAdapterRuntimeDetectorCuda_h
#include <vtkm/cont/vtkm_cont_export.h>
#include <vtkm/Types.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <cuda.h>
namespace vtkm
{
namespace cont
{
/// \brief Specialization of DeviceAdapterTimerImplementation for CUDA.
///
/// CUDA provides its own high-resolution timer that is able to track how long
/// asynchronously launched kernels take to execute. Measuring the time on the
/// CPU alone would incorrectly capture only how long it takes to launch a
/// kernel.
///
template <>
class VTKM_CONT_EXPORT DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagCuda>
{
  // CUDA events that delimit the measured interval.
  cudaEvent_t StartEvent;
  cudaEvent_t EndEvent;

  // Copying CUDA events is problematic, so the timer is neither
  // copy-constructible nor copy-assignable.
  DeviceAdapterTimerImplementation(const DeviceAdapterTimerImplementation&) = delete;
  void operator=(const DeviceAdapterTimerImplementation&) = delete;

public:
  VTKM_CONT DeviceAdapterTimerImplementation();

  VTKM_CONT ~DeviceAdapterTimerImplementation();

  /// Restart the timer.
  VTKM_CONT void Reset();

  /// Elapsed time as a vtkm::Float64.
  VTKM_CONT vtkm::Float64 GetElapsedTime();
};
}
} // namespace vtkm::cont
#endif

@ -41,7 +41,7 @@ namespace cuda
namespace internal
{
int getNumSMs(int dId);
vtkm::UInt32 getNumSMs(int dId);
template <typename TaskType>
__global__ void TaskStrided1DLaunch(TaskType task, vtkm::Id);
@ -110,10 +110,10 @@ static void parameter_sweep_1d_schedule(const TaskT& task, const vtkm::Id& numIn
for (vtkm::UInt32 g = 0; g < 12; g++)
{
int grids = gridIndexTable[g] * getNumSMs(deviceId);
vtkm::UInt32 grids = gridIndexTable[g] * getNumSMs(deviceId);
for (vtkm::UInt32 b = 0; b < 12; b++)
{
int blocks = blockIndexTable[b];
vtkm::UInt32 blocks = blockIndexTable[b];
cudaEvent_t start, stop;
VTKM_CUDA_CALL(cudaEventCreate(&start));
@ -156,7 +156,7 @@ static void parameter_sweep_3d_schedule(const TaskT& task, const vtkm::Id3& rang
int deviceId;
for (vtkm::UInt32 g = 0; g < 12; g++)
{
int grids = gridIndexTable[g] * getNumSMs(deviceId);
vtkm::UInt32 grids = gridIndexTable[g] * getNumSMs(deviceId);
for (vtkm::UInt32 i = 0; i < 16; i++)
{
for (vtkm::UInt32 j = 0; j < 16; j++)

@ -258,6 +258,10 @@ bool ArrayHandleImpl::PrepareForDevice(DeviceAdapterId devId, vtkm::UInt64 sizeO
VTKM_ASSERT(!this->ExecutionArrayValid);
switch (devId)
{
case VTKM_DEVICE_ADAPTER_ERROR:
throw vtkm::cont::ErrorBadValue("device should not be VTKM_DEVICE_ADAPTER_ERROR");
break;
#ifdef VTKM_ENABLE_TBB
case VTKM_DEVICE_ADAPTER_TBB:
this->ExecutionInterface =
@ -274,6 +278,9 @@ bool ArrayHandleImpl::PrepareForDevice(DeviceAdapterId devId, vtkm::UInt64 sizeO
new ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>(*this->ControlArray);
break;
#endif
case VTKM_DEVICE_ADAPTER_SERIAL:
VTKM_FALLTHROUGH;
default:
this->ExecutionInterface =
new ExecutionArrayInterfaceBasic<DeviceAdapterTagSerial>(*this->ControlArray);

@ -32,4 +32,4 @@ set(unit_tests
UnitTestSerialPointLocatorUniformGrid.cxx
UnitTestSerialVirtualObjectHandle.cxx
)
vtkm_unit_tests(SOURCES ${unit_tests} BACKEND SERIAL)
vtkm_unit_tests(SOURCES ${unit_tests})

@ -33,4 +33,4 @@ set(unit_tests
UnitTestTBBVirtualObjectHandle.cxx
)
vtkm_unit_tests(SOURCES ${unit_tests} BACKEND TBB)
vtkm_unit_tests(SOURCES ${unit_tests})

@ -23,7 +23,7 @@
#include <vtkm/ListTag.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
namespace vtkm
{
@ -51,7 +51,7 @@ struct AtomicArrayTypeListTag : vtkm::ListTagBase<vtkm::Int32, vtkm::Int64>
///
///
template <typename T, typename DeviceAdapterTag>
class AtomicArray : public vtkm::cont::ExecutionObjectFactoryBase
class AtomicArray : public vtkm::cont::ExecutionObjectBase
{
public:
using ValueType = T;

@ -556,7 +556,8 @@ VTKM_EXEC vtkm::Vec<typename FieldVecType::ComponentType, 3> CellDerivative(
using T = typename FieldVecType::ComponentType;
return vtkm::Vec<T, 3>((field[1] - field[0]) / wCoords.GetSpacing()[0], T(0), T(0));
return vtkm::Vec<T, 3>(
static_cast<T>((field[1] - field[0]) / wCoords.GetSpacing()[0]), T(0), T(0));
}
//-----------------------------------------------------------------------------
@ -857,7 +858,9 @@ VTKM_EXEC vtkm::Vec<typename FieldVecType::ComponentType, 3> CellDerivative(
sum = sum + field[2] * VecT(pc[1], pc[0]);
sum = sum + field[3] * VecT(-pc[1], rc[0]);
return vtkm::Vec<T, 3>(sum[0] / wCoords.GetSpacing()[0], sum[1] / wCoords.GetSpacing()[1], T(0));
return vtkm::Vec<T, 3>(static_cast<T>(sum[0] / wCoords.GetSpacing()[0]),
static_cast<T>(sum[1] / wCoords.GetSpacing()[1]),
T(0));
}
//-----------------------------------------------------------------------------

@ -22,7 +22,7 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
namespace vtkm
{
@ -39,7 +39,7 @@ namespace exec
/// structure.
///
template <typename T, typename StorageTag, typename DeviceAdapterTag>
class ExecutionWholeArray : public vtkm::cont::ExecutionObjectFactoryBase
class ExecutionWholeArray : public vtkm::cont::ExecutionObjectBase
{
public:
using ValueType = T;
@ -89,7 +89,7 @@ private:
/// structure
///
template <typename T, typename StorageTag, typename DeviceAdapterTag>
class ExecutionWholeArrayConst : public vtkm::cont::ExecutionObjectFactoryBase
class ExecutionWholeArrayConst : public vtkm::cont::ExecutionObjectBase
{
public:
using ValueType = T;
@ -126,4 +126,4 @@ private:
}
} // namespace vtkm::exec
#endif //vtk_m_exec_ExecutionObjectFactoryBase_h
#endif //vtk_m_exec_ExecutionObjectBase_h

@ -23,8 +23,6 @@
#include <vtkm/exec/arg/AspectTagDefault.h>
#include <vtkm/exec/arg/Fetch.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <type_traits>
namespace vtkm

@ -22,7 +22,7 @@
#include <vtkm/exec/arg/testing/ThreadIndicesTesting.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/testing/Testing.h>
@ -31,7 +31,7 @@
namespace
{
struct TestExecutionObject : public vtkm::cont::ExecutionObjectFactoryBase
struct TestExecutionObject : public vtkm::cont::ExecutionObjectBase
{
TestExecutionObject()
: Number(static_cast<vtkm::Int32>(0xDEADDEAD))

@ -32,6 +32,20 @@
#include <vtkm/cont/testing/Testing.h>
#if defined(VTKM_MSVC)
#pragma warning(push)
#pragma warning(disable : 4068) //unknown pragma
#endif
#if defined(__NVCC__) && defined(__CUDACC_VER_MAJOR__)
// Disable warning "declared but never referenced"
// This file produces several false-positive warnings
// Eg: TestExecObject::TestExecObject, MyOutputToInputMapPortal::Get,
// TestWorkletProxy::operator()
#pragma push
#pragma diag_suppress 177
#endif
namespace
{
@ -359,3 +373,11 @@ int UnitTestTaskStrided(int, char* [])
{
return vtkm::cont::testing::Testing::Run(TestTaskStrided<vtkm::cont::DeviceAdapterTagCuda>);
}
#if defined(__NVCC__) && defined(__CUDACC_VER_MAJOR__)
#pragma pop
#endif
#if defined(VTKM_MSVC)
#pragma warning(pop)
#endif

@ -20,7 +20,7 @@
#ifndef vtk_m_exec_internal_ReduceByKeyLookup_h
#define vtk_m_exec_internal_ReduceByKeyLookup_h
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/StaticAssert.h>
#include <vtkm/Types.h>
@ -41,7 +41,7 @@ namespace internal
/// state.
///
template <typename KeyPortalType, typename IdPortalType, typename IdComponentPortalType>
struct ReduceByKeyLookup : vtkm::cont::ExecutionObjectFactoryBase
struct ReduceByKeyLookup : vtkm::cont::ExecutionObjectBase
{
using KeyType = typename KeyPortalType::ValueType;

@ -61,9 +61,10 @@ struct LinearField
template <typename T>
FieldType GetValue(vtkm::Vec<T, 3> coordinates) const
{
return ((coordinates[0] * this->Gradient[0] + coordinates[1] * this->Gradient[1] +
coordinates[2] * this->Gradient[2]) +
this->OriginValue);
return static_cast<FieldType>((coordinates[0] * this->Gradient[0] +
coordinates[1] * this->Gradient[1] +
coordinates[2] * this->Gradient[2]) +
this->OriginValue);
}
};

@ -95,7 +95,7 @@ inline VTKM_CONT vtkm::cont::DataSet Streamline::DoExecute(
//RGEvalType eval(input.GetCoordinateSystem(), input.GetCellSet(0), field);
RGEvalType eval(coords, cells, field);
RK4RGType rk4(eval, static_cast<vtkm::FloatDefault>(this->StepSize));
RK4RGType rk4(eval, static_cast<T>(this->StepSize));
vtkm::worklet::Streamline streamline;
vtkm::worklet::StreamlineResult<T> res;

@ -312,4 +312,23 @@
#endif
//----------------------------------------------------------------------------
// Switch case fall-through policy.
// Use "VTKM_FALLTHROUGH;" to annotate deliberate fall-through in switches,
// use it analogously to "break;". The trailing semi-colon is required.
#if !defined(VTKM_FALLTHROUGH) && defined(__has_cpp_attribute)
# if __cplusplus >= 201703L && __has_cpp_attribute(fallthrough)
# define VTKM_FALLTHROUGH [[fallthrough]]
# elif __cplusplus >= 201103L && __has_cpp_attribute(gnu::fallthrough)
# define VTKM_FALLTHROUGH [[gnu::fallthrough]]
# elif __cplusplus >= 201103L && __has_cpp_attribute(clang::fallthrough)
# define VTKM_FALLTHROUGH [[clang::fallthrough]]
# endif
#endif
#ifndef VTKM_FALLTHROUGH
# define VTKM_FALLTHROUGH ((void)0)
#endif
#endif //vtkm_internal_Configure_h

@ -75,7 +75,7 @@ private:
if (stream.fail())
throw vtkm::io::ErrorIO("Failed to open file: " + this->FileName);
DataFormat dataFormat;
DataFormat dataFormat = ByteData;
std::string bovFile, line, token, options, variableName;
vtkm::Id numComponents = 1;
vtkm::Id3 dim;

@ -25,7 +25,7 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
namespace vtkm
{
@ -211,7 +211,7 @@ public:
TextureWrapMode WrapMode;
};
class Texture2DSampler : public vtkm::cont::ExecutionObjectFactoryBase
class Texture2DSampler : public vtkm::cont::ExecutionObjectBase
{
public:
VTKM_CONT

@ -455,7 +455,7 @@ void Camera::SetHeight(const vtkm::Int32& height)
if (Height != height)
{
this->Height = height;
this->SetFieldOfView(this->FovX);
this->SetFieldOfView(this->FovY);
}
}
@ -475,7 +475,7 @@ void Camera::SetWidth(const vtkm::Int32& width)
if (this->Width != width)
{
this->Width = width;
this->SetFieldOfView(this->FovX);
this->SetFieldOfView(this->FovY);
}
}
@ -529,8 +529,32 @@ void Camera::SetFieldOfView(const vtkm::Float32& degrees)
throw vtkm::cont::ErrorBadValue("Camera feild of view must be less than 180.");
}
vtkm::Float32 newFOVY = (vtkm::Float32(this->Height) / vtkm::Float32(this->Width)) * degrees;
vtkm::Float32 newFOVX = degrees;
vtkm::Float32 newFOVY = degrees;
vtkm::Float32 newFOVX;
if (this->Width != this->Height)
{
vtkm::Float32 fovyRad = (newFOVY * static_cast<vtkm::Float32>(vtkm::Pi())) / 180.0f;
// Use the tan function to find the distance from the center of the image to the top (or
// bottom). (Actually, we are finding the ratio of this distance to the near plane distance,
// but since we scale everything by the near plane distance, we can use this ratio as a scaled
// proxy of the distances we need.)
vtkm::Float32 verticalDistance = vtkm::Tan(0.5f * fovyRad);
// Scale the vertical distance by the aspect ratio to get the horizontal distance.
vtkm::Float32 aspectRatio = vtkm::Float32(this->Width) / vtkm::Float32(this->Height);
vtkm::Float32 horizontalDistance = aspectRatio * verticalDistance;
// Now use the arctan function to get the proper field of view in the x direction.
vtkm::Float32 fovxRad = 2.0f * vtkm::ATan(horizontalDistance);
newFOVX = 180.0f * (fovxRad / static_cast<vtkm::Float32>(vtkm::Pi()));
}
else
{
newFOVX = newFOVY;
}
if (newFOVX != this->FovX)
{
this->IsViewDirty = true;
@ -541,13 +565,13 @@ void Camera::SetFieldOfView(const vtkm::Float32& degrees)
}
this->FovX = newFOVX;
this->FovY = newFOVY;
this->CameraView.SetFieldOfView(this->FovX);
this->CameraView.SetFieldOfView(this->FovY);
}
VTKM_CONT
vtkm::Float32 Camera::GetFieldOfView() const
{
return this->FovX;
return this->FovY;
}
VTKM_CONT

@ -213,8 +213,7 @@ public:
// the wonders of floating point math. This is bad. If we calculate in the same order
// for all faces, then at worst, two different faces can enter the same bucket, which
// we currently check for.
vtkm::Vec<vtkm::Id, 4> faceIndices;
faceIndices[3] = -1;
vtkm::Vec<vtkm::Id, 4> faceIndices(-1);
//Number of indices this face has
const vtkm::Int32 indiceCount = tables.ShapesFaceList(tableOffset + i, 0);
for (vtkm::Int32 j = 1; j <= indiceCount; j++)

@ -54,6 +54,7 @@ set(headers
ParticleAdvection.h
PointAverage.h
PointElevation.h
PointTransform.h
Probe.h
RemoveUnusedPoints.h
ScalarsToColors.h

@ -149,7 +149,7 @@ private:
IdPortal IndexOffsets;
};
class ExecutionConnectivityExplicit : vtkm::cont::ExecutionObjectFactoryBase
class ExecutionConnectivityExplicit : vtkm::cont::ExecutionObjectBase
{
public:
VTKM_CONT

@ -75,6 +75,15 @@
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/WorkletMapField.h>
// For numerous functions inside contourTree, GCC is able to determine whether
// i is always greater than j (or vice versa) and optimizes those call sites.
// When it performs these optimizations it presumes that i and j will not
// overflow, and it emits a -Wstrict-overflow warning.
#ifdef VTKM_GCC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-overflow"
#endif
#include <vtkm/worklet/contourtree/ChainGraph.h>
#include <vtkm/worklet/contourtree/ContourTree.h>
#include <vtkm/worklet/contourtree/MergeTree.h>
@ -202,4 +211,8 @@ public:
}
} // namespace vtkm::worklet
#ifdef VTKM_GCC
#pragma GCC diagnostic pop
#endif
#endif // vtk_m_worklet_ContourTreeUniform_h

@ -120,7 +120,7 @@ private:
} //namespace gradient
template <typename T>
struct GradientOutputFields : public vtkm::cont::ExecutionObjectFactoryBase
struct GradientOutputFields : public vtkm::cont::ExecutionObjectBase
{
using ValueType = T;

@ -0,0 +1,124 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_worklet_PointTransform_h
#define vtk_m_worklet_PointTransform_h
#include <vtkm/Math.h>
#include <vtkm/Matrix.h>
#include <vtkm/Transform3D.h>
#include <vtkm/worklet/WorkletMapField.h>
namespace vtkm
{
namespace worklet
{
/// \brief Worklet that applies a 4x4 transformation matrix to 3D points.
///
/// The matrix is configured through one of the Set* methods below; each call
/// replaces the current matrix (transformations are not accumulated). All
/// setter arguments are converted to the worklet's component type \c T before
/// the matrix is built, so the argument type \c S need not equal \c T.
///
/// NOTE(review): the default constructor does not assign the matrix, so a
/// Set* method should be called before the worklet is invoked -- confirm
/// whether vtkm::Matrix value-initializes its storage.
///
template <typename T>
class PointTransform : public vtkm::worklet::WorkletMapField
{
public:
  typedef void ControlSignature(FieldIn<Vec3>, FieldOut<Vec3>);
  typedef _2 ExecutionSignature(_1);

  VTKM_CONT
  PointTransform() {}

  //Translation

  /// Use a translation by (tx, ty, tz).
  template <typename S>
  VTKM_CONT void SetTranslation(const S& tx, const S& ty, const S& tz)
  {
    matrix = vtkm::Transform3DTranslate(static_cast<T>(tx), static_cast<T>(ty), static_cast<T>(tz));
  }

  /// Use a translation by the vector \c v.
  template <typename S>
  VTKM_CONT void SetTranslation(const vtkm::Vec<S, 3>& v)
  {
    SetTranslation(v[0], v[1], v[2]);
  }

  //Rotation

  /// Use a rotation of \c angleDegrees around \c axis.
  template <typename S>
  VTKM_CONT void SetRotation(const S& angleDegrees, const vtkm::Vec<S, 3>& axis)
  {
    // Convert to T first so the resulting matrix type matches the member.
    matrix = vtkm::Transform3DRotate(
      static_cast<T>(angleDegrees),
      vtkm::Vec<T, 3>(static_cast<T>(axis[0]), static_cast<T>(axis[1]), static_cast<T>(axis[2])));
  }

  /// Use a rotation of \c angleDegrees around the axis (rx, ry, rz).
  template <typename S>
  VTKM_CONT void SetRotation(const S& angleDegrees, const S& rx, const S& ry, const S& rz)
  {
    SetRotation(angleDegrees, vtkm::Vec<S, 3>(rx, ry, rz));
  }

  /// Historical (misnamed) spelling of the four-argument SetRotation. Kept so
  /// that existing callers continue to compile; prefer SetRotation.
  template <typename S>
  VTKM_CONT void SetRotationX(const S& angleDegrees, const S& rx, const S& ry, const S& rz)
  {
    SetRotation(angleDegrees, rx, ry, rz);
  }

  /// Use a rotation of \c angleDegrees around the x axis.
  template <typename S>
  VTKM_CONT void SetRotationX(const S& angleDegrees)
  {
    // The axis components are spelled as S so template deduction is unambiguous.
    SetRotation(angleDegrees, S(1), S(0), S(0));
  }

  /// Use a rotation of \c angleDegrees around the y axis.
  template <typename S>
  VTKM_CONT void SetRotationY(const S& angleDegrees)
  {
    SetRotation(angleDegrees, S(0), S(1), S(0));
  }

  /// Use a rotation of \c angleDegrees around the z axis.
  template <typename S>
  VTKM_CONT void SetRotationZ(const S& angleDegrees)
  {
    SetRotation(angleDegrees, S(0), S(0), S(1));
  }

  //Scaling

  /// Use a uniform scale by \c s along all three axes.
  template <typename S>
  VTKM_CONT void SetScale(const S& s)
  {
    const T scale = static_cast<T>(s);
    matrix = vtkm::Transform3DScale(scale, scale, scale);
  }

  /// Use a per-axis scale by (sx, sy, sz).
  template <typename S>
  VTKM_CONT void SetScale(const S& sx, const S& sy, const S& sz)
  {
    matrix = vtkm::Transform3DScale(static_cast<T>(sx), static_cast<T>(sy), static_cast<T>(sz));
  }

  /// Use a per-axis scale given by the components of \c v.
  template <typename S>
  VTKM_CONT void SetScale(const vtkm::Vec<S, 3>& v)
  {
    matrix = vtkm::Transform3DScale(
      static_cast<T>(v[0]), static_cast<T>(v[1]), static_cast<T>(v[2]));
  }

  //General transformation

  /// Use an arbitrary, caller-supplied 4x4 matrix.
  VTKM_CONT
  void SetTransform(const vtkm::Matrix<T, 4, 4>& mtx) { matrix = mtx; }

  //Functor

  /// Returns \c vec transformed as a point by the current matrix.
  VTKM_EXEC
  vtkm::Vec<T, 3> operator()(const vtkm::Vec<T, 3>& vec) const
  {
    return vtkm::Transform3DPoint(matrix, vec);
  }

private:
  vtkm::Matrix<T, 4, 4> matrix;
};
}
} // namespace vtkm::worklet
#endif // vtk_m_worklet_PointTransform_h

@ -23,6 +23,8 @@ set(headers
CellSetDualGraph.h
GraphConnectivity.h
InnerJoin.h
ImageConnectivity.h
UnionFind.h
)
#-----------------------------------------------------------------------------

@ -25,6 +25,13 @@
#include <vtkm/worklet/connectivities/CellSetDualGraph.h>
#include <vtkm/worklet/connectivities/GraphConnectivity.h>
namespace vtkm
{
namespace worklet
{
namespace connectivity
{
class CellSetConnectivity
{
public:
@ -45,4 +52,8 @@ public:
numIndicesArray, indexOffsetArray, connectivityArray, componentArray);
}
};
}
}
} // vtkm::worklet::connectivity
#endif // vtk_m_worklet_connectivity_CellSetConnectivity_h

@ -28,10 +28,20 @@
#include <vtkm/worklet/ScatterCounting.h>
#include <vtkm/worklet/WorkletMapTopology.h>
namespace vtkm
{
namespace worklet
{
namespace connectivity
{
namespace detail
{
struct EdgeCount : public vtkm::worklet::WorkletMapPointToCell
{
typedef void ControlSignature(CellSetIn, FieldOutCell<> numEdgesInCell);
typedef _2 ExecutionSignature(CellShape, PointCount);
using InputDomain = _1;
template <typename CellShapeTag>
@ -46,6 +56,7 @@ struct EdgeExtract : public vtkm::worklet::WorkletMapPointToCell
typedef void ControlSignature(CellSetIn, FieldOutCell<> cellIndices, FieldOutCell<> edgeIndices);
typedef void ExecutionSignature(CellShape, InputIndex, PointIndices, VisitIndex, _2, _3);
using InputDomain = _1;
using ScatterType = vtkm::worklet::ScatterCounting;
@ -75,6 +86,7 @@ struct CellToCellConnectivity : public vtkm::worklet::WorkletMapField
WholeArrayOut<> to);
typedef void ExecutionSignature(_1, InputIndex, _2, _3, _4);
using InputDomain = _1;
template <typename ConnectivityPortalType, typename CellIdPortalType>
@ -90,6 +102,7 @@ struct CellToCellConnectivity : public vtkm::worklet::WorkletMapField
to.Set(index * 2 + 1, cells.Get(offset));
}
};
} // vtkm::worklet::connectivity::detail
template <typename DeviceAdapter>
class CellSetDualGraph
@ -110,12 +123,14 @@ public:
{
// Get number of edges for each cell and use it as scatter count.
vtkm::cont::ArrayHandle<vtkm::IdComponent> numEdgesPerCell;
vtkm::worklet::DispatcherMapTopology<EdgeCount, DeviceAdapter> edgesPerCellDisp;
vtkm::worklet::DispatcherMapTopology<detail::EdgeCount, DeviceAdapter> edgesPerCellDisp;
edgesPerCellDisp.Invoke(cellSet, numEdgesPerCell);
// Get uncompress Cell to Edge mapping
vtkm::worklet::ScatterCounting scatter{ numEdgesPerCell, DeviceAdapter() };
vtkm::worklet::DispatcherMapTopology<EdgeExtract, DeviceAdapter> edgeExtractDisp{ scatter };
vtkm::worklet::DispatcherMapTopology<detail::EdgeExtract, DeviceAdapter> edgeExtractDisp{
scatter
};
edgeExtractDisp.Invoke(cellSet, cellIds, cellEdges);
}
@ -157,7 +172,7 @@ public:
vtkm::cont::ArrayHandle<vtkm::Id> connTo;
connFrom.Allocate(sharedEdges.GetNumberOfValues() * 2);
connTo.Allocate(sharedEdges.GetNumberOfValues() * 2);
vtkm::worklet::DispatcherMapField<CellToCellConnectivity, DeviceAdapter> c2cDisp;
vtkm::worklet::DispatcherMapField<detail::CellToCellConnectivity, DeviceAdapter> c2cDisp;
c2cDisp.Invoke(lb, cellIds, connFrom, connTo);
// Turn dual graph into Compressed Sparse Row format
@ -174,4 +189,8 @@ public:
Algorithm::ScanExclusive(numIndicesArray, indexOffsetArray);
}
};
}
}
}
#endif //vtk_m_worklet_CellSetDualGraph_h

@ -25,7 +25,16 @@
#include <vtkm/worklet/connectivities/CellSetDualGraph.h>
#include <vtkm/worklet/connectivities/InnerJoin.h>
#include <vtkm/worklet/connectivities/UnionFind.h>
namespace vtkm
{
namespace worklet
{
namespace connectivity
{
namespace detail
{
class Graft : public vtkm::worklet::WorkletMapField
{
public:
@ -36,6 +45,7 @@ public:
WholeArrayInOut<IdType> comp);
typedef void ExecutionSignature(_1, _2, _3, _4, _5);
using InputDomain = _1;
// TODO: Use Scatter?
@ -56,38 +66,7 @@ public:
}
}
};
class PointerJumping : public vtkm::worklet::WorkletMapField
{
public:
typedef void ControlSignature(FieldIn<IdType> index, WholeArrayInOut<IdType> comp);
typedef void ExecutionSignature(_1, _2);
using InputDomain = _1;
template <typename InOutPortalType>
VTKM_EXEC void operator()(vtkm::Id index, InOutPortalType& comp) const
{
// keep updating component id until we reach the root of the tree.
for (auto parent = comp.Get(index); comp.Get(parent) != parent; parent = comp.Get(index))
{
comp.Set(index, comp.Get(parent));
}
}
};
class IsStar : public vtkm::worklet::WorkletMapField
{
public:
typedef void ControlSignature(FieldIn<IdType> index, WholeArrayIn<IdType> comp, FieldOut<>);
typedef _3 ExecutionSignature(_1, _2);
using InputDomain = _1;
template <typename InOutPortalType>
VTKM_EXEC bool operator()(vtkm::Id index, InOutPortalType& comp) const
{
return comp.Get(index) == comp.Get(comp.Get(index));
}
};
}
template <typename DeviceAdapter>
class GraphConnectivity
@ -111,7 +90,7 @@ public:
do
{
vtkm::worklet::DispatcherMapField<Graft, DeviceAdapter> graftDispatcher;
vtkm::worklet::DispatcherMapField<detail::Graft, DeviceAdapter> graftDispatcher;
graftDispatcher.Invoke(
cellIds, indexOffsetArray, numIndexArray, connectivityArray, components);
@ -142,4 +121,7 @@ public:
Algorithm::SortByKey(cellIdsOut, componentsOut);
}
};
}
}
}
#endif //vtk_m_worklet_connectivity_graph_connectivity_h

@ -0,0 +1,171 @@
//=============================================================================
//
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//
//=============================================================================
// Include guard unique to this header. It must differ from the guard used by
// GraphConnectivity.h, otherwise including both headers silently drops one.
#ifndef vtk_m_worklet_connectivity_ImageConnectivity_h
#define vtk_m_worklet_connectivity_ImageConnectivity_h
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/DispatcherPointNeighborhood.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/worklet/WorkletPointNeighborhood.h>
#include <vtkm/worklet/connectivities/InnerJoin.h>
#include <vtkm/worklet/connectivities/UnionFind.h>
namespace vtkm
{
namespace worklet
{
namespace connectivity
{
namespace detail
{
template <int Dimension>
class ImageGraft;
// 2D specialization of the graft step. For every pixel it looks at the nine
// in-plane neighborhood offsets (-1..1 in i and j, with k fixed at 0, the
// pixel itself included) and returns the smallest component id found among
// the entries whose color equals the pixel's own color.
template <>
class ImageGraft<2> : public vtkm::worklet::WorkletPointNeighborhood3x3x3
{
public:
  typedef void ControlSignature(CellSetIn,
                                FieldInNeighborhood<> comp,
                                FieldInNeighborhood<> color,
                                FieldOut<> newComp);

  typedef _4 ExecutionSignature(_2, _3);

  template <typename Comp, typename NeighborColor>
  VTKM_EXEC vtkm::Id operator()(const Comp& comp, const NeighborColor& color) const
  {
    // Start from the pixel's own component id and color.
    vtkm::Id smallest = comp.Get(0, 0, 0);
    auto centerColor = color.Get(0, 0, 0);

    for (int dy = -1; dy <= 1; dy++)
    {
      for (int dx = -1; dx <= 1; dx++)
      {
        // Entries with a different color never contribute.
        if (!(centerColor == color.Get(dx, dy, 0)))
        {
          continue;
        }
        smallest = vtkm::Min(smallest, comp.Get(dx, dy, 0));
      }
    }

    return smallest;
  }
};
}
// Labels the connected components of a 2D structured image. Pixels are grouped
// by repeatedly grafting each pixel onto the smallest component id among its
// same-colored neighbors (detail::ImageGraft<2>) and compressing the resulting
// component array with pointer jumping until every entry is a star. The
// component ids are finally renumbered to the range [0, number of components).
class ImageConnectivity
{
public:
// Functor holding the actual algorithm. It is invoked through
// vtkm::cont::CastAndCall with the pixel array as its first argument.
class RunImpl
{
public:
// pixels        - per-pixel color values (vtkm::UInt8 only, see TODO below).
// input         - the 2D structured cell set the pixels belong to.
// componentsOut - receives one component id per pixel. Despite the template
//                 parameter name it is used as an array (target of
//                 Algorithm::Copy and of dispatcher invocations).
// Device        - device adapter tag selecting the algorithm implementation.
template <typename StorageT, typename OutputPortalType, typename Device>
void operator()(const vtkm::cont::ArrayHandle<vtkm::UInt8, StorageT>& pixels,
const vtkm::cont::CellSetStructured<2>& input,
OutputPortalType& componentsOut,
Device) const
{
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<Device>;
// TODO: template pixel type?
// Initially every pixel is its own component: componentsOut = 0, 1, 2, ...
Algorithm::Copy(vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, pixels.GetNumberOfValues()),
componentsOut);
vtkm::cont::ArrayHandle<vtkm::Id> newComponents;
// pixelIds = 0, 1, 2, ... is the input domain of the per-pixel worklets below.
vtkm::cont::ArrayHandle<vtkm::Id> pixelIds;
Algorithm::Copy(vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, pixels.GetNumberOfValues()),
pixelIds);
bool allStar = false;
vtkm::cont::ArrayHandle<bool> isStar;
using DispatcherType =
vtkm::worklet::DispatcherPointNeighborhood<detail::ImageGraft<2>, Device>;
// Iterate graft + pointer jumping until the graft result is already a star
// for every pixel.
do
{
// Graft: newComponents[p] = smallest component id among same-colored neighbors.
DispatcherType().Invoke(input, componentsOut, pixels, newComponents);
// Detection of allStar has to come before pointer jumping. Don't try to rearrange it.
vtkm::worklet::DispatcherMapField<IsStar, Device> isStarDisp;
isStarDisp.Invoke(pixelIds, newComponents, isStar);
// allStar is the logical AND of the per-pixel star flags.
allStar = Algorithm::Reduce(isStar, true, vtkm::LogicalAnd());
// Compress the paths in newComponents, then make it the current labeling.
vtkm::worklet::DispatcherMapField<PointerJumping, Device> pointJumpingDispatcher;
pointJumpingDispatcher.Invoke(pixelIds, newComponents);
Algorithm::Copy(newComponents, componentsOut);
} while (!allStar);
// renumber connected component to the range of [0, number of components).
// uniqueComponents: the sorted, de-duplicated set of component ids in use.
vtkm::cont::ArrayHandle<vtkm::Id> uniqueComponents;
Algorithm::Copy(componentsOut, uniqueComponents);
Algorithm::Sort(uniqueComponents);
Algorithm::Unique(uniqueComponents);
// uniqueColor: the dense replacement ids 0 .. (number of components - 1).
vtkm::cont::ArrayHandle<vtkm::Id> uniqueColor;
Algorithm::Copy(
vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, uniqueComponents.GetNumberOfValues()),
uniqueColor);
vtkm::cont::ArrayHandle<vtkm::Id> cellColors;
vtkm::cont::ArrayHandle<vtkm::Id> pixelIdsOut;
// NOTE(review): presumably joins (component id -> pixel id) with
// (component id -> dense id) so that componentsOut ends up holding the dense
// id of each pixel listed in pixelIdsOut -- confirm against InnerJoin::Run.
// componentsOut is passed both as the first input and as the last output.
InnerJoin<Device>().Run(componentsOut,
pixelIds,
uniqueComponents,
uniqueColor,
cellColors,
pixelIdsOut,
componentsOut);
// Reorder the component ids by pixel id.
Algorithm::SortByKey(pixelIdsOut, componentsOut);
}
};
// Entry point for a dynamic (type-erased) pixel array. The candidate value
// types are restricted to vtkm::UInt8 before dispatching to RunImpl.
template <typename T, typename S, typename OutputPortalType, typename Device>
void Run(const vtkm::cont::CellSetStructured<2>& input,
const vtkm::cont::DynamicArrayHandleBase<T, S>& pixels,
OutputPortalType& componentsOut,
Device device) const
{
using Types = vtkm::ListTagBase<vtkm::UInt8>;
vtkm::cont::CastAndCall(pixels.ResetTypeList(Types{}), RunImpl(), input, componentsOut, device);
}
// Entry point for a concrete pixel array handle; forwards to RunImpl through
// vtkm::cont::CastAndCall.
template <typename T, typename S, typename OutputPortalType, typename Device>
void Run(const vtkm::cont::CellSetStructured<2>& input,
const vtkm::cont::ArrayHandle<T, S>& pixels,
OutputPortalType& componentsOut,
Device device) const
{
vtkm::cont::CastAndCall(pixels, RunImpl(), input, componentsOut, device);
}
};
}
}
}
#endif

@ -29,6 +29,12 @@
#include <vtkm/worklet/ScatterCounting.h>
#include <vtkm/worklet/WorkletMapField.h>
namespace vtkm
{
namespace worklet
{
namespace connectivity
{
template <typename DeviceAdapter>
class InnerJoin
{
@ -93,4 +99,8 @@ public:
mergeDisp.Invoke(key1, value1, lbs, value2, keyOut, value1Out, value2Out);
}
};
}
}
} // vtkm::worklet::connectivity
#endif //vtk_m_worklet_connectivity_InnerJoin_h

@ -0,0 +1,58 @@
//=============================================================================
//
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//
//=============================================================================
#ifndef vtk_m_worklet_connectivity_union_find_h
#define vtk_m_worklet_connectivity_union_find_h
// Worklet that shortens the parent chain of one entry of the component array:
// comp[index] is repeatedly replaced by its parent's parent until it refers
// to an entry that is its own parent.
class PointerJumping : public vtkm::worklet::WorkletMapField
{
public:
  typedef void ControlSignature(FieldIn<IdType> index, WholeArrayInOut<IdType> comp);
  typedef void ExecutionSignature(_1, _2);

  using InputDomain = _1;

  template <typename InOutPortalType>
  VTKM_EXEC void operator()(vtkm::Id index, InOutPortalType& comp) const
  {
    // Follow the chain until the current target is a root (its own parent).
    auto current = comp.Get(index);
    while (comp.Get(current) != current)
    {
      // Hook this entry one level higher, then re-read where it points now.
      comp.Set(index, comp.Get(current));
      current = comp.Get(index);
    }
  }
};
// Worklet that reports, for one entry of the component array, whether the
// entry's parent is also its grandparent, i.e. comp[index] == comp[comp[index]].
class IsStar : public vtkm::worklet::WorkletMapField
{
public:
  typedef void ControlSignature(FieldIn<IdType> index, WholeArrayIn<IdType> comp, FieldOut<>);
  typedef _3 ExecutionSignature(_1, _2);

  using InputDomain = _1;

  template <typename InOutPortalType>
  VTKM_EXEC bool operator()(vtkm::Id index, InOutPortalType& comp) const
  {
    const auto parent = comp.Get(index);
    const auto grandparent = comp.Get(parent);
    return parent == grandparent;
  }
};
#endif // vtk_m_worklet_connectivity_union_find_h

@ -192,7 +192,7 @@ public:
// and set the initial inverse index to a flag
isCritical = (outDegree != 1) ? 1 : 0;
}
}; // Mesh2D_DEM_VertexStarter
}; // Mesh2D_DEM_VertexOutdegreeStarter
} // namespace contourtree
} // namespace worklet

@ -123,15 +123,6 @@ public:
{
}
// For numerous calls of this function GCC is able to determine if i is
// always greater than j ( or vice-versa ) and optimizes those call sites.
// But when it does these optimizations is presumes that i and j will not
// overflow and emits a Wstrict-overflow warning
#ifdef VTKM_GCC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-overflow"
#endif
// Locate the next vertex in direction indicated
template <typename InFieldPortalType>
VTKM_EXEC void operator()(const vtkm::Id& vertex,
@ -223,10 +214,6 @@ public:
chain = destination;
} // operator()
#ifdef VTKM_GCC
#pragma GCC diagnostic pop
#endif
}; // Mesh2D_DEM_VertexStarter
}
}

@ -103,14 +103,6 @@ public:
inline VTKM_EXEC bool operator()(const vtkm::Id& i, const vtkm::Id& j, bool ascending) const
{
// For numerous calls of this function GCC is able to determine if i is
// always greater than j ( or vice-versa ) and optimizes those call sites.
// But when it does these optimizations is presumes that i and j will not
// overflow and emits a Wstrict-overflow warning
#ifdef VTKM_GCC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-overflow"
#endif
if (values.Get(i) < values.Get(j))
{
return ascending ^ true;
@ -127,9 +119,6 @@ public:
{
return ascending ^ false;
}
#ifdef VTKM_GCC
#pragma GCC diagnostic pop
#endif
// fall through to return false
return false;
}

@ -26,7 +26,7 @@
#include <vtkm/cont/arg/TransportTagArrayOut.h>
#include <vtkm/cont/arg/TransportTagExecObject.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/exec/arg/FetchTagArrayDirectOut.h>
#include <vtkm/worklet/gradient/Divergence.h>
@ -39,7 +39,7 @@ namespace exec
{
template <typename T, typename DeviceAdapter>
struct GradientScalarOutput : public vtkm::cont::ExecutionObjectFactoryBase
struct GradientScalarOutput : public vtkm::cont::ExecutionObjectBase
{
using ValueType = vtkm::Vec<T, 3>;
using BaseTType = typename vtkm::BaseComponent<T>::Type;
@ -77,7 +77,7 @@ struct GradientScalarOutput : public vtkm::cont::ExecutionObjectFactoryBase
};
template <typename T, typename DeviceAdapter>
struct GradientVecOutput : public vtkm::cont::ExecutionObjectFactoryBase
struct GradientVecOutput : public vtkm::cont::ExecutionObjectBase
{
using ValueType = vtkm::Vec<T, 3>;
using BaseTType = typename vtkm::BaseComponent<T>::Type;

@ -23,7 +23,7 @@
#include <vtkm/CellShape.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ExecutionObjectFactoryBase.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/StorageBasic.h>
@ -147,7 +147,7 @@ private:
PortalType Indices;
};
class TriangulateTablesExecutionObjectFactory : public vtkm::cont::ExecutionObjectFactoryBase
class TriangulateTablesExecutionObjectFactory : public vtkm::cont::ExecutionObjectBase
{
public:
template <typename Device>
@ -344,7 +344,7 @@ private:
PortalType Indices;
};
class TetrahedralizeTablesExecutionObjectFactory : public vtkm::cont::ExecutionObjectFactoryBase
class TetrahedralizeTablesExecutionObjectFactory : public vtkm::cont::ExecutionObjectBase
{
public:
template <typename Device>

@ -56,7 +56,7 @@ struct ExecutionObject
vtkm::Id Value;
};
struct TestExecObjectType : vtkm::cont::ExecutionObjectFactoryBase
struct TestExecObjectType : vtkm::cont::ExecutionObjectBase
{
template <typename Functor, typename... Args>
void CastAndCall(Functor f, Args&&... args) const
@ -74,7 +74,7 @@ struct TestExecObjectType : vtkm::cont::ExecutionObjectFactoryBase
};
struct TestExecObjectTypeBad
{ //this will fail as it doesn't inherit from vtkm::cont::ExecutionObjectFactoryBase
{ //this will fail as it doesn't inherit from vtkm::cont::ExecutionObjectBase
template <typename Functor, typename... Args>
void CastAndCall(Functor f, Args&&... args) const
{

@ -23,7 +23,7 @@ set(unit_tests
UnitTestCellAverage.cxx
UnitTestCellDeepCopy.cxx
UnitTestCellGradient.cxx
UnitTestCellSetConnectivity.cpp
UnitTestCellSetConnectivity.cxx
UnitTestCellSetDualGraph.cxx
UnitTestClipping.cxx
UnitTestContourTreeUniform.cxx
@ -37,6 +37,7 @@ set(unit_tests
UnitTestFieldHistogram.cxx
UnitTestFieldStatistics.cxx
UnitTestInnerJoin.cxx
UnitTestImageConnectivity.cxx
UnitTestKdTreeBuildNNS.cxx
UnitTestKeys.cxx
UnitTestMagnitude.cxx
@ -50,6 +51,7 @@ set(unit_tests
UnitTestParticleAdvection.cxx
UnitTestPointElevation.cxx
UnitTestPointGradient.cxx
UnitTestPointTransform.cxx
UnitTestProbe.cxx
UnitTestRemoveUnusedPoints.cxx
UnitTestScalarsToColors.cxx

@ -140,7 +140,8 @@ public:
auto cellSet = outputData.GetCellSet().Cast<vtkm::cont::CellSetSingleType<>>();
vtkm::cont::ArrayHandle<vtkm::Id> componentArray;
CellSetConnectivity().Run(cellSet, componentArray, DeviceAdapter());
vtkm::worklet::connectivity::CellSetConnectivity().Run(
cellSet, componentArray, DeviceAdapter());
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
Algorithm::Sort(componentArray);
@ -155,7 +156,8 @@ public:
auto cellSet = dataSet.GetCellSet().Cast<vtkm::cont::CellSetExplicit<>>();
vtkm::cont::ArrayHandle<vtkm::Id> componentArray;
CellSetConnectivity().Run(cellSet, componentArray, DeviceAdapter());
vtkm::worklet::connectivity::CellSetConnectivity().Run(
cellSet, componentArray, DeviceAdapter());
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
Algorithm::Sort(componentArray);
@ -170,7 +172,8 @@ public:
auto cellSet = dataSet.GetCellSet();
vtkm::cont::ArrayHandle<vtkm::Id> componentArray;
CellSetConnectivity().Run(cellSet, componentArray, DeviceAdapter());
vtkm::worklet::connectivity::CellSetConnectivity().Run(
cellSet, componentArray, DeviceAdapter());
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
Algorithm::Sort(componentArray);
@ -179,7 +182,8 @@ public:
"Wrong number of connected components");
}
void operator()() const {
void operator()() const
{
this->TestTangleIsosurface();
this->TestExplicitDataSet();
this->TestUniformDataSet();

@ -28,7 +28,7 @@
template <typename DeviceAdapter>
class TestCellSetDualGraph
{
public:
private:
template <typename T, typename Storage>
bool TestArrayHandle(const vtkm::cont::ArrayHandle<T, Storage>& ah,
const T* expected,
@ -50,6 +50,7 @@ public:
return true;
}
public:
void TestTriangleMesh() const
{
std::vector<vtkm::Id> connectivity = { 0, 2, 4, 1, 3, 5, 2, 6, 4, 5, 3, 7, 2, 9, 6, 4, 6, 8 };
@ -61,7 +62,7 @@ public:
vtkm::cont::ArrayHandle<vtkm::Id> indexOffsetArray;
vtkm::cont::ArrayHandle<vtkm::Id> connectivityArray;
CellSetDualGraph<DeviceAdapter>().Run(
vtkm::worklet::connectivity::CellSetDualGraph<DeviceAdapter>().Run(
cellSet, numIndicesArray, indexOffsetArray, connectivityArray);
vtkm::Id expectedNumIndices[] = { 1, 1, 3, 1, 1, 1 };

@ -0,0 +1,74 @@
//=============================================================================
//
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//
//=============================================================================
#include <vtkm/cont/testing/MakeTestDataSet.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/filter/MarchingCubes.h>
#include <vtkm/worklet/connectivities/ImageConnectivity.h>
template <typename DeviceAdapter>
class TestImageConnectivity
{
public:
using Algorithm = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
void operator()() const
{
// example image from Connected Component Labeling in CUDA by OndˇrejˇŚtava,
// Bedˇrich Beneˇ
std::vector<vtkm::UInt8> pixels(8 * 4, 0);
pixels[3] = pixels[4] = pixels[10] = pixels[11] = 1;
pixels[1] = pixels[9] = pixels[16] = pixels[17] = pixels[24] = pixels[25] = 1;
pixels[7] = pixels[15] = pixels[21] = pixels[23] = pixels[28] = pixels[29] = pixels[30] =
pixels[31] = 1;
vtkm::cont::DataSetBuilderUniform builder;
vtkm::cont::DataSet data = builder.Create(vtkm::Id3(8, 4, 1));
auto colorField = vtkm::cont::make_Field("color", vtkm::cont::Field::ASSOC_POINTS, pixels);
data.AddField(colorField);
vtkm::cont::ArrayHandle<vtkm::Id> component;
vtkm::worklet::connectivity::ImageConnectivity().Run(
data.GetCellSet(0).Cast<vtkm::cont::CellSetStructured<2>>(),
colorField.GetData(),
component,
DeviceAdapter());
std::vector<vtkm::Id> componentExpected = { 0, 1, 2, 1, 1, 3, 3, 4, 0, 1, 1, 1, 3, 3, 3, 4,
1, 1, 3, 3, 3, 4, 3, 4, 1, 1, 3, 3, 4, 4, 4, 4 };
std::size_t i = 0;
for (vtkm::Id index = 0; index < component.GetNumberOfValues(); index++, i++)
{
VTKM_TEST_ASSERT(component.GetPortalConstControl().Get(index) == componentExpected[i],
"Components has unexpected value.");
}
}
};
int UnitTestImageConnectivity(int, char* [])
{
return vtkm::cont::testing::Testing::Run(
TestImageConnectivity<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>());
}

@ -23,6 +23,7 @@
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/worklet/connectivities/InnerJoin.h>
template <typename DeviceAdapter>
class TestInnerJoin
{
@ -67,7 +68,8 @@ public:
vtkm::cont::ArrayHandle<vtkm::Id> outA;
vtkm::cont::ArrayHandle<vtkm::Id> outB;
InnerJoin<DeviceAdapter>().Run(A_arr, idxA, B_arr, idxB, joinedIndex, outA, outB);
vtkm::worklet::connectivity::InnerJoin<DeviceAdapter>().Run(
A_arr, idxA, B_arr, idxB, joinedIndex, outA, outB);
vtkm::Id expectedIndex[] = { 5, 5, 8, 8, 9 };
VTKM_TEST_ASSERT(TestArrayHandle(joinedIndex, expectedIndex, 5), "Wrong joined keys");

@ -0,0 +1,201 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/CellSetExplicit.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/worklet/PointTransform.h>
#include <random>
#include <vector>
namespace
{
// Pseudo-random engine shared by the randomized parts of TestPointTransform.
// It is default-constructed and not seeded anywhere in the code shown here,
// so it starts from the engine's default seed.
std::mt19937 randGenerator;
// Creates the data set used by every PointTransform test.
//
// Points: a dim x dim grid (dim = 5) with x and z spanning [0, 1] and
// y = (x^2 + z^2) / 2, registered as the coordinate system "coordinates".
// Cells: (dim - 1)^2 quads connecting neighbouring grid points, stored in an
// explicit cell set named "cells".
vtkm::cont::DataSet MakePointTransformTestDataSet()
{
  using PointType = vtkm::Vec<vtkm::FloatDefault, 3>;

  const vtkm::Id dim = 5;
  // Dividing a grid index by this value maps it into [0, 1].
  const vtkm::FloatDefault lastIndex = static_cast<vtkm::FloatDefault>(dim - 1);

  std::vector<PointType> coordinates;
  for (vtkm::Id row = 0; row < dim; ++row)
  {
    const vtkm::FloatDefault z = static_cast<vtkm::FloatDefault>(row) / lastIndex;
    for (vtkm::Id col = 0; col < dim; ++col)
    {
      const vtkm::FloatDefault x = static_cast<vtkm::FloatDefault>(col) / lastIndex;
      const vtkm::FloatDefault y = (x * x + z * z) / 2.0f;
      coordinates.push_back(vtkm::make_Vec(x, y, z));
    }
  }

  vtkm::cont::DataSet dataSet;
  dataSet.AddCoordinateSystem(
    vtkm::cont::make_CoordinateSystem("coordinates", coordinates, vtkm::CopyFlag::On));

  const vtkm::Id numCells = (dim - 1) * (dim - 1);
  vtkm::cont::CellSetExplicit<> cellSet("cells");
  cellSet.PrepareToAddCells(numCells, numCells * 4);
  for (vtkm::Id row = 0; row < dim - 1; ++row)
  {
    for (vtkm::Id col = 0; col < dim - 1; ++col)
    {
      // Point ids of this cell's first corner in the current and next grid row.
      const vtkm::Id thisRow = row * dim + col;
      const vtkm::Id nextRow = (row + 1) * dim + col;
      cellSet.AddCell(vtkm::CELL_SHAPE_QUAD,
                      4,
                      vtkm::make_Vec<vtkm::Id>(thisRow, thisRow + 1, nextRow + 1, nextRow));
    }
  }
  cellSet.CompleteAddingCells(static_cast<vtkm::Id>(coordinates.size()));

  dataSet.AddCellSet(cellSet);
  return dataSet;
}
// Checks the output of the PointTransform worklet.
//
// coords : coordinate system whose points were fed to the worklet
// result : points produced by the worklet
// matrix : 4x4 transform the worklet is expected to have applied
//
// Asserts that both arrays have the same length and that every output point
// equals vtkm::Transform3DPoint(matrix, input point) according to test_equal.
void ValidatePointTransform(const vtkm::cont::CoordinateSystem& coords,
                            const vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::FloatDefault, 3>>& result,
                            const vtkm::Matrix<vtkm::FloatDefault, 4, 4>& matrix)
{
  auto points = coords.GetData();
  const vtkm::Id numPoints = points.GetNumberOfValues();
  VTKM_TEST_ASSERT(numPoints == result.GetNumberOfValues(),
                   "Incorrect number of points in point transform");

  auto inputPortal = points.GetPortalConstControl();
  auto outputPortal = result.GetPortalConstControl();
  for (vtkm::Id pointId = 0; pointId < numPoints; ++pointId)
  {
    const auto expected = vtkm::Transform3DPoint(matrix, inputPortal.Get(pointId));
    VTKM_TEST_ASSERT(test_equal(outputPortal.Get(pointId), expected),
                     "Wrong result for PointTransform worklet");
  }
}
// Runs the PointTransform worklet configured as a translation by `trans` over
// the coordinates of `ds`, then validates the output against the matrix
// produced by vtkm::Transform3DTranslate(trans).
void TestPointTransformTranslation(const vtkm::cont::DataSet& ds,
                                   const vtkm::Vec<vtkm::FloatDefault, 3>& trans)
{
  using WorkletType = vtkm::worklet::PointTransform<vtkm::FloatDefault>;
  using DispatcherType = vtkm::worklet::DispatcherMapField<WorkletType>;

  WorkletType worklet;
  worklet.SetTranslation(trans);

  vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::FloatDefault, 3>> result;
  DispatcherType dispatcher(worklet);
  dispatcher.Invoke(ds.GetCoordinateSystem(), result);

  ValidatePointTransform(ds.GetCoordinateSystem(), result, vtkm::Transform3DTranslate(trans));
}
// Runs the PointTransform worklet configured with the per-axis scale factors
// in `scale` over the coordinates of `ds`, then validates the output against
// the matrix produced by vtkm::Transform3DScale(scale).
void TestPointTransformScale(const vtkm::cont::DataSet& ds,
                             const vtkm::Vec<vtkm::FloatDefault, 3>& scale)
{
  using WorkletType = vtkm::worklet::PointTransform<vtkm::FloatDefault>;
  using DispatcherType = vtkm::worklet::DispatcherMapField<WorkletType>;

  WorkletType worklet;
  worklet.SetScale(scale);

  vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::FloatDefault, 3>> result;
  DispatcherType dispatcher(worklet);
  dispatcher.Invoke(ds.GetCoordinateSystem(), result);

  ValidatePointTransform(ds.GetCoordinateSystem(), result, vtkm::Transform3DScale(scale));
}
// Runs the PointTransform worklet configured as a rotation of `angle` about
// `axis` over the coordinates of `ds`, then validates the output against the
// matrix produced by vtkm::Transform3DRotate(angle, axis).
void TestPointTransformRotation(const vtkm::cont::DataSet& ds,
                                const vtkm::FloatDefault& angle,
                                const vtkm::Vec<vtkm::FloatDefault, 3>& axis)
{
  using WorkletType = vtkm::worklet::PointTransform<vtkm::FloatDefault>;
  using DispatcherType = vtkm::worklet::DispatcherMapField<WorkletType>;

  WorkletType worklet;
  worklet.SetRotation(angle, axis);

  vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::FloatDefault, 3>> result;
  DispatcherType dispatcher(worklet);
  dispatcher.Invoke(ds.GetCoordinateSystem(), result);

  ValidatePointTransform(ds.GetCoordinateSystem(), result, vtkm::Transform3DRotate(angle, axis));
}
}
void TestPointTransform()
{
std::cout << "Testing PointTransform Worklet" << std::endl;
vtkm::cont::DataSet ds = MakePointTransformTestDataSet();
int N = 41;
//Test translation
TestPointTransformTranslation(ds, vtkm::Vec<vtkm::FloatDefault, 3>(0, 0, 0));
TestPointTransformTranslation(ds, vtkm::Vec<vtkm::FloatDefault, 3>(1, 1, 1));
TestPointTransformTranslation(ds, vtkm::Vec<vtkm::FloatDefault, 3>(-1, -1, -1));
std::uniform_real_distribution<vtkm::FloatDefault> transDist(-100, 100);
for (int i = 0; i < N; i++)
TestPointTransformTranslation(ds,
vtkm::Vec<vtkm::FloatDefault, 3>(transDist(randGenerator),
transDist(randGenerator),
transDist(randGenerator)));
//Test scaling
TestPointTransformScale(ds, vtkm::Vec<vtkm::FloatDefault, 3>(1, 1, 1));
TestPointTransformScale(ds, vtkm::Vec<vtkm::FloatDefault, 3>(.23f, .23f, .23f));
TestPointTransformScale(ds, vtkm::Vec<vtkm::FloatDefault, 3>(1, 2, 3));
TestPointTransformScale(ds, vtkm::Vec<vtkm::FloatDefault, 3>(3.23f, 9.23f, 4.23f));
std::uniform_real_distribution<vtkm::FloatDefault> scaleDist(0.0001f, 100);
for (int i = 0; i < N; i++)
{
TestPointTransformScale(ds, vtkm::Vec<vtkm::FloatDefault, 3>(scaleDist(randGenerator)));
TestPointTransformScale(ds,
vtkm::Vec<vtkm::FloatDefault, 3>(scaleDist(randGenerator),
scaleDist(randGenerator),
scaleDist(randGenerator)));
}
//Test rotation
std::vector<vtkm::FloatDefault> angles;
std::uniform_real_distribution<vtkm::FloatDefault> angleDist(0, 360);
for (int i = 0; i < N; i++)
angles.push_back(angleDist(randGenerator));
std::vector<vtkm::Vec<vtkm::FloatDefault, 3>> axes;
axes.push_back(vtkm::Vec<vtkm::FloatDefault, 3>(1, 0, 0));
axes.push_back(vtkm::Vec<vtkm::FloatDefault, 3>(0, 1, 0));
axes.push_back(vtkm::Vec<vtkm::FloatDefault, 3>(0, 0, 1));
axes.push_back(vtkm::Vec<vtkm::FloatDefault, 3>(1, 1, 1));
axes.push_back(-axes[0]);
axes.push_back(-axes[1]);
axes.push_back(-axes[2]);
axes.push_back(-axes[3]);
std::uniform_real_distribution<vtkm::FloatDefault> axisDist(-1, 1);
for (int i = 0; i < N; i++)
axes.push_back(vtkm::Vec<vtkm::FloatDefault, 3>(
axisDist(randGenerator), axisDist(randGenerator), axisDist(randGenerator)));
for (std::size_t i = 0; i < angles.size(); i++)
for (std::size_t j = 0; j < axes.size(); j++)
TestPointTransformRotation(ds, angles[i], axes[j]);
}
// Entry point of this unit test: runs TestPointTransform through the VTK-m
// testing harness and returns whatever the harness returns.
int UnitTestPointTransform(int, char* [])
{
  const int status = vtkm::cont::testing::Testing::Run(TestPointTransform);
  return status;
}