Merge remote-tracking branch 'vtkm/master'

This commit is contained in:
Brent Lessley 2019-06-26 21:58:55 -07:00
commit 10da5e13e5
49 changed files with 3164 additions and 1298 deletions

1767
CMake/FindMPI.cmake Normal file

File diff suppressed because it is too large Load Diff

24
CMake/VTKmMPI.cmake Normal file

@ -0,0 +1,24 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
if(CMAKE_VERSION VERSION_LESS 3.15)
#While CMake 3.10 introduced the new MPI module.
#Fixes related to MPI+CUDA that VTK-m needs are
#only found in CMake 3.15+.
find_package(MPI REQUIRED MODULE)
else()
#clunky but we need to make sure we use the upstream module if it exists
set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "")
find_package(MPI MODULE)
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
endif()
endif()

@ -12,6 +12,7 @@ include(CMakeParseArguments)
include(VTKmDeviceAdapters)
include(VTKmCPUVectorization)
include(VTKmMPI)
#-----------------------------------------------------------------------------
# Utility to build a kit name from the current directory.

@ -62,13 +62,13 @@ file(GENERATE
OUTPUT "${${file_loc_var}}"
CONTENT
"
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING \"\")
set(CMAKE_PREFIX_PATH ${install_prefix} CACHE STRING \"\")
set(CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING \"\")
set(CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} CACHE STRING \"\")
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER} CACHE FILEPATH \"\")
set(CMAKE_MAKE_PROGRAM \"${CMAKE_MAKE_PROGRAM}\" CACHE FILEPATH \"\")
set(CMAKE_PREFIX_PATH \"${CMAKE_PREFIX_PATH};${install_prefix}/\" CACHE STRING \"\")
set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"\")
set(CMAKE_CXX_FLAGS \"$CACHE{CMAKE_CXX_FLAGS}\" CACHE STRING \"\")
set(CMAKE_CUDA_COMPILER \"${CMAKE_CUDA_COMPILER}\" CACHE FILEPATH \"\")
set(CMAKE_CUDA_FLAGS \"$CACHE{CMAKE_CUDA_FLAGS}\" CACHE STRING \"\")
set(CMAKE_CUDA_HOST_COMPILER \"${CMAKE_CUDA_HOST_COMPILER}\" CACHE FILEPATH \"\")
"
)
@ -81,8 +81,34 @@ function(vtkm_test_against_install dir)
set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/${name}/")
set(build_dir "${VTKm_BINARY_DIR}/CMakeFiles/_tmp_build/test_${name}/")
set(build_config "${build_dir}/build_options.cmake")
vtkm_generate_install_build_options(build_config)
set(args )
if(CMAKE_VERSION VERSION_LESS 3.13)
#Before 3.13 the config file passing to cmake via ctest --build-options
#was broken
set(args
-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}
-DCMAKE_PREFIX_PATH:STRING=${install_prefix}
-DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
-DCMAKE_CUDA_COMPILER:FILEPATH=${CMAKE_CUDA_COMPILER}
-DCMAKE_CUDA_HOST_COMPILER:FILEPATH=${CMAKE_CUDA_HOST_COMPILER}
-DCMAKE_CXX_FLAGS:STRING=$CACHE{CMAKE_CXX_FLAGS}
-DCMAKE_CUDA_FLAGS:STRING=$CACHE{CMAKE_CUDA_FLAGS}
)
else()
set(build_config "${build_dir}build_options.cmake")
vtkm_generate_install_build_options(build_config)
set(args -C ${build_config})
endif()
if(WIN32 AND TARGET vtkm::tbb)
#on windows we need to specify these as FindTBB won't
#find the installed version just with the prefix path
list(APPEND args
-DTBB_LIBRARY_DEBUG:FILEPATH=${TBB_LIBRARY_DEBUG}
-DTBB_LIBRARY_RELEASE:FILEPATH=${TBB_LIBRARY_RELEASE}
-DTBB_INCLUDE_DIR:PATH=${TBB_INCLUDE_DIR}
)
endif()
#determine if the test is expected to compile or fail to build. We use
#this information to built the test name to make it clear to the user
@ -93,10 +119,13 @@ function(vtkm_test_against_install dir)
add_test(NAME ${build_name}
COMMAND ${CMAKE_CTEST_COMMAND}
-C $<CONFIG>
--build-and-test ${src_dir} ${build_dir}
--build-generator ${CMAKE_GENERATOR}
--build-makeprogram ${CMAKE_MAKE_PROGRAM}
--build-options -C "${build_config}"
--build-options
${args}
--no-warn-unused-cli
)
set_tests_properties(${build_name} PROPERTIES LABELS ${test_label} )

@ -182,14 +182,6 @@ check_type_size("long long" VTKm_SIZE_LONG_LONG BUILTIN_TYPES_ONLY)
#-----------------------------------------------------------------------------
# Add subdirectories
if(VTKm_ENABLE_MPI)
# This `if` is temporary and will be removed once `diy` supports building
# without MPI.
if (NOT MPI_C_FOUND)
find_package(MPI ${VTKm_FIND_PACKAGE_QUIETLY})
endif()
endif()
add_subdirectory(vtkm)
#-----------------------------------------------------------------------------
@ -241,6 +233,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(
FILES
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -253,6 +246,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -315,6 +309,4 @@ endif()
#-----------------------------------------------------------------------------
# Build examples
if(VTKm_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif(VTKm_ENABLE_EXAMPLES)
add_subdirectory(examples)

@ -49,7 +49,6 @@ contents of these for details on the specifics of their respective
licenses.
- - - - - - - - - - - - - - - - - - - - - - - - do not remove this line
CMake/FindTBB.cmake
CMake/FindGLEW.cmake
Utilities
vtkm/cont/tbb/internal/parallel_sort.h
vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h

File diff suppressed because it is too large Load Diff

@ -1,24 +0,0 @@
## `StorageBasic` StealArray() now provides delete function to new owner
Memory that is stolen from VTK-m has to be freed correctly. This is required
as the memory could have been allocated with `new`, `malloc` or even `cudaMallocManaged`.
Previously it was very easy to transfer ownership of memory out of VTK-m and
either fail to capture the free function, or ask for it after the transfer
operation which would return a nullptr. Now stealing an array also
provides the free function reducing one source of memory leaks.
To properly steal memory from VTK-m you do the following:
```cpp
vtkm::cont::ArrayHandle<T> arrayHandle;
...
auto* stolen = arrayHandle.StealArray();
T* ptr = stolen.first;
auto free_function = stolen.second;
...
free_function(ptr);
```

@ -1,6 +0,0 @@
# VariantArrayHandle::AsVirtual<T>() performs casting
The AsVirtual<T> method of VariantArrayHandle now works for any arithmetic type,
not just the actual type of the underlying array. This works by inserting an
ArrayHandleCast between the underlying concrete array and the new
ArrayHandleVirtual when needed.

@ -1,12 +0,0 @@
# VTK-m logs details about each CUDA kernel launch
The VTK-m logging infrastructure has been extended with a new log level
`KernelLaunches` which exists between `MemTransfer` and `Cast`.
This log level reports the number of blocks, threads per block, and the
PTX version of each CUDA kernel launched.
This logging level was primarily introduced to help developers that are
tracking down issues that occur when VTK-m components have been built with
different `sm_XX` flags and help people looking to do kernel performance
tuning.

@ -1,4 +0,0 @@
# VTK-m provides a vtkm_filter target
VTK-m now provides a `vtkm_filter` that contains pre-built components
of filters for consuming projects.

@ -1,11 +0,0 @@
# Make ArrayHandleVirtual conform with other ArrayHandle structure
Previously, ArrayHandleVirtual was defined as a specialization of
ArrayHandle with the virtual storage tag. This was because the storage
object was polymorphic and needed to be handled special. These changes
moved the existing storage definition to an internal class, and then
managed the pointer to that implementation class in a Storage object that
can be managed like any other storage object.
Also moved the implementation of StorageAny into the implementation of the
internal storage object.

@ -1,45 +0,0 @@
# Add vtkm::cont::ArrayHandleVirtual
Added a new class named `ArrayHandleVirtual` that allows you to type erase an
ArrayHandle storage type by using virtual calls. This simplification makes
storing `Fields` and `Coordinates` significantly easier as VTK-m doesn't
need to deduce both the storage and value type when executing worklets.
To construct an `ArrayHandleVirtual` one can do one of the following:
```cpp
vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
// constrcut from an array handle
vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v(pressure);
// or assign from an array handle
v = constant;
```
To help maintain performance `ArrayHandleVirtual` provides a collection of helper
functions/methods to query and cast back to the concrete storage and value type:
```cpp
vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v = constant;
bool isConstant = vtkm::cont::IsType< decltype(constant) >(v);
if(isConstant)
vtkm::cont::ArrayHandleConstant<vtkm::Float32> t = vtkm::cont::Cast< decltype(constant) >(v);
```
Lastly, a common operation of calling code using `ArrayHandleVirtual` is a desire to construct a new instance
of an existing virtual handle with the same storage type. This can be done by using the `NewInstance` method
as seen below
```cpp
vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v = pressure;
vtkm::cont::ArrayHandleVirtual<vtkm::Float32> newArray = v->NewInstance();
bool isConstant = vtkm::cont::IsType< vtkm::cont::ArrayHandle<vtkm::Float32> >(newArray); //will be true
```

@ -1,9 +0,0 @@
# vtkm::cont::ArrayHandleZip provides a consistent API even with non-writable handles
Previously ArrayHandleZip could not wrap an implicit handle and provide a consistent experience.
The primary issue was that if you tried to use the PortalType returned by GetPortalControl() you
would get a compile failure. This would occur as the PortalType returned would try to call `Set`
on an ImplicitPortal which doesn't have a set method.
Now with this change, the `ZipPortal` use SFINAE to determine if `Set` and `Get` should call the
underlying zipped portals.

@ -1,65 +0,0 @@
# Introduce asynchronous and device independent timer
The timer class now is asynchronous and device independent. it's using an
similiar API as vtkOpenGLRenderTimer with Start(), Stop(), Reset(), Ready(),
and GetElapsedTime() function. For convenience and backward compability, Each
Start() function call will call Reset() internally. GetElapsedTime() function
can be used multiple times to time sequential operations and Stop() function
can be helpful when you want to get the elapsed time latter.
Bascially it can be used in two modes:
* Create a Timer without any device info.
* It would enable the timer for all enabled devices on the machine. Users can get a
specific elapsed time by passing a device id into the GetElapsedTime function.
If no device is provided, it would pick the maximum of all timer results - the
logic behind this decision is that if cuda is disabled, openmp, serial and tbb
roughly give the same results; if cuda is enabled it's safe to return the
maximum elapsed time since users are more interested in the device execution
time rather than the kernal launch time. The Ready function can be handy here
to query the status of the timer.
``` Construct a generic timer
// Assume CUDA is enabled on the machine
vtkm::cont::Timer timer;
timer.Start();
// Run the algorithm
auto timeHost = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagSerial());
// To avoid the expensive device synchronization, we query is ready here.
if (timer.IsReady())
{
auto timeDevice = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagCuda());
}
// Force the synchronization. Ideally device execution time would be returned
which takes longer time than ther kernal call
auto timeGeneral = timer.GetElapsedTime();
```
* Create a Timer with a specific device.
* It works as the old timer that times for a specific device id.
``` Construct a device specific timer
// Assume TBB is enabled on the machine
vtkm::cont::Timer timer{vtkm::cont::DeviceAdaptertagTBB()};
timer.Start(); // t0
// Run the algorithm
// Timer would just return 0 and warn the user in the logger that an invalid
// device is used to query elapsed time
auto timeInvalid = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagSerial());
if timer.IsReady()
{
// Either will work and mark t1, return t1-t0
auto time1TBB = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagTBB());
auto time1General = timer.GetElapsedTime();
}
// Do something
auto time2 = timer.GetElapsedTime(); // t2 will be marked and t2-t0 will be returned
// Do something
timer.Stop() // t3 marked
// Do something then summarize latter
auto timeFinal = timer.GetElapsedTime(); // t3-t0
```

@ -1,51 +0,0 @@
# Add support for BitFields.
BitFields are:
- Stored in memory using a contiguous buffer of bits.
- Accessible via portals, a la ArrayHandle.
- Portals operate on individual bits or words.
- Operations may be atomic for safe use from concurrent kernels.
The new BitFieldToUnorderedSet device algorithm produces an
ArrayHandle containing the indices of all set bits, in no particular
order.
The new AtomicInterface classes provide an abstraction into bitwise
atomic operations across control and execution environments and are
used to implement the BitPortals.
BitFields may be used as boolean-typed ArrayHandles using the
ArrayHandleBitField adapter. ArrayHandleBitField uses atomic operations to read
and write bits in the BitField, and is safe to use in concurrent code.
For example, a simple worklet that merges two arrays based on a boolean
condition is tested in TestingBitField:
```
class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn cond,
FieldIn trueVals,
FieldIn falseVals,
FieldOut result);
using ExecutionSignature = _4(_1, _2, _3);
template <typename T>
VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
{
return cond ? trueVal : falseVal;
}
};
BitField bits = ...;
auto condArray = vtkm::cont::make_ArrayHandleBitField(bits);
auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
vtkm::cont::ArrayHandle<vtkm::Id> output;
vtkm::worklet::DispatcherMapField<ConditionalMergeWorklet> dispatcher;
dispatcher.Invoke(condArray, trueArray, falseArray, output);
```

@ -1,10 +0,0 @@
# Put CellLocatorBoundingIntervalHierarchy in vtkm_cont library
All of the methods in CellLocatorBoundingIntervalHierarchy were listed in
header files. This is sometimes problematic with virtual methods. Since
everything implemented in it can just be embedded in a library, move the
code into the vtkm_cont library.
These changes caused some warnings in clang to show up based on virtual
methods in other cell locators. Hence, the rest of the cell locators
have also had some of their code moved to vtkm_cont.

@ -1,14 +0,0 @@
# VTK-m `vtkm::cont::DeviceAdapterId` construction from string are now case-insensitive
You can now construct a `vtkm::cont::DeviceAdapterId` from a string no matter
the case of it. The following all will construct the same `vtkm::cont::DeviceAdapterId`.
```cpp
vtkm::cont::DeviceAdapterId id1 = vtkm::cont::make_DeviceAdapterId("cuda");
vtkm::cont::DeviceAdapterId id2 = vtkm::cont::make_DeviceAdapterId("CUDA");
vtkm::cont::DeviceAdapterId id3 = vtkm::cont::make_DeviceAdapterId("Cuda");
auto& tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
vtkm::cont::DeviceAdapterId id4 = tracker.GetDeviceAdapterId("cuda");
vtkm::cont::DeviceAdapterId id5 = tracker.GetDeviceAdapterId("CUDA");
vtkm::cont::DeviceAdapterId id6 = tracker.GetDeviceAdapterId("Cuda");

@ -1,58 +0,0 @@
# Allow VariantArrayHandle CastAndCall to cast to concrete types
Previously, the `VariantArrayHandle::CastAndCall` (and indirect calls through
`vtkm::cont::CastAndCall`) attempted to cast to only
`vtkm::cont::ArrayHandleVirtual` with different value types. That worked, but
it meant that whatever was called had to operate through virtual functions.
Under most circumstances, it is worthwhile to also check for some common
storage types that, when encountered, can be accessed much faster. This
change provides the casting to concrete storage types and now uses
`vtkm::cont::ArrayHandleVirtual` as a fallback when no concrete storage
type is found.
By default, `CastAndCall` checks all the storage types in
`VTKM_DEFAULT_STORAGE_LIST_TAG`, which typically contains only the basic
storage. The `ArrayHandleVirtual::CastAndCall` method also allows you to
override this behavior by specifying a different type list in the first
argument. If the first argument is a list type, `CastAndCall` assumes that
all the types in the list are storage tags. If you pass in
`vtkm::ListTagEmpty`, then `CastAndCall` will always cast to an
`ArrayHandleVirtual` (the previous behavior). Alternately, you can pass in
storage tags that might be likely under the current usage.
As an example, consider the following simple code.
``` cpp
vtkm::cont::VariantArrayHandle array;
// stuff happens
array.CastAndCall(myFunctor);
```
Previously, `myFunctor` would be called with
`vtkm::cont::ArrayHandleVirtual<T>` with different type `T`s. After this
change, `myFunctor` will be called with that and with
`vtkm::cont::ArrayHandle<T>` of the same type `T`s.
If you want to only call `myFunctor` with
`vtkm::cont::ArrayHandleVirtual<T>`, then replace the previous line with
``` cpp
array.CastAndCall(vtkm::ListTagEmpty(), myFunctor);
```
Let's say that additionally using `vtkm::cont::ArrayHandleIndex` was also
common. If you want to also specialize for that array, you can do so with
the following line.
``` cpp
array.CastAndCall(vtkm::ListTagBase<vtkm::cont::StorageBasic,
vtkm::cont::ArrayHandleIndex::StorageTag>,
myFunctor);
```
Note that `myFunctor` will be called with
`vtkm::cont::ArrayHandle<T,vtkm::cont::ArrayHandleIndex::StorageTag>`, not
`vtkm::cont::ArrayHandleIndex`.

@ -1,10 +0,0 @@
# CMake 3.8 Required to build VTK-m
While VTK-m has always required a fairly recent version
of CMake when building for Visual Studio, or if OpenMP or
CUDA are enabled, it has supported building with the TBB
device with CMake 3.3.
Given the fact that our primary consumer (VTK) has moved
to require CMake 3.8, it doesn't make sense to require
CMake 3.3 and we have moved to a minimum of 3.8.

@ -1,9 +0,0 @@
# Add connected component worklets and filters
We have added the `ImageConnectivity` and `CellSetConnectivity` worklets and
the corresponding filters to identify connected components in DataSet. The ImageConnectivity
identify connected components in CellSetStructured, based on same field value of neighboring
cells and the CellSetConnective identify connected components based on cell connectivity.
Currently Moore neighborhood (i.e. 8 neighboring pixels for 2D and 27 neighboring pixels
for 3D) is used for ImageConnectivity. For CellSetConnectivity, neighborhood is defined
as cells sharing a common edge.

@ -1,6 +0,0 @@
# CudaAllocator Managed Memory can be disabled from C++
Previously it was impossible for calling code to explicitly
disable managed memory. This can be desirable for projects
that know they don't need managed memory and are super
performance critical.

@ -1,4 +0,0 @@
# VTK-m now requires CUDA separable compilation to build
With the introduction of `vtkm::cont::ArrayHandleVirtual` and the related infrastructure, vtk-m now
requires that all CUDA code be compiled using separable compilation ( -rdc ).

@ -1,132 +0,0 @@
# Remove templates from ControlSignature field tags
Previously, several of the `ControlSignature` tags had a template to
specify a type list. This was to specify potential valid value types for an
input array. The importance of this typelist was to limit the number of
code paths created when resolving a `vtkm::cont::VariantArrayHandle`
(formerly a `DynamicArrayHandle`). This (potentially) reduced the compile
time, the size of libraries/executables, and errors from unexpected types.
Much has changed since this feature was originally implemented. Since then,
the filter infrastructure has been created, and it is through this that
most dynamic worklet invocations happen. However, since the filter
infrastrcture does its own type resolution (and has its own policies) the
type arguments in `ControlSignature` are now of little value.
## Script to update code
This update requires changes to just about all code implementing a VTK-m
worklet. To facilitate the update of this code to these new changes (not to
mention all the code in VTK-m) a script is provided to automatically remove
these template parameters from VTK-m code.
This script is at
[Utilities/Scripts/update-control-signature-tags.sh](../../Utilities/Scripts/update-control-signature-tags.sh).
It needs to be run in a Unix-compatible shell. It takes a single argument,
which is a top level directory to modify files. The script processes all C++
source files recursively from that directory.
## Selecting data types for auxiliary filter fields
The main rational for making these changes is that the types of the inputs
to worklets is almost always already determined by the calling filter.
However, although it is straightforward to specify the type of the "main"
(active) scalars in a filter, it is less clear what to do for additional
fields if a filter needs a second or third field.
Typically, in the case of a second or third field, it is up to the
`DoExecute` method in the filter implementation to apply a policy to that
field. When applying a policy, you give it a policy object (nominally
passed by the user) and a traits of the filter. Generally, the accepted
list of types for a field should be part of the filter's traits. For
example, consider the `WarpVector` filter. This filter only works on
`Vec`s of size 3, so its traits class looks like this.
``` cpp
template <>
class FilterTraits<WarpVector>
{
public:
// WarpVector can only applies to Float and Double Vec3 arrays
using InputFieldTypeList = vtkm::TypeListTagFieldVec3;
};
```
However, the `WarpVector` filter also requires two fields instead of one.
The first (active) field is handled by its superclass (`FilterField`), but
the second (auxiliary) field must be managed in the `DoExecute`. Generally,
this can be done by simply applying the policy with the filter traits.
## The corner cases
Most of the calls to worklets happen within filter implementations, which
have their own way of narrowing down potential types (as previously
described). The majority of the remainder either use static types or work
with a variety of types.
However, there is a minority of corner cases that require a reduction of
types. Since the type argument of the worklet `ControlSignature` arguments
are no longer available, the narrowing of types must be done before the
call to `Invoke`.
This narrowing of arguments is not particularly difficult. Such type-unsure
arguments usually come from a `VariantArrayHandle` (or something that uses
one). You can select the types from a `VariantArrayHandle` simply by using
the `ResetTypes` method. For example, say you know that a variant array is
supposed to be a scalar.
``` cpp
dispatcher.Invoke(variantArray.ResetTypes(vtkm::TypeListTagFieldScalar()),
staticArray);
```
Even more common is to have a `vtkm::cont::Field` object. A `Field` object
internally holds a `VariantArrayHandle`, which is accessible via the
`GetData` method.
``` cpp
dispatcher.Invoke(field.GetData().ResetTypes(vtkm::TypeListTagFieldScalar()),
staticArray);
```
## Change in executable size
The whole intention of these template parameters in the first place was to
reduce the number of code paths compiled. The hypothesis of this change was
that in the current structure the code paths were not being reduced much
if at all. If that is true, the size of executables and libraries should
not change.
Here is a recording of the library and executable sizes before this change
(using `ds -h`).
```
3.0M libvtkm_cont-1.2.1.dylib
6.2M libvtkm_rendering-1.2.1.dylib
312K Rendering_SERIAL
312K Rendering_TBB
22M Worklets_SERIAL
23M Worklets_TBB
22M UnitTests_vtkm_filter_testing
5.7M UnitTests_vtkm_cont_serial_testing
6.0M UnitTests_vtkm_cont_tbb_testing
7.1M UnitTests_vtkm_cont_testing
```
After the changes, the executable sizes are as follows.
```
3.0M libvtkm_cont-1.2.1.dylib
6.0M libvtkm_rendering-1.2.1.dylib
312K Rendering_SERIAL
312K Rendering_TBB
21M Worklets_SERIAL
21M Worklets_TBB
22M UnitTests_vtkm_filter_testing
5.6M UnitTests_vtkm_cont_serial_testing
6.0M UnitTests_vtkm_cont_tbb_testing
7.1M UnitTests_vtkm_cont_testing
```
As we can see, the built sizes have not changed significantly. (If
anything, the build is a little smaller.)

@ -1,45 +0,0 @@
# VTK-m CUDA kernel scheduling including improved defaults, and user customization
VTK-m now offers a more GPU aware set of defaults for kernel scheduling.
When VTK-m first launches a kernel we do system introspection and determine
what GPU's are on the machine and than match this information to a preset
table of values. The implementation is designed in a way that allows for
VTK-m to offer both specific presets for a given GPU ( V100 ) or for
an entire generation of cards ( Pascal ).
Currently VTK-m offers preset tables for the following GPU's:
- Tesla V100
- Tesla P100
If the hardware doesn't match a specific GPU card we than try to find the
nearest know hardware generation and use those defaults. Currently we offer
defaults for
- Older than Pascal Hardware
- Pascal Hardware
- Volta+ Hardware
Some users have workloads that don't align with the defaults provided by
VTK-m. When that is the cause, it is possible to override the defaults
by binding a custom function to `vtkm::cont::cuda::InitScheduleParameters`.
As shown below:
```cpp
ScheduleParameters CustomScheduleValues(char const* name,
int major,
int minor,
int multiProcessorCount,
int maxThreadsPerMultiProcessor,
int maxThreadsPerBlock)
{
ScheduleParameters params {
64 * multiProcessorCount, //1d blocks
64, //1d threads per block
64 * multiProcessorCount, //2d blocks
{ 8, 8, 1 }, //2d threads per block
64 * multiProcessorCount, //3d blocks
{ 4, 4, 4 } }; //3d threads per block
return params;
}
vtkm::cont::cuda::InitScheduleParameters(&CustomScheduleValues);
```

@ -1,20 +0,0 @@
# vtkm::cont::Initialize
A new initialization function, vtkm::cont::Initialize, has been added.
Initialization is not required, but will configure the logging utilities (when
enabled) and allows forcing a device via a `-d` or `--device` command line
option.
Usage:
```
#include <vtkm/cont/Initialize.h>
int main(int argc, char *argv[])
{
auto config = vtkm::cont::Initialize(argc, argv);
...
}
```

@ -1,21 +0,0 @@
# `vtkm::worklet::Invoker` now able to worklets that have non-default scatter type
This change allows the `Invoker` class to support launching worklets that require
a custom scatter operation. This is done by providing the scatter as the second
argument when launch a worklet with the `()` operator.
The following example shows a scatter being provided with a worklet launch.
```cpp
struct CheckTopology : vtkm::worklet::WorkletMapPointToCell
{
using ControlSignature = void(CellSetIn cellset, FieldOutCell);
using ExecutionSignature = _2(FromIndices);
using ScatterType = vtkm::worklet::ScatterPermutation<>;
...
};
vtkm::worklet::Ivoker invoke;
invoke( CheckTopology{}, vtkm::worklet::ScatterPermutation{}, cellset, result );
```

@ -1,5 +0,0 @@
# LodePNG added as a thirdparty
The lodepng library was brought is an thirdparty library.
This has allowed the VTK-m rendering library to have a robust
png decode functionality.

@ -1,104 +0,0 @@
# Allow masking of worklet invocations
There have recently been use cases where it would be helpful to mask out
some of the invocations of a worklet. The idea is that when invoking a
worklet with a mask array on the input domain, you might implement your
worklet more-or-less like the following.
```cpp
VTKM_EXEC void operator()(bool mask, /* other parameters */)
{
if (mask)
{
// Do interesting stuff
}
}
```
This works, but what if your mask has mostly false values? In that case,
you are spending tons of time loading data to and from memory where fields
are stored for no reason.
You could potentially get around this problem by adding a scatter to the
worklet. However, that will compress the output arrays to only values that
are active in the mask. That is problematic if you want the masked output
in the appropriate place in the original arrays. You will have to do some
complex (and annoying and possibly expensive) permutations of the output
arrays.
Thus, we would like a new feature similar to scatter that instead masks out
invocations so that the worklet is simply not run on those outputs.
## New Interface
The new "Mask" feature that is similar (and orthogonal) to the existing
"Scatter" feature. Worklet objects now define a `MaskType` that provides on
object that manages the selections of which invocations are skipped. The
following Mask objects are defined.
* `MaskNone` - This removes any mask of the output. All outputs are
generated. This is the default if no `MaskType` is explicitly defined.
* `MaskSelect` - The user to provides an array that specifies whether
each output is created with a 1 to mean that the output should be
created an 0 the mean that it should not.
* `MaskIndices` - The user provides an array with a list of indices for
all outputs that should be created.
It will be straightforward to implement other versions of masks. (For
example, you could make a mask class that selectes every Nth entry.) Those
could be made on an as-needed basis.
## Implementation
The implementation follows the same basic idea of how scatters are
implemented.
### Mask Classes
The mask class is required to implement the following items.
* `ThreadToOutputType` - A type for an array that maps a thread index (an
index in the array) to an output index. A reasonable type for this
could be `vtkm::cont::ArrayHandle<vtkm::Id>`.
* `GetThreadToOutputMap` - Given the range for the output (e.g. the
number of items in the output domain), returns an array of type
`ThreadToOutputType` that is the actual map.
* `GetThreadRange` - Given a range for the output (e.g. the number of
items in the output domain), returns the range for the threads (e.g.
the number of times the worklet will be invoked).
### Dispatching
The `vtkm::worklet::internal::DispatcherBase` manages a mask class in
the same way it manages the scatter class. It gets the `MaskType` from
the worklet it is templated on. It requires a `MaskType` object during
its construction.
Previously the dispatcher (and downstream) had to manage the range and
indices of inputs and threads. They now have to also manage a separate
output range/index as now all three may be different.
The `vtkm::Invocation` is changed to hold the ThreadToOutputMap array from
the mask. It likewises has a templated `ChangeThreadToOutputMap` method
added (similar to those already existing for the arrays from a scatter).
This method is used in `DispatcherBase::InvokeTransportParameters` to add
the mask's array to the invocation before calling `InvokeSchedule`.
### Thread Indices
With the addition of masks, the `ThreadIndices` classes are changed to
manage the actual output index. Previously, the output index was always the
same as the thread index. However, now these two can be different. The
`GetThreadIndices` methods of the worklet base classes have an argument
added that is the portal to the ThreadToOutputMap.
The worklet `GetThreadIndices` is called from the `Task` classes. These
classes are changed to pass in this additional argument. Since the `Task`
classes get an `Invocation` object from the dispatcher, which contains the
`ThreadToOutputMap`, this change is trivial.
## Interaction Between Mask and Scatter
Although it seems weird, it should work fine to mix scatters and masks. The
scatter will first be applied to the input to generate a (potential) list
of output elements. The mask will then be applied to these output elements.

@ -1,6 +0,0 @@
# Merge benchmark executables into a device dependent shared library
VTK-m has been updated to replace old per device benchmark executables with a device
dependent shared library so that it's able to accept a device adapter at runtime through
the "--device=" argument.

@ -1,3 +0,0 @@
Merge rendering testing executables to a shared library
This commit allows rendering testing executables to select the device at runtime.

@ -1,8 +0,0 @@
# Merge worklet testing executables into a device dependent shared library
VTK-m has been updated to replace old per device worklet testing executables with a device
dependent shared library so that it's able to accept a device adapter at runtime through
the "--device=" argument.

@ -1,13 +0,0 @@
# Wrap third party optionparser.h in vtkm/cont/internal/OptionParser.h
Previously we just took the optionparser.h file and stuck it right in
our source code. That was problematic for a variety of reasons.
1. It incorrectly assigned our license to external code.
2. It made lots of unnecessary changes to the original source (like
reformatting).
3. It made it near impossible to track patches we make and updates to
the original software.
Instead, use the third-party system to track changes to optionparser.h
in a different repository and then pull that into ours.

@ -1,91 +0,0 @@
# Allow Initialize to parse only some arguments
When a library requires reading some command line arguments through a
function like Initialize, it is typical that it will parse through
arguments it supports and then remove those arguments from `argc` and
`argv` so that the remaining arguments can be parsed by the calling
program. Recent changes to the `vtkm::cont::Initialize` function support
that.
## Use Case
Say you are creating a simple benchmark where you want to provide a command
line option `--size` that allows you to adjust the size of the data that
you are working on. However, you also want to support flags like `--device`
and `-v` that are performed by `vtkm::cont::Initialize`. Rather than have
to re-implement all of `Initialize`'s parsing, you can now first call
`Initialize` to handle its arguments and then parse the remaining objects.
The following is a simple (and rather incomplete) example:
```cpp
int main(int argc, char** argv)
{
vtkm::cont::InitializeResult initResult = vtkm::cont::Initialize(argc, argv);
if ((argc > 1) && (strcmp(argv[1], "--size") == 0))
{
if (argc < 3)
{
std::cerr << "--size option requires a numeric argument" << std::endl;
std::cerr << "USAGE: " << argv[0] << " [options]" << std::endl;
std::cerr << "Options are:" << std::endl;
std::cerr << " --size <number>\tSpecify the size of the data." << std::endl;
std::cerr << initResult.Usage << std::endl;
exit(1);
}
g_size = atoi(argv[2]);
}
std::cout << "Using device: " << initResult.Device.GetName() << std::endl;
```
## Additional Initialize Options
Because `Initialize` no longer has the assumption that it is responsible
for parsing _all_ arguments, some options have been added to
`vtkm::cont::InitializeOptions` to manage these different use cases. The
following options are now supported.
* `None` A placeholder for having all options off, which is the default.
(Same as before this change.)
* `RequireDevice` Issue an error if the device argument is not specified.
(Same as before this change.)
* `DefaultAnyDevice` If no device is specified, treat it as if the user
gave --device=Any. This means that DeviceAdapterTagUndefined will never
be return in the result.
* `AddHelp` Add a help argument. If `-h` or `--help` is provided, prints
a usage statement. Of course, the usage statement will only print out
arguments processed by VTK-m.
* `ErrorOnBadOption` If an unknown option is encountered, the program
terminates with an error and a usage statement is printed. If this
option is not provided, any unknown options are returned in `argv`. If
this option is used, it is a good idea to use `AddHelp` as well.
* `ErrorOnBadArgument` If an extra argument is encountered, the program
terminates with an error and a usage statement is printed. If this
option is not provided, any unknown arguments are returned in `argv`.
* `Strict` If supplied, Initialize treats its own arguments as the only
ones supported by the application and provides an error if not followed
exactly. This is a convenience option that is a combination of
`ErrorOnBadOption`, `ErrorOnBadArgument`, and `AddHelp`.
## InitializeResult Changes
The changes in `Initialize` have also necessitated the changing of some of
the fields in the `InitializeResult` structure. The following fields are
now provided in the `InitializeResult` struct.
* `Device` Returns the device selected in the command line arguments as a
`DeviceAdapterId`. If no device was selected,
`DeviceAdapterTagUndefined` is returned. (Same as before this change.)
* `Usage` Returns a string containing the usage for the options
recognized by `Initialize`. This can be used to build larger usage
statements containing options for both `Initialize` and the calling
program. See the example above.
Note that the `Arguments` field has been removed from `InitializeResult`.
This is because the unparsed arguments are now returned in the modified
`argc` and `argv`, which provides a more complete result than the
`Arguments` field did.

@ -1,26 +0,0 @@
# Add point merge capabilities to CleanGrid filter
We have added a `PointMerge` worklet that uses a virtual grid approach to
identify nearby points. The worklet works by creating a very fine but
sparsely represented locator grid. It then groups points by grid bins and
finds those within a specified radius.
This functionality has been integrated into the `CleanGrid` filter. The
following flags have been added to `CleanGrid` to modify the behavior of
point merging.
* `Set`/`GetMergePoints` - a flag to turn on/off the merging of
duplicated coincident points. This extra operation will find points
spatially located near each other and merge them together.
* `Set`/`GetTolerance` - Defines the tolerance used when determining
whether two points are considered coincident. If the
`ToleranceIsAbsolute` flag is false (the default), then this tolerance
is scaled by the diagonal of the points. This parameter is only used
when merge points is on.
* `Set`/`GetToleranceIsAbsolute` - When ToleranceIsAbsolute is false (the
default) then the tolerance is scaled by the diagonal of the bounds of
the dataset. If true, then the tolerance is taken as the actual
distance to use. This parameter is only used when merge points is on.
* `Set`/`GetFastMerge` - When FastMerge is true (the default), some
corners are cut when computing coincident points. The point merge will
go faster but the tolerance will not be strictly followed.

@ -1,12 +0,0 @@
# Added specialized operators for ArrayPortalValueReference
The ArrayPortalValueReference is supposed to behave just like the value it
encapsulates and does so by automatically converting to the base type when
necessary. However, when it is possible to convert that to something else,
it is possible to get errors about ambiguous overloads. To avoid these, add
specialized versions of the operators to specify which ones should be used.
Also consolidated the CUDA version of an ArrayPortalValueReference to the
standard one. The two implementations were equivalent and we would like
changes to apply to both.

@ -1,90 +0,0 @@
# Redesign Runtime Device Tracking
The device tracking infrastructure in VTK-m has been redesigned to
remove multiple redundant codes paths and to simplify reasoning
about around what an instance of RuntimeDeviceTracker will modify.
`vtkm::cont::RuntimeDeviceTracker` tracks runtime information on
a per-user thread basis. This is done to allow multiple calling
threads to use different vtk-m backends such as seen in this
example:
```cpp
vtkm::cont::DeviceAdapterTagCuda cuda;
vtkm::cont::DeviceAdapterTagOpenMP openmp;
{ // thread 1
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker->ForceDevice(cuda);
vtkm::worklet::Invoker invoke;
invoke(LightTask{}, input, output);
vtkm::cont::Algorithm::Sort(output);
invoke(HeavyTask{}, output);
}
{ // thread 2
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker->ForceDevice(openmp);
vtkm::worklet::Invoker invoke;
invoke(LightTask{}, input, output);
vtkm::cont::Algorithm::Sort(output);
invoke(HeavyTask{}, output);
}
```
While this address the ability for threads to specify what
device they should run on. It doesn't make it easy to toggle
the status of a device in a programmatic way, for example
the following block forces execution to only occur on
`cuda` and doesn't restore previous active devices after
```cpp
{
vtkm::cont::DeviceAdapterTagCuda cuda;
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
tracker->ForceDevice(cuda);
vtkm::worklet::Invoker invoke;
invoke(LightTask{}, input, output);
}
//openmp/tbb/... still inactive
```
To resolve those issues we have `vtkm::cont::ScopedRuntimeDeviceTracker` which
has the same interface as `vtkm::cont::RuntimeDeviceTracker` but additionally
resets any per-user thread modifications when it goes out of scope. So by
switching over the previous example to use `ScopedRuntimeDeviceTracker` we
correctly restore the threads `RuntimeDeviceTracker` state when `tracker`
goes out of scope.
```cpp
{
vtkm::cont::DeviceAdapterTagCuda cuda;
vtkm::cont::ScopedRuntimeDeviceTracker tracker(cuda);
vtkm::worklet::Invoker invoke;
invoke(LightTask{}, input, output);
}
//openmp/tbb/... are now again active
```
The `vtkm::cont::ScopedRuntimeDeviceTracker` is not limited to forcing
execution to occur on a single device. When constructed it can either force
execution to a device, disable a device or enable a device. These options
also work with the `DeviceAdapterTagAny`.
```cpp
{
//enable all devices
vtkm::cont::DeviceAdapterTagAny any;
vtkm::cont::ScopedRuntimeDeviceTracker tracker(any,
vtkm::cont::RuntimeDeviceTrackerMode::Enable);
...
}
{
//disable only cuda
vtkm::cont::DeviceAdapterTagCuda cuda;
vtkm::cont::ScopedRuntimeDeviceTracker tracker(cuda,
vtkm::cont::RuntimeDeviceTrackerMode::Disable);
...
}
```

@ -1,53 +0,0 @@
# DeviceAdapter Reduction supports differing input and output types
It is common to want to perform a reduction where the input and output types
are of differing types. A basic example would be when the input is `vtkm::UInt8`
but the output is `vtkm::UInt64`. This has been supported since v1.2, as the input
type can be implicitly convertible to the output type.
What we now support is when the input type is not implicitly convertible to the output type,
such as when the output type is `vtkm::Pair< vtkm::UInt64, vtkm::UInt64>`. For this to work
we require that the custom binary operator implements also an `operator()` which handles
the unary transformation of input to output.
An example of a custom reduction operator for differing input and output types is:
```cxx
struct CustomMinAndMax
{
using OutputType = vtkm::Pair<vtkm::Float64, vtkm::Float64>;
VTKM_EXEC_CONT
OutputType operator()(vtkm::Float64 a) const
{
return OutputType(a, a);
}
VTKM_EXEC_CONT
OutputType operator()(vtkm::Float64 a, vtkm::Float64 b) const
{
return OutputType(vtkm::Min(a, b), vtkm::Max(a, b));
}
VTKM_EXEC_CONT
OutputType operator()(const OutputType& a, const OutputType& b) const
{
return OutputType(vtkm::Min(a.first, b.first), vtkm::Max(a.second, b.second));
}
VTKM_EXEC_CONT
OutputType operator()(vtkm::Float64 a, const OutputType& b) const
{
return OutputType(vtkm::Min(a, b.first), vtkm::Max(a, b.second));
}
VTKM_EXEC_CONT
OutputType operator()(const OutputType& a, vtkm::Float64 b) const
{
return OutputType(vtkm::Min(a.first, b), vtkm::Max(a.second, b));
}
};
```

@ -1,9 +0,0 @@
# Renamed RuntimeDeviceTrackers to use the term Global
The `GetGlobalRuntimeDeviceTracker` never actually returned a process wide
runtime device tracker but always a unique one for each control side thread.
This was the design as it would allow for different threads to have different
runtime device settings.
By removing the term Global from the name it becomes more clear what scope this
class has.

@ -1,147 +0,0 @@
# Add ability to specialize a worklet for a device
This change adds an execution signature tag named `Device` that passes
a `DeviceAdapterTag` to the worklet's parenthesis operator. This allows the
worklet to specialize its operation. This features is available in all
worklets.
The following example shows a worklet that specializes itself for the CUDA
device.
```cpp
struct DeviceSpecificWorklet : vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = _2(_1, Device);
// Specialization for the Cuda device.
template <typename T>
T operator()(T x, vtkm::cont::DeviceAdapterTagCuda) const
{
// Special cuda implementation
}
// General implementation
template <typename T, typename Device>
T operator()(T x, Device) const
{
// General implementation
}
};
```
## Effect on compile time and binary size
This change necessitated adding a template parameter for the device that
followed at least from the schedule all the way down. This has the
potential for duplicating several of the support methods (like
`DoWorkletInvokeFunctor`) that would otherwise have the same type. This is
especially true between the devices that run on the CPU as they should all
be sharing the same portals from `ArrayHandle`s. So the question is whether
it causes compile to take longer or cause a significant increase in
binaries.
To informally test, I first ran a clean debug compile on my Windows machine
with the serial and tbb devices. The build itself took **3 minutes, 50
seconds**. Here is a list of the binary sizes in the bin directory:
```
kmorel2 0> du -sh *.exe *.dll
200K BenchmarkArrayTransfer_SERIAL.exe
204K BenchmarkArrayTransfer_TBB.exe
424K BenchmarkAtomicArray_SERIAL.exe
424K BenchmarkAtomicArray_TBB.exe
440K BenchmarkCopySpeeds_SERIAL.exe
580K BenchmarkCopySpeeds_TBB.exe
4.1M BenchmarkDeviceAdapter_SERIAL.exe
5.3M BenchmarkDeviceAdapter_TBB.exe
7.9M BenchmarkFieldAlgorithms_SERIAL.exe
7.9M BenchmarkFieldAlgorithms_TBB.exe
22M BenchmarkFilters_SERIAL.exe
22M BenchmarkFilters_TBB.exe
276K BenchmarkRayTracing_SERIAL.exe
276K BenchmarkRayTracing_TBB.exe
4.4M BenchmarkTopologyAlgorithms_SERIAL.exe
4.4M BenchmarkTopologyAlgorithms_TBB.exe
712K Rendering_SERIAL.exe
712K Rendering_TBB.exe
708K UnitTests_vtkm_cont_arg_testing.exe
1.7M UnitTests_vtkm_cont_internal_testing.exe
13M UnitTests_vtkm_cont_serial_testing.exe
14M UnitTests_vtkm_cont_tbb_testing.exe
18M UnitTests_vtkm_cont_testing.exe
13M UnitTests_vtkm_cont_testing_mpi.exe
736K UnitTests_vtkm_exec_arg_testing.exe
136K UnitTests_vtkm_exec_internal_testing.exe
196K UnitTests_vtkm_exec_serial_internal_testing.exe
196K UnitTests_vtkm_exec_tbb_internal_testing.exe
2.0M UnitTests_vtkm_exec_testing.exe
83M UnitTests_vtkm_filter_testing.exe
476K UnitTests_vtkm_internal_testing.exe
148K UnitTests_vtkm_interop_internal_testing.exe
1.3M UnitTests_vtkm_interop_testing.exe
2.9M UnitTests_vtkm_io_reader_testing.exe
548K UnitTests_vtkm_io_writer_testing.exe
792K UnitTests_vtkm_rendering_testing.exe
3.7M UnitTests_vtkm_testing.exe
320K UnitTests_vtkm_worklet_internal_testing.exe
65M UnitTests_vtkm_worklet_testing.exe
11M vtkm_cont-1.3.dll
2.1M vtkm_interop-1.3.dll
21M vtkm_rendering-1.3.dll
3.9M vtkm_worklet-1.3.dll
```
After making the singular change to the `Invocation` object to add the
`DeviceAdapterTag` as a template parameter (which should cause any extra
compile instances) the compile took **4 minuts and 5 seconds**. Here is the
new list of binaries.
```
kmorel2 0> du -sh *.exe *.dll
200K BenchmarkArrayTransfer_SERIAL.exe
204K BenchmarkArrayTransfer_TBB.exe
424K BenchmarkAtomicArray_SERIAL.exe
424K BenchmarkAtomicArray_TBB.exe
440K BenchmarkCopySpeeds_SERIAL.exe
580K BenchmarkCopySpeeds_TBB.exe
4.1M BenchmarkDeviceAdapter_SERIAL.exe
5.3M BenchmarkDeviceAdapter_TBB.exe
7.9M BenchmarkFieldAlgorithms_SERIAL.exe
7.9M BenchmarkFieldAlgorithms_TBB.exe
22M BenchmarkFilters_SERIAL.exe
22M BenchmarkFilters_TBB.exe
276K BenchmarkRayTracing_SERIAL.exe
276K BenchmarkRayTracing_TBB.exe
4.4M BenchmarkTopologyAlgorithms_SERIAL.exe
4.4M BenchmarkTopologyAlgorithms_TBB.exe
712K Rendering_SERIAL.exe
712K Rendering_TBB.exe
708K UnitTests_vtkm_cont_arg_testing.exe
1.7M UnitTests_vtkm_cont_internal_testing.exe
13M UnitTests_vtkm_cont_serial_testing.exe
14M UnitTests_vtkm_cont_tbb_testing.exe
19M UnitTests_vtkm_cont_testing.exe
13M UnitTests_vtkm_cont_testing_mpi.exe
736K UnitTests_vtkm_exec_arg_testing.exe
136K UnitTests_vtkm_exec_internal_testing.exe
196K UnitTests_vtkm_exec_serial_internal_testing.exe
196K UnitTests_vtkm_exec_tbb_internal_testing.exe
2.0M UnitTests_vtkm_exec_testing.exe
86M UnitTests_vtkm_filter_testing.exe
476K UnitTests_vtkm_internal_testing.exe
148K UnitTests_vtkm_interop_internal_testing.exe
1.3M UnitTests_vtkm_interop_testing.exe
2.9M UnitTests_vtkm_io_reader_testing.exe
548K UnitTests_vtkm_io_writer_testing.exe
792K UnitTests_vtkm_rendering_testing.exe
3.7M UnitTests_vtkm_testing.exe
320K UnitTests_vtkm_worklet_internal_testing.exe
68M UnitTests_vtkm_worklet_testing.exe
11M vtkm_cont-1.3.dll
2.1M vtkm_interop-1.3.dll
21M vtkm_rendering-1.3.dll
3.9M vtkm_worklet-1.3.dll
```
So far the increase is quite negligible.

@ -1,31 +0,0 @@
# update-CellLocatorTwoLevelUniformGrid
`CellLocatorTwoLevelUniformGrid` has been renamed to `CellLocatorUniformBins`
for brevity. It has been modified to be a subclass of `vtkm::cont::CellLocator`
and can be used wherever a `CellLocator` is accepted.
`CellLocatorUniformBins` can work with all kinds of datasets, but there are cell
locators that are more efficient for specific data sets. Therefore, a new cell
locator - `CellLocatorGeneral` has been implemented that can be configured to use
specialized cell locators based on its input data. A "configurator" function object
can be specified using the `SetConfigurator` function. The configurator should
have the following signature:
```c++
void (std::unique_ptr<vtkm::cont::CellLocator>&,
const vtkm::cont::DynamicCellSet&,
const vtkm::cont::CoordinateSystem&);
```
The configurator is invoked whenever the `Update` method is called and the input
has changed. The current cell locator is passed in a `std::unique_ptr`. Based on
the types of the input cellset and coordinates, and possibly some heuristics on
their values, the current cell locator's parameters can be updated, or a different
cell-locator can be instantiated and transferred to the `unique_ptr`. The default
configurator configures a `CellLocatorUniformGrid` for uniform grid datasets,
a `CellLocatorRecitlinearGrid` for rectilinear datasets, and `CellLocatorUniformBins`
for all other dataset types.
The class `CellLocatorHelper` that implemented similar functionality to
`CellLocatorGeneral` has been removed.

@ -1,3 +0,0 @@
# Optional Parser is bumped from 1.3 to 1.7.
VTK-m internal version of Optional Parser has been moved to 1.7

@ -1,43 +0,0 @@
# vtkm::cont::VariantArrayHandle replaces vtkm::cont::DynamicArrayHandle
`ArrayHandleVariant` replaces `DynamicArrayHandle` as the primary method
for holding onto a type erased `vtkm::cont::ArrayHandle`. The major difference
between the two implementations is how they handle the Storage component of
an array handle.
`DynamicArrayHandle` approach was to find the fully deduced type of the `ArrayHandle`
meaning it would check all value and storage types it knew about until it found a match.
This cross product of values and storages would cause significant compilation times when
a `DynamicArrayHandle` had multiple storage types.
`VariantArrayHandle` approach is to only deduce the value type of the `ArrayHandle` and
return a `vtkm::cont::ArrayHandleVirtual` which uses polymorpishm to hide the actual
storage type. This approach allows for better compile times, and for calling code
to always expect an `ArrayHandleVirtual` instead of the fully deduced type. This conversion
to `ArrayHandleVirtual` is usually done internally within VTK-m when a worklet or filter
is invoked.
In certain cases users of `VariantArrayHandle` want to be able to access the concrete
`ArrayHandle<T,S>` and not have it wrapped in a `ArrayHandleVirtual`. For those occurrences
`VariantArrayHandle` provides a collection of helper functions/methods to query and
cast back to the concrete storage and value type:
```cpp
vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
vtkm::cont::ArrayHandleVariant v(constant);
bool isConstant = vtkm::cont::IsType< decltype(constant) >(v);
if(isConstant)
vtkm::cont::ArrayHandleConstant<vtkm::Float32> t = vtkm::cont::Cast< decltype(constant) >(v);
```
Lastly, a common operation of calling code using `VariantArrayHandle` is a desire to construct a new instance
of an existing virtual handle with the same storage type. This can be done by using the `NewInstance` method
as seen below
```cpp
vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
vtkm::cont::ArrayHandleVariant v(pressure);
vtkm::cont::ArrayHandleVariant newArray = v->NewInstance();
bool isConstant = vtkm::cont::IsType< decltype(pressure) >(newArray); //will be true
```

@ -1,9 +0,0 @@
# VTK-m now can verify that it installs itself correctly
It was a fairly common occurrence of VTK-m to have a broken install
tree as it had no easy way to verify that all headers would be installed.
Now VTK-m offers a testing infrastructure that creates a temporary installed
version and is able to run tests with that VTK-m installed version. Currently
the only test is to verify that each header listed in VTK-m is also installed,
but this can expand in the future to include compilation tests.

@ -1,17 +0,0 @@
# VTK-m CUDA detection properly handles busy devices
When an application that uses VTK-m is first launched it will
do a check to see if CUDA is supported at runtime. If for
some reason that CUDA card is not allowing kernel execution
VTK-m would report the hardware doesn't have CUDA support.
This was problematic as was over aggressive in disabling CUDA
support for hardware that could support kernel execution in
the future. With the fact that every VTK-m worklet is executed
through a TryExecute it is no longer necessary to be so
aggressive in disabling CUDA support.
Now the behavior is that VTK-m considers a machine to have
CUDA runtime support if it has 1+ GPU's of Kepler or
higher hardware (SM_30+).

@ -1,11 +0,0 @@
# VTK-m thirdparty diy now can coexist with external diy
Previously VTK-m would leak macros that would cause an
external diy to be incorrectly mangled breaking consumers
of VTK-m that used diy.
Going forward to use `diy` from VTK-m all calls must use the
`vtkmdiy` namespace instead of the `diy` namespace. This
allows for VTK-m to properly forward calls to either
the external or internal version correctly.

@ -10,28 +10,28 @@
#add the directory that contains the VTK-m config file to the cmake
#path so that our examples can find VTK-m
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
add_subdirectory(clipping)
add_subdirectory(contour_tree)
add_subdirectory(contour_tree_augmented)
add_subdirectory(cosmotools)
add_subdirectory(demo)
add_subdirectory(game_of_life)
add_subdirectory(hello_world)
add_subdirectory(histogram)
add_subdirectory(isosurface)
add_subdirectory(lagrangian)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)
add_subdirectory(particle_advection)
add_subdirectory(redistribute_points)
add_subdirectory(rendering)
add_subdirectory(streamline)
add_subdirectory(temporal_advection)
add_subdirectory(tetrahedra)
# add_subdirectory(unified_memory)
if(VTKm_ENABLE_EXAMPLES)
set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
add_subdirectory(clipping)
add_subdirectory(contour_tree)
add_subdirectory(contour_tree_augmented)
add_subdirectory(cosmotools)
add_subdirectory(demo)
add_subdirectory(game_of_life)
add_subdirectory(hello_world)
add_subdirectory(histogram)
add_subdirectory(isosurface)
add_subdirectory(lagrangian)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)
add_subdirectory(particle_advection)
add_subdirectory(redistribute_points)
add_subdirectory(rendering)
add_subdirectory(streamline)
add_subdirectory(temporal_advection)
add_subdirectory(tetrahedra)
endif()
if (VTKm_ENABLE_TESTING)
# These need to be fast to build as they will

@ -1 +1 @@
1.3.0
1.4.0

@ -24,23 +24,7 @@ target_include_directories(vtkm_diy INTERFACE
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)
if(VTKm_ENABLE_MPI)
set(arg)
foreach(apath IN LISTS MPI_C_INCLUDE_PATH MPI_CXX_INCLUDE_PATH)
list(APPEND arg $<BUILD_INTERFACE:${apath}>)
endforeach()
list(REMOVE_DUPLICATES arg)
target_include_directories(vtkm_diy INTERFACE ${arg})
target_link_libraries(vtkm_diy INTERFACE
$<BUILD_INTERFACE:${MPI_C_LIBRARIES}>
$<BUILD_INTERFACE:${MPI_CXX_LIBRARIES}>)
if(MPI_C_COMPILE_DEFINITIONS)
target_compile_definitions(vtkm_diy INTERFACE
$<$<COMPILE_LANGUAGE:C>:${MPI_C_COMPILE_DEFINITIONS}>)
endif()
if(MPI_CXX_COMPILE_DEFNITIONS)
target_compile_definitions(vtkm_diy INTERFACE
$<$<COMPILE_LANGUAGE:CXX>:${MPI_CXX_COMPILE_DEFNITIONS>)
endif()
target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
endif()
install(TARGETS vtkm_diy