// vtk-m/benchmarking/BenchmarkArrayTransfer.cxx
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include "Benchmarker.h"
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <sstream>
#include <string>
#include <vector>
namespace
{
// Make this global so benchmarks can access the current device id:
// (main() assigns it from vtkm::cont::Initialize; the benchmarks read Config.Device.)
vtkm::cont::InitializeResult Config;
// Lower and upper bounds handed to ->Range() when each benchmark is registered.
// The benchmarks interpret the resulting argument as an array size in bytes.
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 30); // 1 GiB
// Type list passed to VTKM_BENCHMARK_TEMPLATES_OPTS for every benchmark in this file.
using TestTypes = vtkm::List<vtkm::Float32>;
//------------- Functors for benchmarks --------------------------------------
// Worklet that touches every value of its input field.
// The comparison against zero is not meaningful by itself; it only gives the
// compiler a reason to keep the array accesses instead of optimizing them out.
struct ReadValues : vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldIn);

  template <typename T>
  VTKM_EXEC void operator()(const T& value) const
  {
    // Common path: nothing to do once the value has been read.
    if (!(value < 0))
    {
      return;
    }

    // Negative values are reported as an error on the worklet.
    this->RaiseError("Unexpected value.");
  }
};
// Worklet that fills an output field: every entry receives its own input
// index converted to the field's value type.
struct WriteValues : vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldOut);
  using ExecutionSignature = void(_1, InputIndex);

  template <typename T>
  VTKM_EXEC void operator()(T& output, vtkm::Id index) const
  {
    output = static_cast<T>(index);
  }
};
// Worklet that updates a field in place: every entry is incremented by its
// own input index (converted to the field's value type).
struct ReadWriteValues : vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldInOut);
  using ExecutionSignature = void(_1, InputIndex);

  template <typename T>
  VTKM_EXEC void operator()(T& inOut, vtkm::Id index) const
  {
    inOut += static_cast<T>(index);
  }
};
// Builds a brand-new ArrayHandle from the contents of `vec`.
// The source vector is left untouched: its elements are copied into a
// temporary std::vector, and that temporary is moved into the ArrayHandle.
template <typename T>
vtkm::cont::ArrayHandle<T> CreateFreshArrayHandle(const std::vector<T>& vec)
{
  return vtkm::cont::make_ArrayHandleMove(std::vector<T>(vec.begin(), vec.end()));
}
//------------- Benchmark functors -------------------------------------------
// Copies NumValues from control environment to execution environment and
// accesses them as read-only.
template <typename ValueType>
void BenchContToExecRead(benchmark::State& state)
{
using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
{
std::ostringstream desc;
desc << "Control --> Execution (read-only): " << numValues << " values ("
<< vtkm::cont::GetHumanReadableSize(numBytes) << ")";
state.SetLabel(desc.str());
}
Improvements to moving data into ArrayHandle We have made several improvements to adding data into an `ArrayHandle`. ## Moving data from an `std::vector` For numerous reasons, it is convenient to define data in a `std::vector` and then wrap that into an `ArrayHandle`. It is often the case that an `std::vector` is filled and then becomes unused once it is converted to an `ArrayHandle`. In this case, what we really want is to pass the data off to the `ArrayHandle` so that the `ArrayHandle` is now managing the data and not the `std::vector`. C++11 has a mechanism to do this: move semantics. You can now pass variables to functions as an "rvalue" (right-hand value). When something is passed as an rvalue, it can pull state out of that variable and move it somewhere else. `std::vector` implements this movement so that an rvalue can be moved to another `std::vector` without actually copying the data. `make_ArrayHandle` now also takes advantage of this feature to move rvalue `std::vector`s. There is a special form of `make_ArrayHandle` named `make_ArrayHandleMove` that takes an rvalue. There is also a special overload of `make_ArrayHandle` itself that handles an rvalue `vector`. (However, using the explicit move version is better if you want to make sure the data is actually moved.) ## Make `ArrayHandle` from initalizer list A common use case for using `std::vector` (particularly in our unit tests) is to quickly add an initalizer list into an `ArrayHandle`. Now you can by simply passing an initializer list to `make_ArrayHandle`. ## Deprecated `make_ArrayHandle` with default shallow copy For historical reasons, passing an `std::vector` or a pointer to `make_ArrayHandle` does a shallow copy (i.e. `CopyFlag` defaults to `Off`). Although more efficient, this mode is inherintly unsafe, and making it the default is asking for trouble. To combat this, calling `make_ArrayHandle` without a copy flag is deprecated. 
In this way, if you wish to do the faster but more unsafe creation of an `ArrayHandle` you should explicitly express that. This requried quite a few changes through the VTK-m source (particularly in the tests). ## Similar changes to `Field` `vtkm::cont::Field` has a `make_Field` helper function that is similar to `make_ArrayHandle`. It also features the ability to create fields from `std::vector`s and C arrays. It also likewise had the same unsafe behavior by default of not copying from the source of the arrays. That behavior has similarly been depreciated. You now have to specify a copy flag. The ability to construct a `Field` from an initializer list of values has also been added.
2020-07-16 16:32:32 +00:00
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
const int64_t iterations = static_cast<int64_t>(state.iterations());
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchContToExecRead,
->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
->ArgName("Bytes"),
TestTypes);
// Times the WriteValues worklet filling an ArrayHandle in the execution
// environment. The array is allocated once, before the benchmark loop, and no
// control-side data is supplied, so nothing is copied from control here.
//
// state.range(0) is interpreted as the array size in bytes.
template <typename ValueType>
void BenchContToExecWrite(benchmark::State& state)
{
  using HandleType = vtkm::cont::ArrayHandle<ValueType>;

  const vtkm::cont::DeviceAdapterId deviceId = Config.Device;
  const auto byteCount = static_cast<vtkm::UInt64>(state.range(0));
  const auto valueCount = static_cast<vtkm::Id>(byteCount / sizeof(ValueType));

  {
    std::ostringstream label;
    label << "Copying from Control --> Execution (write-only): ";
    label << valueCount << " values (";
    label << vtkm::cont::GetHumanReadableSize(byteCount) << ")";
    state.SetLabel(label.str());
  }

  HandleType output;
  output.Allocate(valueCount);

  vtkm::cont::Invoker run{ deviceId };
  vtkm::cont::Timer stopwatch{ deviceId };
  for (auto _ : state)
  {
    (void)_;

    // Only the worklet invocation is inside the timed region.
    stopwatch.Start();
    run(WriteValues{}, output);
    stopwatch.Stop();

    state.SetIterationTime(stopwatch.GetElapsedTime());
  }

  // Report throughput totals over all iterations.
  const auto iterCount = static_cast<int64_t>(state.iterations());
  const auto bytesPerIter = static_cast<int64_t>(byteCount);
  const auto itemsPerIter = static_cast<int64_t>(valueCount);
  state.SetBytesProcessed(bytesPerIter * iterCount);
  state.SetItemsProcessed(itemsPerIter * iterCount);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchContToExecWrite,
                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
                                ->ArgName("Bytes"),
                              TestTypes);
// Copies NumValues from control environment to execution environment and
// both reads and writes them.
template <typename ValueType>
void BenchContToExecReadWrite(benchmark::State& state)
{
using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
{
std::ostringstream desc;
desc << "Control --> Execution (read-write): " << numValues << " values ("
<< vtkm::cont::GetHumanReadableSize(numBytes) << ")";
state.SetLabel(desc.str());
}
Improvements to moving data into ArrayHandle We have made several improvements to adding data into an `ArrayHandle`. ## Moving data from an `std::vector` For numerous reasons, it is convenient to define data in a `std::vector` and then wrap that into an `ArrayHandle`. It is often the case that an `std::vector` is filled and then becomes unused once it is converted to an `ArrayHandle`. In this case, what we really want is to pass the data off to the `ArrayHandle` so that the `ArrayHandle` is now managing the data and not the `std::vector`. C++11 has a mechanism to do this: move semantics. You can now pass variables to functions as an "rvalue" (right-hand value). When something is passed as an rvalue, it can pull state out of that variable and move it somewhere else. `std::vector` implements this movement so that an rvalue can be moved to another `std::vector` without actually copying the data. `make_ArrayHandle` now also takes advantage of this feature to move rvalue `std::vector`s. There is a special form of `make_ArrayHandle` named `make_ArrayHandleMove` that takes an rvalue. There is also a special overload of `make_ArrayHandle` itself that handles an rvalue `vector`. (However, using the explicit move version is better if you want to make sure the data is actually moved.) ## Make `ArrayHandle` from initalizer list A common use case for using `std::vector` (particularly in our unit tests) is to quickly add an initalizer list into an `ArrayHandle`. Now you can by simply passing an initializer list to `make_ArrayHandle`. ## Deprecated `make_ArrayHandle` with default shallow copy For historical reasons, passing an `std::vector` or a pointer to `make_ArrayHandle` does a shallow copy (i.e. `CopyFlag` defaults to `Off`). Although more efficient, this mode is inherintly unsafe, and making it the default is asking for trouble. To combat this, calling `make_ArrayHandle` without a copy flag is deprecated. 
In this way, if you wish to do the faster but more unsafe creation of an `ArrayHandle` you should explicitly express that. This requried quite a few changes through the VTK-m source (particularly in the tests). ## Similar changes to `Field` `vtkm::cont::Field` has a `make_Field` helper function that is similar to `make_ArrayHandle`. It also features the ability to create fields from `std::vector`s and C arrays. It also likewise had the same unsafe behavior by default of not copying from the source of the arrays. That behavior has similarly been depreciated. You now have to specify a copy flag. The ability to construct a `Field` from an initializer list of values has also been added.
2020-07-16 16:32:32 +00:00
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadWriteValues{}, array);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
// Remove data from execution environment so it has to be transferred again.
array.ReleaseResourcesExecution();
}
const int64_t iterations = static_cast<int64_t>(state.iterations());
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchContToExecReadWrite,
->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
->ArgName("Bytes"),
TestTypes);
// Copies NumValues from control environment to execution environment and
// back, then accesses them as read-only.
template <typename ValueType>
void BenchRoundTripRead(benchmark::State& state)
{
using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
{
std::ostringstream desc;
desc << "Copying from Control --> Execution --> Control (read-only): " << numValues
<< " values (" << vtkm::cont::GetHumanReadableSize(numBytes) << ")";
state.SetLabel(desc.str());
}
Improvements to moving data into ArrayHandle We have made several improvements to adding data into an `ArrayHandle`. ## Moving data from an `std::vector` For numerous reasons, it is convenient to define data in a `std::vector` and then wrap that into an `ArrayHandle`. It is often the case that an `std::vector` is filled and then becomes unused once it is converted to an `ArrayHandle`. In this case, what we really want is to pass the data off to the `ArrayHandle` so that the `ArrayHandle` is now managing the data and not the `std::vector`. C++11 has a mechanism to do this: move semantics. You can now pass variables to functions as an "rvalue" (right-hand value). When something is passed as an rvalue, it can pull state out of that variable and move it somewhere else. `std::vector` implements this movement so that an rvalue can be moved to another `std::vector` without actually copying the data. `make_ArrayHandle` now also takes advantage of this feature to move rvalue `std::vector`s. There is a special form of `make_ArrayHandle` named `make_ArrayHandleMove` that takes an rvalue. There is also a special overload of `make_ArrayHandle` itself that handles an rvalue `vector`. (However, using the explicit move version is better if you want to make sure the data is actually moved.) ## Make `ArrayHandle` from initalizer list A common use case for using `std::vector` (particularly in our unit tests) is to quickly add an initalizer list into an `ArrayHandle`. Now you can by simply passing an initializer list to `make_ArrayHandle`. ## Deprecated `make_ArrayHandle` with default shallow copy For historical reasons, passing an `std::vector` or a pointer to `make_ArrayHandle` does a shallow copy (i.e. `CopyFlag` defaults to `Off`). Although more efficient, this mode is inherintly unsafe, and making it the default is asking for trouble. To combat this, calling `make_ArrayHandle` without a copy flag is deprecated. 
In this way, if you wish to do the faster but more unsafe creation of an `ArrayHandle` you should explicitly express that. This requried quite a few changes through the VTK-m source (particularly in the tests). ## Similar changes to `Field` `vtkm::cont::Field` has a `make_Field` helper function that is similar to `make_ArrayHandle`. It also features the ability to create fields from `std::vector`s and C arrays. It also likewise had the same unsafe behavior by default of not copying from the source of the arrays. That behavior has similarly been depreciated. You now have to specify a copy flag. The ability to construct a `Field` from an initializer list of values has also been added.
2020-07-16 16:32:32 +00:00
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
// Copy back to host and read:
// (Note, this probably does not copy. The array exists in both control and execution for read.)
auto portal = array.ReadPortal();
for (vtkm::Id i = 0; i < numValues; ++i)
{
benchmark::DoNotOptimize(portal.Get(i));
}
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
const int64_t iterations = static_cast<int64_t>(state.iterations());
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchRoundTripRead,
->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
->ArgName("Bytes"),
TestTypes);
// Round trip with in-place modification: a fresh ArrayHandle is built from
// control-side data, ReadWriteValues runs on it, and the values are then read
// and rewritten through a control-side write portal. The timed region covers
// the worklet invocation and the control-side loop.
//
// state.range(0) is interpreted as the array size in bytes.
template <typename ValueType>
void BenchRoundTripReadWrite(benchmark::State& state)
{
  using HandleType = vtkm::cont::ArrayHandle<ValueType>;

  const vtkm::cont::DeviceAdapterId deviceId = Config.Device;
  const auto byteCount = static_cast<vtkm::UInt64>(state.range(0));
  const auto valueCount = static_cast<vtkm::Id>(byteCount / sizeof(ValueType));

  {
    std::ostringstream label;
    label << "Copying from Control --> Execution --> Control (read-write): ";
    label << valueCount << " values (";
    label << vtkm::cont::GetHumanReadableSize(byteCount) << ")";
    state.SetLabel(label.str());
  }

  // Value-initialized source data; copied into a new ArrayHandle per iteration.
  std::vector<ValueType> hostData(static_cast<std::size_t>(valueCount));

  vtkm::cont::Invoker run{ deviceId };
  vtkm::cont::Timer stopwatch{ deviceId };
  for (auto _ : state)
  {
    (void)_;

    // A new handle per iteration forces the control -> execution copy to
    // happen every time (and keeps unified memory devices from caching data).
    HandleType data = CreateFreshArrayHandle(hostData);

    stopwatch.Start();

    // Device-side work:
    run(ReadWriteValues{}, data);

    // Bring the values back to the host and modify them there:
    auto hostPortal = data.WritePortal();
    for (vtkm::Id index = 0; index < valueCount; ++index)
    {
      const ValueType current = hostPortal.Get(index);
      hostPortal.Set(index, current - static_cast<ValueType>(index));
    }

    stopwatch.Stop();
    state.SetIterationTime(stopwatch.GetElapsedTime());
  }

  // Report throughput totals over all iterations.
  const auto iterCount = static_cast<int64_t>(state.iterations());
  const auto bytesPerIter = static_cast<int64_t>(byteCount);
  const auto itemsPerIter = static_cast<int64_t>(valueCount);
  state.SetBytesProcessed(bytesPerIter * iterCount);
  state.SetItemsProcessed(itemsPerIter * iterCount);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchRoundTripReadWrite,
                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
                                ->ArgName("Bytes"),
                              TestTypes);
// Write NumValues to device allocated memory and copies them back to control
// for reading.
//
// state.range(0) is interpreted as the array size in bytes; the number of
// values is that size divided by sizeof(ValueType). The timed region covers
// the WriteValues invocation and the control-side read loop.
template <typename ValueType>
void BenchExecToContRead(benchmark::State& state)
{
  using ArrayType = vtkm::cont::ArrayHandle<ValueType>;

  const vtkm::cont::DeviceAdapterId device = Config.Device;
  const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
  const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));

  {
    std::ostringstream desc;
    desc << "Copying from Execution --> Control (read-only on control): " << numValues
         << " values (" << vtkm::cont::GetHumanReadableSize(numBytes) << ")";
    state.SetLabel(desc.str());
  }

  vtkm::cont::Invoker invoker{ device };
  vtkm::cont::Timer timer{ device };
  for (auto _ : state)
  {
    (void)_;

    // A new array per iteration, so every iteration starts from a freshly
    // allocated handle.
    ArrayType array;
    array.Allocate(numValues);

    // Time the copy:
    timer.Start();

    // Allocate/write data on device
    invoker(WriteValues{}, array);

    // Read back on host. The control side only reads the values, so a read
    // portal is requested (matching the "read-only on control" label).
    auto portal = array.ReadPortal();
    for (vtkm::Id i = 0; i < numValues; ++i)
    {
      // DoNotOptimize keeps the otherwise unused reads from being removed.
      benchmark::DoNotOptimize(portal.Get(i));
    }

    timer.Stop();
    state.SetIterationTime(timer.GetElapsedTime());
  }

  const int64_t iterations = static_cast<int64_t>(state.iterations());
  state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
  state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContRead,
                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
                                ->ArgName("Bytes"),
                              TestTypes);
// Fills a freshly allocated array with WriteValues, then obtains a
// control-side write portal and rewrites every value (each entry has its own
// index subtracted). The timed region covers the worklet invocation and the
// control-side loop.
//
// state.range(0) is interpreted as the array size in bytes.
template <typename ValueType>
void BenchExecToContWrite(benchmark::State& state)
{
  using HandleType = vtkm::cont::ArrayHandle<ValueType>;

  const vtkm::cont::DeviceAdapterId deviceId = Config.Device;
  const auto byteCount = static_cast<vtkm::UInt64>(state.range(0));
  const auto valueCount = static_cast<vtkm::Id>(byteCount / sizeof(ValueType));

  {
    std::ostringstream label;
    label << "Copying from Execution --> Control (write-only on control): ";
    label << valueCount << " values (";
    label << vtkm::cont::GetHumanReadableSize(byteCount) << ")";
    state.SetLabel(label.str());
  }

  vtkm::cont::Invoker run{ deviceId };
  vtkm::cont::Timer stopwatch{ deviceId };
  for (auto _ : state)
  {
    (void)_;

    // New handle per iteration; allocation happens outside the timed region.
    HandleType data;
    data.Allocate(valueCount);

    stopwatch.Start();

    // Produce the data with the worklet:
    run(WriteValues{}, data);

    // Access on the host through a write portal and overwrite each entry:
    auto hostPortal = data.WritePortal();
    for (vtkm::Id index = 0; index < valueCount; ++index)
    {
      const ValueType current = hostPortal.Get(index);
      hostPortal.Set(index, current - static_cast<ValueType>(index));
    }

    stopwatch.Stop();
    state.SetIterationTime(stopwatch.GetElapsedTime());
  }

  // Report throughput totals over all iterations.
  const auto iterCount = static_cast<int64_t>(state.iterations());
  const auto bytesPerIter = static_cast<int64_t>(byteCount);
  const auto itemsPerIter = static_cast<int64_t>(valueCount);
  state.SetBytesProcessed(bytesPerIter * iterCount);
  state.SetItemsProcessed(itemsPerIter * iterCount);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContWrite,
                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
                                ->ArgName("Bytes"),
                              TestTypes);
// Fills a freshly allocated array with WriteValues, then obtains a
// control-side write portal and reads every value through it. The timed
// region covers the worklet invocation and the control-side loop.
//
// state.range(0) is interpreted as the array size in bytes.
//
// NOTE(review): despite the "read-write on control" label, the control-side
// loop only calls Get() on the write portal and never Set() -- confirm that
// this is the intended workload.
template <typename ValueType>
void BenchExecToContReadWrite(benchmark::State& state)
{
  using HandleType = vtkm::cont::ArrayHandle<ValueType>;

  const vtkm::cont::DeviceAdapterId deviceId = Config.Device;
  const auto byteCount = static_cast<vtkm::UInt64>(state.range(0));
  const auto valueCount = static_cast<vtkm::Id>(byteCount / sizeof(ValueType));

  {
    std::ostringstream label;
    label << "Copying from Execution --> Control (read-write on control): ";
    label << valueCount << " values (";
    label << vtkm::cont::GetHumanReadableSize(byteCount) << ")";
    state.SetLabel(label.str());
  }

  vtkm::cont::Invoker run{ deviceId };
  vtkm::cont::Timer stopwatch{ deviceId };
  for (auto _ : state)
  {
    (void)_;

    // New handle per iteration; allocation happens outside the timed region.
    HandleType data;
    data.Allocate(valueCount);

    stopwatch.Start();

    // Produce the data with the worklet:
    run(WriteValues{}, data);

    // Access on the host through a write portal:
    auto hostPortal = data.WritePortal();
    for (vtkm::Id index = 0; index < valueCount; ++index)
    {
      // DoNotOptimize keeps the otherwise unused reads from being removed.
      benchmark::DoNotOptimize(hostPortal.Get(index));
    }

    stopwatch.Stop();
    state.SetIterationTime(stopwatch.GetElapsedTime());
  }

  // Report throughput totals over all iterations.
  const auto iterCount = static_cast<int64_t>(state.iterations());
  const auto bytesPerIter = static_cast<int64_t>(byteCount);
  const auto itemsPerIter = static_cast<int64_t>(valueCount);
  state.SetBytesProcessed(bytesPerIter * iterCount);
  state.SetItemsProcessed(itemsPerIter * iterCount);
}
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContReadWrite,
                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
                                ->ArgName("Bytes"),
                              TestTypes);
} // end anon namespace
// Program entry point: processes the command line, initializes VTK-m, stores
// the result in the file-level Config, and then runs the registered benchmarks.
int main(int argc, char* argv[])
{
  // Working copy of the argument list, handed to the argument helpers below.
  std::vector<char*> args(argv, argv + argc);
  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

  // Initialize command line args
  // (opts is passed in as well and is checked again below, so this call
  // presumably may change it.)
  vtkm::bench::detail::InitializeArgs(&argc, args, opts);

  // Parse VTK-m options:
  Config = vtkm::cont::Initialize(argc, args.data(), opts);

  // opts ends up as None when help was requested.
  const bool helpRequested = (opts == vtkm::cont::InitializeOptions::None);
  if (!helpRequested)
  {
    // Restrict execution to the device selected during initialization.
    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
  }
  else
  {
    std::cout << Config.Usage << std::endl;
  }

  // handle benchmarking related args and run benchmarks:
  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}