vtk-m/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h
Kenneth Moreland 316ccc04a9 Remove the Extent class and related functions.
We have decided that we do not need the concept of an Extent in our data
model. Instead, we simply use dimensions, which can be represented in a
vtkm::Id3.
2016-03-22 15:41:42 -06:00

303 lines
10 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h
#define vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/internal/IteratorFromArrayPortal.h>
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/tbb/internal/FunctorsTBB.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#if defined(VTKM_MSVC)
// TBB includes windows.h, which clobbers min and max functions so we
// define NOMINMAX to fix that problem. We also include WIN32_LEAN_AND_MEAN
// to reduce the number of macros and objects windows.h imports as those also
// can cause conflicts
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#endif
#include <tbb/tbb_stddef.h>
#if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2)
//we provide an patched implementation of tbb parallel_sort
//that fixes ADL for std::swap. This patch has been submitted to Intel
//and is fixed in TBB 4.2 update 2.
#include <vtkm/cont/tbb/internal/parallel_sort.h>
#else
#include <tbb/parallel_sort.h>
#endif
#include <tbb/blocked_range.h>
#include <tbb/blocked_range3d.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_scan.h>
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>
#if defined(VTKM_MSVC)
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
namespace cont {
template<>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB> :
vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB>,
vtkm::cont::DeviceAdapterTagTBB>
{
public:
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanInclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut> &output)
{
return tbb::ScanInclusivePortals(
input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
output.PrepareForOutput(input.GetNumberOfValues(),
vtkm::cont::DeviceAdapterTagTBB()), vtkm::internal::Add());
}
template<typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT_EXPORT static T ScanInclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut> &output,
BinaryFunctor binary_functor)
{
return tbb::ScanInclusivePortals(
input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
output.PrepareForOutput(input.GetNumberOfValues(),
vtkm::cont::DeviceAdapterTagTBB()), binary_functor);
}
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanExclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut> &output)
{
return tbb::ScanExclusivePortals(
input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
output.PrepareForOutput(input.GetNumberOfValues(),
vtkm::cont::DeviceAdapterTagTBB()),
vtkm::internal::Add(), vtkm::TypeTraits<T>::ZeroInitialization());
}
template<typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT_EXPORT static T ScanExclusive(
const vtkm::cont::ArrayHandle<T,CIn> &input,
vtkm::cont::ArrayHandle<T,COut> &output,
BinaryFunctor binary_functor,
const T& initialValue)
{
return tbb::ScanExclusivePortals(
input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
output.PrepareForOutput(input.GetNumberOfValues(),
vtkm::cont::DeviceAdapterTagTBB()), binary_functor, initialValue);
}
template<class FunctorType>
VTKM_CONT_EXPORT
static void Schedule(FunctorType functor, vtkm::Id numInstances)
{
const vtkm::Id MESSAGE_SIZE = 1024;
char errorString[MESSAGE_SIZE];
errorString[0] = '\0';
vtkm::exec::internal::ErrorMessageBuffer
errorMessage(errorString, MESSAGE_SIZE);
tbb::ScheduleKernel<FunctorType> kernel(functor);
kernel.SetErrorMessageBuffer(errorMessage);
::tbb::blocked_range<vtkm::Id> range(0, numInstances, tbb::TBB_GRAIN_SIZE);
::tbb::parallel_for(range, kernel);
if (errorMessage.IsErrorRaised())
{
throw vtkm::cont::ErrorExecution(errorString);
}
}
template<class FunctorType>
VTKM_CONT_EXPORT
static void Schedule(FunctorType functor,
vtkm::Id3 rangeMax)
{
//we need to extract from the functor that uniform grid information
const vtkm::Id MESSAGE_SIZE = 1024;
char errorString[MESSAGE_SIZE];
errorString[0] = '\0';
vtkm::exec::internal::ErrorMessageBuffer
errorMessage(errorString, MESSAGE_SIZE);
//memory is generally setup in a way that iterating the first range
//in the tightest loop has the best cache coherence.
::tbb::blocked_range3d<vtkm::Id> range(0, rangeMax[2],
0, rangeMax[1],
0, rangeMax[0]);
tbb::ScheduleKernelId3<FunctorType> kernel(functor,rangeMax);
kernel.SetErrorMessageBuffer(errorMessage);
::tbb::parallel_for(range, kernel);
if (errorMessage.IsErrorRaised())
{
throw vtkm::cont::ErrorExecution(errorString);
}
}
template<typename T, class Container>
VTKM_CONT_EXPORT static void Sort(
vtkm::cont::ArrayHandle<T,Container> &values)
{
//this is required to get sort to work with zip handles
std::less< T > lessOp;
Sort(values, lessOp );
}
template<typename T, class Container, class BinaryCompare>
VTKM_CONT_EXPORT static void Sort(
vtkm::cont::ArrayHandle<T,Container> &values, BinaryCompare binary_compare)
{
typedef typename vtkm::cont::ArrayHandle<T,Container>::template
ExecutionTypes<vtkm::cont::DeviceAdapterTagTBB>::Portal PortalType;
PortalType arrayPortal = values.PrepareForInPlace(
vtkm::cont::DeviceAdapterTagTBB());
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType iterators(arrayPortal);
internal::WrappedBinaryOperator<bool,BinaryCompare> wrappedCompare(binary_compare);
::tbb::parallel_sort(iterators.GetBegin(),
iterators.GetEnd(),
wrappedCompare);
}
template<typename T, typename U, class StorageT, class StorageU>
VTKM_CONT_EXPORT static void SortByKey(
vtkm::cont::ArrayHandle<T,StorageT> &keys,
vtkm::cont::ArrayHandle<U,StorageU> &values)
{
SortByKey(keys, values, std::less<T>());
}
template<typename T, typename U,
class StorageT, class StorageU,
class Compare>
VTKM_CONT_EXPORT static void SortByKey(
vtkm::cont::ArrayHandle<T,StorageT>& keys,
vtkm::cont::ArrayHandle<U,StorageU>& values,
Compare comp)
{
typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
if (sizeof(U) > sizeof(vtkm::Id))
{
/// More efficient sort:
/// Move value indexes when sorting and reorder the value array at last
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
typedef vtkm::cont::ArrayHandle<vtkm::Id> IndexType;
typedef vtkm::cont::ArrayHandleZip<KeyType,IndexType> ZipHandleType;
IndexType indexArray;
ValueType valuesScattered;
const vtkm::Id size = values.GetNumberOfValues();
Copy( ArrayHandleIndex(keys.GetNumberOfValues()), indexArray);
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys,indexArray);
Sort(zipHandle,vtkm::cont::internal::KeyCompare<T,vtkm::Id,Compare>(comp));
tbb::ScatterPortal(values.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
indexArray.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
valuesScattered.PrepareForOutput(size,vtkm::cont::DeviceAdapterTagTBB()));
Copy( valuesScattered, values );
}
else
{
typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;
typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> ZipHandleType;
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys,values);
Sort(zipHandle,vtkm::cont::internal::KeyCompare<T,U,Compare>(comp));
}
}
VTKM_CONT_EXPORT static void Synchronize()
{
// Nothing to do. This device schedules all of its operations using a
// split/join paradigm. This means that the if the control threaad is
// calling this method, then nothing should be running in the execution
// environment.
}
};
/// TBB contains its own high resolution timer.
///
template<>
class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagTBB>
{
public:
VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
{
this->Reset();
}
VTKM_CONT_EXPORT void Reset()
{
vtkm::cont::DeviceAdapterAlgorithm<
vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
this->StartTime = ::tbb::tick_count::now();
}
VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
{
vtkm::cont::DeviceAdapterAlgorithm<
vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
::tbb::tick_count currentTime = ::tbb::tick_count::now();
::tbb::tick_count::interval_t elapsedTime = currentTime - this->StartTime;
return static_cast<vtkm::Float64>(elapsedTime.seconds());
}
private:
::tbb::tick_count StartTime;
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h