Simplify the implementation of loading through textures.

We don't need this super complicated system for texture loading.
This commit is contained in:
Robert Maynard 2015-03-09 16:34:48 -04:00
parent 9b49973621
commit 63b1f03187
4 changed files with 54 additions and 350 deletions

@ -44,37 +44,12 @@
#endif // gcc && !CUDA
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace cont {
namespace cuda {
namespace internal {
// Trait marking which value types may be read through the CUDA texture path
// (used by the specialized array manager below via boost::enable_if).  The
// primary template answers "no"; each specialization enumerates a scalar or
// vector type for which the texture fetch is supported.
template<typename T> struct UseTexturePortal {typedef boost::false_type type;};

// Integer scalars (8/16/32 bit, signed and unsigned).
template<> struct UseTexturePortal<vtkm::Int8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt32> {typedef boost::true_type type; };
// 32-bit integer vectors of width 2 and 4 (width 3 is absent here).
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Int32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::UInt32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Int32,4> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::UInt32,4> > {typedef boost::true_type type; };
// Floating-point scalars and vectors.  Note Vec<Float64,4> is not listed —
// presumably because it exceeds the 16-byte texture fetch width; confirm.
template<> struct UseTexturePortal<vtkm::Float32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Float64> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float32,4> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float64,2> > {typedef boost::true_type type; };
/// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c
/// ArrayManagerExecution class for a thrust device adapter that is designed
/// for the cuda backend which has separate memory spaces for host and device. This
@ -84,15 +59,9 @@ template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float64,2> > {typedef boost::
/// This array manager should only be used with the cuda device adapter,
/// since in the future it will take advantage of texture memory and
/// the unique memory access patterns of cuda systems.
template<typename T, class StorageTag, typename Enable= void>
template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice
{
//we need a way to detect that we are using FERMI or lower and disable
//the usage of texture iterator. The __CUDA_ARCH__ define is only around
//for device code so that can't be used. I expect that we will have to devise
//some form of Try/Compile with CUDA or just offer this as an advanced CMake
//option. We could also try and see if a runtime switch is possible.
public:
typedef T ValueType;
@ -224,143 +193,6 @@ private:
};
/// This is a specialization that is used to enable texture memory iterators
template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice<T, StorageTag,
    typename ::boost::enable_if< typename UseTexturePortal<T>::type >::type >
{
public:
  typedef T ValueType;
  typedef vtkm::cont::internal::Storage<ValueType, StorageTag> ContainerType;
  typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType;
  // Const portals read through the texture (read-only data cache) path.
  typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalConstType;

  VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice():
    NumberOfValues(0),
    ArrayBegin(),
    ArrayEnd()
  {
  }

  ~ArrayManagerExecutionThrustDevice()
  {
    this->ReleaseResources();
  }

  /// Returns the size of the array.
  ///
  VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const {
    return this->NumberOfValues;
  }

  /// Allocates the appropriate size of the array and copies the given data
  /// into the array.
  ///
  template<class PortalControl>
  VTKM_CONT_EXPORT void LoadDataForInput(PortalControl arrayPortal)
  {
    //don't bind to the texture yet, as we could have allocated the array
    //on a previous call with AllocateArrayForOutput and now are directly
    //calling get portal const
    try
    {
      this->NumberOfValues = arrayPortal.GetNumberOfValues();
      this->ArrayBegin = ::thrust::system::cuda::malloc<T>(
            static_cast<std::size_t>(this->NumberOfValues) );
      this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;

      ::thrust::copy(arrayPortal.GetRawIterator(),
                     arrayPortal.GetRawIterator() + this->NumberOfValues,
                     this->ArrayBegin);
    }
    // Catch by const reference (catching std::bad_alloc by value copies the
    // exception object needlessly).
    catch (const std::bad_alloc &error)
    {
      // Translate the device allocation failure into the control-side
      // out-of-memory error so callers see a consistent exception type.
      throw vtkm::cont::ErrorControlOutOfMemory(error.what());
    }
  }

  /// Allocates the appropriate size of the array and copies the given data
  /// into the array.  In-place data is handled identically to input data.
  ///
  template<class PortalControl>
  VTKM_CONT_EXPORT void LoadDataForInPlace(PortalControl arrayPortal)
  {
    this->LoadDataForInput(arrayPortal);
  }

  /// Allocates the array to the given size.
  ///
  VTKM_CONT_EXPORT void AllocateArrayForOutput(
      ContainerType &vtkmNotUsed(container),
      vtkm::Id numberOfValues)
  {
    // Release any previous allocation before acquiring a new one.
    if(this->NumberOfValues > 0)
    {
      ::thrust::system::cuda::free( this->ArrayBegin );
    }
    this->NumberOfValues = numberOfValues;
    try
    {
      // static_cast keeps the size argument type consistent with
      // LoadDataForInput, and the try/catch gives the same exception
      // translation on allocation failure.
      this->ArrayBegin = ::thrust::system::cuda::malloc<T>(
            static_cast<std::size_t>(this->NumberOfValues) );
    }
    catch (const std::bad_alloc &error)
    {
      throw vtkm::cont::ErrorControlOutOfMemory(error.what());
    }
    this->ArrayEnd = this->ArrayBegin + numberOfValues;
  }

  /// Allocates enough space in \c controlArray and copies the data in the
  /// device vector into it.
  ///
  VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const
  {
    controlArray.Allocate(this->NumberOfValues);
    ::thrust::copy(this->ArrayBegin,
                   this->ArrayEnd,
                   controlArray.GetPortal().GetRawIterator());
  }

  /// Resizes the device vector.
  ///
  VTKM_CONT_EXPORT void Shrink(vtkm::Id numberOfValues)
  {
    // The operation will succeed even if this assertion fails, but this
    // is still supposed to be a precondition to Shrink.
    VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues);
    // Shrinking never reallocates; only the logical end is moved.
    this->NumberOfValues = numberOfValues;
    this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;
  }

  /// Read/write portal over the raw device memory.
  VTKM_CONT_EXPORT PortalType GetPortal()
  {
    return PortalType(this->ArrayBegin, this->ArrayEnd);
  }

  /// Read-only portal that fetches through the texture path.
  VTKM_CONT_EXPORT PortalConstType GetPortalConst() const
  {
    return PortalConstType(this->ArrayBegin, this->ArrayEnd);
  }

  /// Frees all memory.
  ///
  VTKM_CONT_EXPORT void ReleaseResources()
  {
    ::thrust::system::cuda::free( this->ArrayBegin );
    this->ArrayBegin = ::thrust::system::cuda::pointer<ValueType>();
    this->ArrayEnd = ::thrust::system::cuda::pointer<ValueType>();
    // Reset the size so GetNumberOfValues and a subsequent
    // AllocateArrayForOutput do not act on a stale count.
    this->NumberOfValues = 0;
  }

private:
  // Not implemented (non-copyable: the manager owns raw device memory).
  ArrayManagerExecutionThrustDevice(
      ArrayManagerExecutionThrustDevice<T, StorageTag> &);
  void operator=(
      ArrayManagerExecutionThrustDevice<T, StorageTag> &);

  vtkm::Id NumberOfValues;
  // Owned device allocation; [ArrayBegin, ArrayEnd) spans NumberOfValues items.
  ::thrust::system::cuda::pointer<ValueType> ArrayBegin;
  ::thrust::system::cuda::pointer<ValueType> ArrayEnd;
};
}
}
}

@ -24,7 +24,6 @@
#include <vtkm/internal/ExportMacros.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
@ -59,7 +58,6 @@ namespace detail {
// Tags to specify what type of thrust iterator to use.
struct ThrustIteratorTransformTag { };
struct ThrustIteratorDevicePtrTag { };
struct ThrustIteratorDeviceTextureTag { };
// Traits to help classify what thrust iterators will be used.
template<class IteratorType>
@ -149,13 +147,6 @@ struct IteratorTraits
typedef typename IteratorChooser<PortalType, Tag>::Type IteratorType;
};
// Specialization of IteratorTraits for the texture-memory portal: iterate
// with a const thrust CUDA device pointer and dispatch via the texture tag.
template<typename T>
struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > >
{
typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType;
typedef ThrustIteratorDeviceTextureTag Tag;
typedef thrust::system::cuda::pointer< const T > IteratorType;
};
template<typename T>
VTKM_CONT_EXPORT static
@ -190,14 +181,6 @@ MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDevicePtrTag)
return MakeDevicePtr(portal.GetIteratorBegin());
}
// Overload selected for texture-tagged portals: wrap the portal's raw begin
// pointer in a thrust device pointer (same mechanics as the device-ptr tag).
template<class PortalType>
VTKM_CONT_EXPORT static
typename IteratorTraits<PortalType>::IteratorType
MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag)
{
return MakeDevicePtr(portal.GetIteratorBegin());
}
} // namespace detail

@ -1,163 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014. Los Alamos National Security
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
#ifndef vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#define vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#include <vtkm/Types.h>
#include <iterator>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#endif // gcc version >= 4.6
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 2)
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif // gcc version >= 4.2
#endif // gcc && !CUDA
#include <thrust/system/cuda/memory.h>
#include <thrust/iterator/iterator_facade.h>
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
/// \c ConstArrayPortalFromTexture wraps a pair of thrust CUDA device pointers
/// as an array portal whose reads go through the read-only data cache
/// (\c __ldg) when compiled for sm_35 or later.
template<typename T>
class ConstArrayPortalFromTexture : public ArrayPortalFromThrustBase
{
public:
typedef T ValueType;
typedef typename thrust::system::cuda::pointer< T > PointerType;
typedef const T* IteratorType;
VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { }
/// Construct over the half-open device range [begin, end).
VTKM_CONT_EXPORT
ConstArrayPortalFromTexture(const PointerType begin, const PointerType end)
: BeginIterator( begin ),
EndIterator( end )
{ }
/// Copy constructor for any other ConstArrayPortalFromTexture with an iterator
/// type that can be copied to this iterator type. This allows us to do any
/// type casting that the iterators do (like the non-const to const cast).
///
// NOTE(review): when OtherT != T this reads the private members of a
// different class instantiation, which requires a friend declaration that is
// not visible here — confirm this template is only ever used with
// convertible/identical instantiations.
template<typename OtherT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture<OtherT> &src)
: BeginIterator(src.BeginIterator),
EndIterator(src.EndIterator)
{ }
// Cross-instantiation assignment; same private-access caveat as the copy
// constructor above.
template<typename OtherT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture<T> &operator=(
const ConstArrayPortalFromTexture<OtherT> &src)
{
this->BeginIterator = src.BeginIterator;
this->EndIterator = src.EndIterator;
return *this;
}
/// Number of values spanned by [BeginIterator, EndIterator).
VTKM_EXEC_CONT_EXPORT
vtkm::Id GetNumberOfValues() const {
// Not using std::distance because on CUDA it cannot be used on a device.
return (this->EndIterator - this->BeginIterator);
}
/// Fetch the value at \c index.  On sm_35+ this uses the __ldg intrinsic so
/// the read goes through the read-only data cache; otherwise it is a plain
/// global-memory load.
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
#if __CUDA_ARCH__ >= 350
return __ldg(this->IteratorAt(index).get());
#else
return *this->IteratorAt(index);
#endif
}
// NOTE(review): despite the "Const" in the class name, Set writes through
// the underlying (non-const) device pointer — confirm this is intentional.
VTKM_EXEC_EXPORT
void Set(vtkm::Id index, ValueType value) const {
*this->IteratorAt(index) = value;
}
/// Raw device pointers for interoperating with thrust algorithms.
VTKM_CONT_EXPORT
IteratorType GetIteratorBegin() const { return this->BeginIterator.get(); }
VTKM_CONT_EXPORT
IteratorType GetIteratorEnd() const { return this->EndIterator.get(); }
private:
PointerType BeginIterator;
PointerType EndIterator;
// Pointer arithmetic helper usable in device code.
VTKM_EXEC_EXPORT
PointerType IteratorAt(vtkm::Id index) const {
// Not using std::advance because on CUDA it cannot be used on a device.
return (this->BeginIterator + index);
}
};
}
}
}
} // namespace vtkm::exec::cuda::internal
#endif //vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h

@ -42,11 +42,63 @@
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
// Trait selecting which types may be fetched with the __ldg texture-path
// intrinsic in load_through_texture below.  Primary template: not supported.
//
// NOTE(review): the specializations are keyed on POINTER types (vtkm::Int8*,
// etc.), while load_through_texture queries UseTextureLoad<T> with the plain
// value type — confirm the intended key, otherwise these specializations
// never match and the texture path is never taken.
template<typename T> struct UseTextureLoad {typedef boost::false_type type;};

template<> struct UseTextureLoad<vtkm::Int8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float32* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float64* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float64,2>* > {typedef boost::true_type type; };
//this T type is not one that is valid to be loaded through texture memory
// Fallback case: a plain dereference of the device pointer (ordinary
// global-memory load).
template<typename T, typename Enable = void>
struct load_through_texture
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
return *(data.get());
}
};
//this T type is valid to be loaded through texture memory
// Enabled via boost::enable_if on UseTextureLoad<T>.
// NOTE(review): UseTextureLoad's specializations above are keyed on T*, not
// T, so verify this specialization is actually selected for the intended
// value types.
template<typename T>
struct load_through_texture<T, typename ::boost::enable_if< typename UseTextureLoad<T>::type >::type >
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
//only load through a texture if we have sm 35 support
#if __CUDA_ARCH__ >= 350
return __ldg(data.get());
#else
return *(data.get());
#endif
}
};
// Empty base class common to the thrust-backed array portals in this file
// (e.g. ConstArrayPortalFromTexture derives from it).
class ArrayPortalFromThrustBase {};
/// This templated implementation of an ArrayPortal allows you to adapt a pair
@ -168,7 +220,7 @@ public:
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
return *this->IteratorAt(index);
return vtkm::exec::cuda::internal::load_through_texture<ValueType>::get( this->IteratorAt(index) );
}
VTKM_EXEC_EXPORT