Merge branch 'use_ldg_cuda_load'

Robert Maynard 2015-03-11 10:47:57 -04:00
commit e4bd0132e6
4 changed files with 54 additions and 691 deletions

@@ -44,30 +44,12 @@
#endif // gcc && !CUDA
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace cont {
namespace cuda {
namespace internal {
template<typename T> struct UseTexturePortal {typedef boost::false_type type;};
//Currently disabled as we are still tracking down issues with texture
//memory. The major issue is that in testing it is slower than classic arrays.
#ifdef VTKM_USE_TEXTURE_MEM
template<> struct UseTexturePortal<vtkm::Int8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Float32> {typedef boost::true_type type; };
#endif
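//A hedged illustration (not in this commit) of how the trait gates the
//texture path at compile time: only with VTKM_USE_TEXTURE_MEM defined do
//the specializations above report true; everything else stays false_type.
//  BOOST_STATIC_ASSERT(( UseTexturePortal<vtkm::Float32>::type::value ));  // holds only with VTKM_USE_TEXTURE_MEM
//  BOOST_STATIC_ASSERT(( !UseTexturePortal<vtkm::Float64>::type::value )); // Float64 is never texture-backed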
/// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c
/// ArrayManagerExecution class for a thrust device adapter that is designed
/// for the cuda backend which has separate memory spaces for host and device. This
@@ -77,15 +59,9 @@ template<> struct UseTexturePortal<vtkm::Float32> {typedef boost::true_type type
/// This array manager should only be used with the cuda device adapter,
/// since in the future it will take advantage of texture memory and
/// the unique memory access patterns of cuda systems.
-template<typename T, class StorageTag, typename Enable= void>
+template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice
{
//We need a way to detect that we are running on Fermi or older hardware
//and disable the usage of the texture iterator. The __CUDA_ARCH__ define
//only exists in device code, so it can't be used here. I expect we will
//have to devise some form of try-compile with CUDA or offer this as an
//advanced CMake option. A runtime switch may also be possible (see the
//sketch after this class).
public:
typedef T ValueType;
@@ -217,164 +193,6 @@ private:
};
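//A hedged sketch (not part of this commit) of the runtime switch the
//comment above speculates about: query the device's compute capability
//through the CUDA runtime API and only enable texture/read-only loads on
//sm_35 or newer. The helper name DeviceSupportsTextureLoads is
//hypothetical; it assumes the CUDA runtime headers are available, as they
//are in this file.
static bool DeviceSupportsTextureLoads()
{
  int device = 0;
  cudaDeviceProp properties;
  if (cudaGetDevice(&device) != cudaSuccess ||
      cudaGetDeviceProperties(&properties, device) != cudaSuccess)
    {
    return false;
    }
  //__ldg and read-only cache loads require compute capability 3.5
  return (properties.major > 3) ||
         (properties.major == 3 && properties.minor >= 5);
}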
/// This is a specialization that is used to enable texture memory iterators
template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice<T, StorageTag,
typename ::boost::enable_if< typename UseTexturePortal<T>::type >::type >
{
//We need a way to detect that we are running on Fermi or older hardware
//and disable the usage of the texture iterator. The __CUDA_ARCH__ define
//only exists in device code, so it can't be used here. I expect we will
//have to devise some form of try-compile with CUDA or offer this as an
//advanced CMake option. A runtime switch may also be possible.
public:
typedef T ValueType;
typedef vtkm::cont::internal::Storage<ValueType, StorageTag> ContainerType;
typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType;
typedef ::vtkm::exec::cuda::internal::DaxTexObjInputIterator<T> TextureIteratorType;
typedef ::vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< TextureIteratorType > PortalConstType;
VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice():
NumberOfValues(0),
ArrayBegin(),
ArrayEnd(),
HaveTextureBound(false),
InputArrayIterator()
{
}
~ArrayManagerExecutionThrustDevice()
{
this->ReleaseResources();
}
/// Returns the size of the array.
///
VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const {
return this->NumberOfValues;
}
/// Allocates the appropriate size of the array and copies the given data
/// into the array.
///
template<class PortalControl>
VTKM_CONT_EXPORT void LoadDataForInput(PortalControl arrayPortal)
{
//don't bind to the texture yet, as we could have allocated the array
//on a previous call to AllocateArrayForOutput and now are directly
//calling GetPortalConst
try
{
this->NumberOfValues = arrayPortal.GetNumberOfValues();
this->ArrayBegin = ::thrust::system::cuda::malloc<T>( static_cast<std::size_t>(this->NumberOfValues) );
this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;
::thrust::copy(arrayPortal.GetRawIterator(),
arrayPortal.GetRawIterator() + this->NumberOfValues,
this->ArrayBegin);
}
catch (const std::bad_alloc &error)
{
throw vtkm::cont::ErrorControlOutOfMemory(error.what());
}
}
/// Allocates the appropriate size of the array and copies the given data
/// into the array.
///
template<class PortalControl>
VTKM_CONT_EXPORT void LoadDataForInPlace(PortalControl arrayPortal)
{
this->LoadDataForInput(arrayPortal);
}
/// Allocates the array to the given size.
///
VTKM_CONT_EXPORT void AllocateArrayForOutput(
ContainerType &vtkmNotUsed(container),
vtkm::Id numberOfValues)
{
if(this->NumberOfValues > 0)
{
::thrust::system::cuda::free( this->ArrayBegin );
}
this->NumberOfValues = numberOfValues;
this->ArrayBegin = ::thrust::system::cuda::malloc<T>( this->NumberOfValues );
this->ArrayEnd = this->ArrayBegin + numberOfValues;
}
/// Allocates enough space in \c controlArray and copies the data in the
/// device vector into it.
///
VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const
{
controlArray.Allocate(this->NumberOfValues);
::thrust::copy(this->ArrayBegin,
this->ArrayEnd,
controlArray.GetPortal().GetRawIterator());
}
/// Resizes the device vector.
///
VTKM_CONT_EXPORT void Shrink(vtkm::Id numberOfValues)
{
// The operation will succeed even if this assertion fails, but this
// is still supposed to be a precondition to Shrink.
VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues);
this->NumberOfValues = numberOfValues;
this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;
}
VTKM_CONT_EXPORT PortalType GetPortal()
{
return PortalType(this->ArrayBegin, this->ArrayEnd);
}
VTKM_CONT_EXPORT PortalConstType GetPortalConst() const
{
if(!this->HaveTextureBound)
{
this->HaveTextureBound = true;
this->InputArrayIterator.BindTexture(ArrayBegin,this->NumberOfValues);
}
//if we have a texture iterator bound use that
return PortalConstType(this->InputArrayIterator, this->NumberOfValues);
}
/// Frees all memory.
///
VTKM_CONT_EXPORT void ReleaseResources() {
if(this->HaveTextureBound)
{
this->HaveTextureBound = false;
this->InputArrayIterator.UnbindTexture();
}
::thrust::system::cuda::free( this->ArrayBegin );
this->ArrayBegin = ::thrust::system::cuda::pointer<ValueType>();
this->ArrayEnd = ::thrust::system::cuda::pointer<ValueType>();
}
private:
// Not implemented
ArrayManagerExecutionThrustDevice(
ArrayManagerExecutionThrustDevice<T, StorageTag> &);
void operator=(
ArrayManagerExecutionThrustDevice<T, StorageTag> &);
vtkm::Id NumberOfValues;
::thrust::system::cuda::pointer<ValueType> ArrayBegin;
::thrust::system::cuda::pointer<ValueType> ArrayEnd;
mutable bool HaveTextureBound;
mutable TextureIteratorType InputArrayIterator;
};
}
}
}

@@ -24,7 +24,6 @@
#include <vtkm/internal/ExportMacros.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
@@ -59,7 +58,6 @@ namespace detail {
// Tags to specify what type of thrust iterator to use.
struct ThrustIteratorTransformTag { };
struct ThrustIteratorDevicePtrTag { };
struct ThrustIteratorDeviceTextureTag { };
// Traits to help classify what thrust iterators will be used.
template<class IteratorType>
@@ -149,13 +147,6 @@ struct IteratorTraits
typedef typename IteratorChooser<PortalType, Tag>::Type IteratorType;
};
template<typename T>
struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > >
{
typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType;
typedef ThrustIteratorDeviceTextureTag Tag;
typedef typename PortalType::IteratorType IteratorType;
};
template<typename T>
VTKM_CONT_EXPORT static
@@ -190,14 +181,6 @@ MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDevicePtrTag)
return MakeDevicePtr(portal.GetIteratorBegin());
}
template<class PortalType>
VTKM_CONT_EXPORT static
typename IteratorTraits<PortalType>::IteratorType
MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag)
{
return portal.GetIteratorBegin();
}
} // namespace detail

@@ -1,490 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014. Los Alamos National Security
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
#ifndef vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#define vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#include <vtkm/Types.h>
#include <iterator>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#endif // gcc version >= 4.6
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 2)
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif // gcc version >= 4.2
#endif // gcc && !CUDA
#include <thrust/system/cuda/memory.h>
#include <thrust/iterator/iterator_facade.h>
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
namespace
{
/**
* \brief Type selection (<tt>IF ? ThenType : ElseType</tt>)
*/
template <bool IF, typename ThenType, typename ElseType>
struct If
{
/// Conditional type result
typedef ThenType Type; // true
};
template <typename ThenType, typename ElseType>
struct If<false, ThenType, ElseType>
{
typedef ElseType Type; // false
};
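// e.g. If<true, int, float>::Type is int, while If<false, int, float>::Type
// is float; this is the compile-time selector used by UnitWord below.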
/******************************************************************************
* Size and alignment
******************************************************************************/
/// Structure alignment
template <typename T>
struct AlignBytes
{
struct Pad
{
T val;
char byte;
};
enum
{
/// The alignment of T in bytes
ALIGN_BYTES = sizeof(Pad) - sizeof(T)
};
};
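// e.g. for T = double the padded struct is {double val; char byte;};
// sizeof(Pad) is typically 16, so ALIGN_BYTES == 16 - sizeof(double) == 8.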
// Specializations where host C++ compilers (e.g., Windows) may disagree with device C++ compilers (EDG)
template <> struct AlignBytes<short4> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<ushort4> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<int2> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<uint2> { enum { ALIGN_BYTES = 8 }; };
#ifdef _WIN32
template <> struct AlignBytes<long2> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<ulong2> { enum { ALIGN_BYTES = 8 }; };
#endif
template <> struct AlignBytes<long long> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<unsigned long long> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<float2> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<double> { enum { ALIGN_BYTES = 8 }; };
template <> struct AlignBytes<int4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<uint4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<float4> { enum { ALIGN_BYTES = 16 }; };
#ifndef _WIN32
template <> struct AlignBytes<long2> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<ulong2> { enum { ALIGN_BYTES = 16 }; };
#endif
template <> struct AlignBytes<long4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<ulong4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<longlong2> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<ulonglong2> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<double2> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<longlong4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<ulonglong4> { enum { ALIGN_BYTES = 16 }; };
template <> struct AlignBytes<double4> { enum { ALIGN_BYTES = 16 }; };
/// Unit-words of data movement
template <typename T>
struct UnitWord
{
enum {
ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES
};
template <typename Unit>
struct IsMultiple
{
enum {
UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES,
IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0)
};
};
/// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T
typedef typename If<IsMultiple<int>::IS_MULTIPLE,
unsigned int,
typename If<IsMultiple<short>::IS_MULTIPLE,
unsigned short,
unsigned char>::Type>::Type ShuffleWord;
/// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T
typedef typename If<IsMultiple<long long>::IS_MULTIPLE,
unsigned long long,
ShuffleWord>::Type VolatileWord;
/// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T
typedef typename If<IsMultiple<longlong2>::IS_MULTIPLE,
ulonglong2,
VolatileWord>::Type DeviceWord;
/// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T
typedef typename If<IsMultiple<int4>::IS_MULTIPLE,
uint4,
typename If<IsMultiple<int2>::IS_MULTIPLE,
uint2,
ShuffleWord>::Type>::Type TextureWord;
};
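// Worked example (assumed platform sizes): for a 12-byte, 4-byte-aligned
// element type, the int4 (16B) and int2 (8B) multiples fail the
// divisibility test, so TextureWord falls through to ShuffleWord, which
// resolves to unsigned int.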
}
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
template <
typename T,
typename Offset = ptrdiff_t>
class DaxTexObjInputIterator
{
public:
// Required iterator traits
typedef DaxTexObjInputIterator self_type; ///< My own type
typedef Offset difference_type; ///< Type to express the result of subtracting one iterator from another
typedef T value_type; ///< The type of the element the iterator can point to
typedef T* pointer; ///< The type of a pointer to an element the iterator can point to
typedef T reference; ///< The type of a reference to an element the iterator can point to
// Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
typedef typename ::thrust::detail::iterator_facade_category<
::thrust::device_system_tag,
::thrust::random_access_traversal_tag,
value_type,
reference
>::type iterator_category; ///< The iterator category
private:
// Largest texture word we can use in device
typedef typename UnitWord<T>::TextureWord TextureWord;
// Number of texture words per T
enum { TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) };
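// e.g. T = double2 maps to TextureWord = uint4 (both 16 bytes), giving
// TEXTURE_MULTIPLE == 1; T = double4 (32 bytes) needs two uint4 fetches,
// so TEXTURE_MULTIPLE == 2.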
private:
const T* ptr;
difference_type tex_offset;
cudaTextureObject_t tex_obj;
public:
/// Constructor
__host__ __device__ __forceinline__ DaxTexObjInputIterator()
:
ptr(NULL),
tex_offset(0),
tex_obj(0)
{}
/// Use this iterator to bind \p ptr to a texture object
cudaError_t BindTexture(
const ::thrust::system::cuda::pointer<T> ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
size_t numElements, ///< Number of elements in the range
size_t tex_offset = 0) ///< Offset (in items) from \p ptr denoting the position of the iterator
{
this->ptr = ptr.get();
this->tex_offset = tex_offset;
cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<TextureWord>();
cudaResourceDesc res_desc;
cudaTextureDesc tex_desc;
memset(&res_desc, 0, sizeof(cudaResourceDesc));
memset(&tex_desc, 0, sizeof(cudaTextureDesc));
res_desc.resType = cudaResourceTypeLinear;
res_desc.res.linear.devPtr = (void*)ptr.get();
res_desc.res.linear.desc = channel_desc;
res_desc.res.linear.sizeInBytes = numElements * sizeof(T);
tex_desc.readMode = cudaReadModeElementType;
return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
}
/// Unbind this iterator from its texture object
cudaError_t UnbindTexture()
{
return cudaDestroyTextureObject(tex_obj);
}
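// Hypothetical usage sketch (not from this commit) of the bind/unbind
// lifecycle, assuming data is a thrust CUDA allocation of 1024 floats:
//   DaxTexObjInputIterator<float> iter;
//   iter.BindTexture(data, 1024);   // create the texture object over data
//   ... launch kernels that read through iter ...
//   iter.UnbindTexture();           // destroy the texture object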
/// Postfix increment
__host__ __device__ __forceinline__ self_type operator++(int)
{
self_type retval = *this;
tex_offset++;
return retval;
}
/// Prefix increment
__host__ __device__ __forceinline__ self_type operator++()
{
tex_offset++;
return *this;
}
/// Postfix decrement
__host__ __device__ __forceinline__ self_type operator--(int)
{
self_type retval = *this;
tex_offset--;
return retval;
}
/// Prefix decrement
__host__ __device__ __forceinline__ self_type operator--()
{
tex_offset--;
return *this;
}
/// Indirection
__host__ __device__ __forceinline__ reference operator*() const
{
#ifndef __CUDA_ARCH__
// Simply dereference the pointer on the host
return ptr[tex_offset];
#else
// Move array of uninitialized words, then alias and assign to return value
TextureWord words[TEXTURE_MULTIPLE];
#pragma unroll
for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
{
words[i] = tex1Dfetch<TextureWord>(
tex_obj,
(tex_offset * TEXTURE_MULTIPLE) + i);
}
// Load from words
return *reinterpret_cast<T*>(words);
#endif
}
/// Addition
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator+(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_obj = tex_obj;
retval.tex_offset = tex_offset + n;
return retval;
}
/// Addition assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator+=(Distance n)
{
tex_offset += n;
return *this;
}
/// Subtraction
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator-(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_obj = tex_obj;
retval.tex_offset = tex_offset - n;
return retval;
}
/// Subtraction assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator-=(Distance n)
{
tex_offset -= n;
return *this;
}
/// Distance
__host__ __device__ __forceinline__ difference_type operator-(self_type other) const
{
return tex_offset - other.tex_offset;
}
/// Array subscript
template <typename Distance>
__host__ __device__ __forceinline__ reference operator[](Distance n) const
{
return *(*this + n);
}
/// Structure dereference
__host__ __device__ __forceinline__ pointer operator->()
{
return &(*(*this));
}
/// Equal to
__host__ __device__ __forceinline__ bool operator==(const self_type& rhs) const
{
return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj));
}
/// Not equal to
__host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) const
{
return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj));
}
/// less than
__host__ __device__ __forceinline__ bool operator<(const self_type& rhs) const
{
return (tex_offset < rhs.tex_offset);
}
/// ostream operator
friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
{
return os;
}
};
template<class TextureIterator>
class ConstArrayPortalFromTexture
{
public:
typedef typename TextureIterator::value_type ValueType;
typedef TextureIterator IteratorType;
VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { }
VTKM_CONT_EXPORT
ConstArrayPortalFromTexture(IteratorType begin, ptrdiff_t size)
: Length(size),
BeginIterator(begin),
EndIterator(begin+size)
{ }
/// Copy constructor for any other ConstArrayPortalFromTexture with an iterator
/// type that can be copied to this iterator type. This allows us to do any
/// type casting that the iterators do (like the non-const to const cast).
///
template<typename OtherIteratorT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture<OtherIteratorT> &src)
: Length(src.Length),
BeginIterator(src.BeginIterator),
EndIterator(src.EndIterator)
{ }
template<typename OtherIteratorT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture<IteratorType> &operator=(
const ConstArrayPortalFromTexture<OtherIteratorT> &src)
{
this->Length = src.Length;
this->BeginIterator = src.BeginIterator;
this->EndIterator = src.EndIterator;
return *this;
}
VTKM_EXEC_CONT_EXPORT
vtkm::Id GetNumberOfValues() const {
return static_cast<vtkm::Id>(this->Length);
}
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
return *this->IteratorAt(index);
}
VTKM_EXEC_EXPORT
void Set(vtkm::Id index, ValueType value) const {
*this->IteratorAt(index) = value;
}
VTKM_CONT_EXPORT
IteratorType GetIteratorBegin() const { return this->BeginIterator; }
VTKM_CONT_EXPORT
IteratorType GetIteratorEnd() const { return this->EndIterator; }
private:
ptrdiff_t Length;
IteratorType BeginIterator;
IteratorType EndIterator;
VTKM_EXEC_EXPORT
IteratorType IteratorAt(vtkm::Id index) const {
// Not using std::advance because it cannot be used in CUDA device code.
return (this->BeginIterator + index);
}
};
}
}
}
} // namespace vtkm::exec::cuda::internal
#endif //vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h

@@ -42,11 +42,63 @@
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
template<typename T> struct UseTextureLoad {typedef boost::false_type type;};
template<> struct UseTextureLoad<vtkm::Int8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float32* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float64* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float64,2>* > {typedef boost::true_type type; };
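//Note (not from this commit): the list above roughly mirrors the scalar
//and vector types for which CUDA provides __ldg overloads; any other
//element type falls back to the generic load below.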
//this T type is not one that is valid to be loaded through texture memory
template<typename T, typename Enable = void>
struct load_through_texture
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
return *(data.get());
}
};
//this T type is valid to be loaded through texture memory
template<typename T>
struct load_through_texture<T, typename ::boost::enable_if< typename UseTextureLoad<T*>::type >::type >
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
//only load through the read-only texture cache if we have sm_35 support
#if __CUDA_ARCH__ >= 350
return __ldg(data.get());
#else
return *(data.get());
#endif
}
};
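//A minimal self-contained sketch (not part of this commit) of the same
//technique outside the portal machinery: route read-only loads through
//__ldg on sm_35+ devices and fall back to a plain dereference elsewhere.
//The names ReadOnlyLoad and CopyKernel are hypothetical.
__device__ __forceinline__ float ReadOnlyLoad(const float* ptr)
{
#if __CUDA_ARCH__ >= 350
  return __ldg(ptr);   // served by the read-only (texture) data cache
#else
  return *ptr;         // classic global-memory load
#endif
}

__global__ void CopyKernel(const float* in, float* out, int n)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) { out[i] = ReadOnlyLoad(in + i); }
}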
class ArrayPortalFromThrustBase {};
/// This templated implementation of an ArrayPortal allows you to adapt a pair
@@ -168,7 +220,7 @@ public:
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
-return *this->IteratorAt(index);
+return vtkm::exec::cuda::internal::load_through_texture<ValueType>::get( this->IteratorAt(index) );
}
VTKM_EXEC_EXPORT