Simplify the implementation of loading through textures.

We don't need this super complicated system for texture loading.
This commit is contained in:
Robert Maynard 2015-03-09 16:34:48 -04:00
parent 9b49973621
commit 63b1f03187
4 changed files with 54 additions and 350 deletions

@ -44,37 +44,12 @@
#endif // gcc && !CUDA
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace cont {
namespace cuda {
namespace internal {
// Trait marking which value types may be read through the CUDA texture path
// (used by the specialized array manager below via boost::enable_if).  The
// primary template answers "no"; each specialization enumerates a scalar or
// vector type for which the texture fetch is supported.
template<typename T> struct UseTexturePortal {typedef boost::false_type type;};

// Integer scalars (8/16/32 bit, signed and unsigned).
template<> struct UseTexturePortal<vtkm::Int8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt8> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt16> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Int32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::UInt32> {typedef boost::true_type type; };
// 32-bit integer vectors of width 2 and 4 (width 3 is absent here).
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Int32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::UInt32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Int32,4> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::UInt32,4> > {typedef boost::true_type type; };
// Floating-point scalars and vectors.  Note Vec<Float64,4> is not listed —
// presumably because it exceeds the 16-byte texture fetch width; confirm.
template<> struct UseTexturePortal<vtkm::Float32> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Float64> {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float32,2> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float32,4> > {typedef boost::true_type type; };
template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float64,2> > {typedef boost::true_type type; };
/// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c
/// ArrayManagerExecution class for a thrust device adapter that is designed
/// for the cuda backend which has separate memory spaces for host and device. This
@ -84,15 +59,9 @@ template<> struct UseTexturePortal<vtkm::Vec<vtkm::Float64,2> > {typedef boost::
/// This array manager should only be used with the cuda device adapter,
/// since in the future it will take advantage of texture memory and
/// the unique memory access patterns of cuda systems.
template<typename T, class StorageTag, typename Enable= void>
template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice
{
//we need a way to detect that we are using FERMI or lower and disable
//the usage of texture iterator. The __CUDA_ARCH__ define is only around
//for device code so that can't be used. I expect that we will have to devise
//some form of Try/Compile with CUDA or just offer this as an advanced CMake
//option. We could also try and see if a runtime switch is possible.
public:
typedef T ValueType;
@ -224,143 +193,6 @@ private:
};
/// This is a specialization that is used to enable texture memory iterators
template<typename T, class StorageTag>
class ArrayManagerExecutionThrustDevice<T, StorageTag,
    typename ::boost::enable_if< typename UseTexturePortal<T>::type >::type >
{
public:
  typedef T ValueType;
  typedef vtkm::cont::internal::Storage<ValueType, StorageTag> ContainerType;
  typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType;
  // Const portals read through the texture (read-only data cache) path.
  typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalConstType;

  VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice():
    NumberOfValues(0),
    ArrayBegin(),
    ArrayEnd()
  {
  }

  ~ArrayManagerExecutionThrustDevice()
  {
    this->ReleaseResources();
  }

  /// Returns the size of the array.
  ///
  VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const {
    return this->NumberOfValues;
  }

  /// Allocates the appropriate size of the array and copies the given data
  /// into the array.
  ///
  template<class PortalControl>
  VTKM_CONT_EXPORT void LoadDataForInput(PortalControl arrayPortal)
  {
    //don't bind to the texture yet, as we could have allocated the array
    //on a previous call with AllocateArrayForOutput and now are directly
    //calling get portal const
    try
    {
      this->NumberOfValues = arrayPortal.GetNumberOfValues();
      this->ArrayBegin = ::thrust::system::cuda::malloc<T>(
            static_cast<std::size_t>(this->NumberOfValues) );
      this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;

      ::thrust::copy(arrayPortal.GetRawIterator(),
                     arrayPortal.GetRawIterator() + this->NumberOfValues,
                     this->ArrayBegin);
    }
    // Catch by const reference (catching std::bad_alloc by value copies the
    // exception object needlessly).
    catch (const std::bad_alloc &error)
    {
      // Translate the device allocation failure into the control-side
      // out-of-memory error so callers see a consistent exception type.
      throw vtkm::cont::ErrorControlOutOfMemory(error.what());
    }
  }

  /// Allocates the appropriate size of the array and copies the given data
  /// into the array.  In-place data is handled identically to input data.
  ///
  template<class PortalControl>
  VTKM_CONT_EXPORT void LoadDataForInPlace(PortalControl arrayPortal)
  {
    this->LoadDataForInput(arrayPortal);
  }

  /// Allocates the array to the given size.
  ///
  VTKM_CONT_EXPORT void AllocateArrayForOutput(
      ContainerType &vtkmNotUsed(container),
      vtkm::Id numberOfValues)
  {
    // Release any previous allocation before acquiring a new one.
    if(this->NumberOfValues > 0)
    {
      ::thrust::system::cuda::free( this->ArrayBegin );
    }
    this->NumberOfValues = numberOfValues;
    try
    {
      // static_cast keeps the size argument type consistent with
      // LoadDataForInput, and the try/catch gives the same exception
      // translation on allocation failure.
      this->ArrayBegin = ::thrust::system::cuda::malloc<T>(
            static_cast<std::size_t>(this->NumberOfValues) );
    }
    catch (const std::bad_alloc &error)
    {
      throw vtkm::cont::ErrorControlOutOfMemory(error.what());
    }
    this->ArrayEnd = this->ArrayBegin + numberOfValues;
  }

  /// Allocates enough space in \c controlArray and copies the data in the
  /// device vector into it.
  ///
  VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const
  {
    controlArray.Allocate(this->NumberOfValues);
    ::thrust::copy(this->ArrayBegin,
                   this->ArrayEnd,
                   controlArray.GetPortal().GetRawIterator());
  }

  /// Resizes the device vector.
  ///
  VTKM_CONT_EXPORT void Shrink(vtkm::Id numberOfValues)
  {
    // The operation will succeed even if this assertion fails, but this
    // is still supposed to be a precondition to Shrink.
    VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues);
    // Shrinking never reallocates; only the logical end is moved.
    this->NumberOfValues = numberOfValues;
    this->ArrayEnd = this->ArrayBegin + this->NumberOfValues;
  }

  /// Read/write portal over the raw device memory.
  VTKM_CONT_EXPORT PortalType GetPortal()
  {
    return PortalType(this->ArrayBegin, this->ArrayEnd);
  }

  /// Read-only portal that fetches through the texture path.
  VTKM_CONT_EXPORT PortalConstType GetPortalConst() const
  {
    return PortalConstType(this->ArrayBegin, this->ArrayEnd);
  }

  /// Frees all memory.
  ///
  VTKM_CONT_EXPORT void ReleaseResources()
  {
    ::thrust::system::cuda::free( this->ArrayBegin );
    this->ArrayBegin = ::thrust::system::cuda::pointer<ValueType>();
    this->ArrayEnd = ::thrust::system::cuda::pointer<ValueType>();
    // Reset the size so GetNumberOfValues and a subsequent
    // AllocateArrayForOutput do not act on a stale count.
    this->NumberOfValues = 0;
  }

private:
  // Not implemented (non-copyable: the manager owns raw device memory).
  ArrayManagerExecutionThrustDevice(
      ArrayManagerExecutionThrustDevice<T, StorageTag> &);
  void operator=(
      ArrayManagerExecutionThrustDevice<T, StorageTag> &);

  vtkm::Id NumberOfValues;
  // Owned device allocation; [ArrayBegin, ArrayEnd) spans NumberOfValues items.
  ::thrust::system::cuda::pointer<ValueType> ArrayBegin;
  ::thrust::system::cuda::pointer<ValueType> ArrayEnd;
};
}
}
}

@ -24,7 +24,6 @@
#include <vtkm/internal/ExportMacros.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromTexture.h>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
@ -59,7 +58,6 @@ namespace detail {
// Tags to specify what type of thrust iterator to use.
struct ThrustIteratorTransformTag { };
struct ThrustIteratorDevicePtrTag { };
struct ThrustIteratorDeviceTextureTag { };
// Traits to help classify what thrust iterators will be used.
template<class IteratorType>
@ -149,13 +147,6 @@ struct IteratorTraits
typedef typename IteratorChooser<PortalType, Tag>::Type IteratorType;
};
// Specialization of IteratorTraits for the texture-memory portal: iterate
// with a const thrust CUDA device pointer and dispatch via the texture tag.
template<typename T>
struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > >
{
typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType;
typedef ThrustIteratorDeviceTextureTag Tag;
typedef thrust::system::cuda::pointer< const T > IteratorType;
};
template<typename T>
VTKM_CONT_EXPORT static
@ -190,14 +181,6 @@ MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDevicePtrTag)
return MakeDevicePtr(portal.GetIteratorBegin());
}
// Overload selected for texture-tagged portals: wrap the portal's raw begin
// pointer in a thrust device pointer (same mechanics as the device-ptr tag).
template<class PortalType>
VTKM_CONT_EXPORT static
typename IteratorTraits<PortalType>::IteratorType
MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag)
{
return MakeDevicePtr(portal.GetIteratorBegin());
}
} // namespace detail

@ -1,163 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014. Los Alamos National Security
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
#ifndef vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#define vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h
#include <vtkm/Types.h>
#include <iterator>
// Disable GCC warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic push
#endif // gcc version >= 4.6
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 2)
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif // gcc version >= 4.2
#endif // gcc && !CUDA
#include <thrust/system/cuda/memory.h>
#include <thrust/iterator/iterator_facade.h>
#if defined(__GNUC__) && !defined(VTKM_CUDA)
#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6)
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
/// \c ConstArrayPortalFromTexture wraps a pair of thrust CUDA device pointers
/// as an array portal whose reads go through the read-only data cache
/// (\c __ldg) when compiled for sm_35 or later.
template<typename T>
class ConstArrayPortalFromTexture : public ArrayPortalFromThrustBase
{
public:
typedef T ValueType;
typedef typename thrust::system::cuda::pointer< T > PointerType;
typedef const T* IteratorType;
VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { }
/// Construct over the half-open device range [begin, end).
VTKM_CONT_EXPORT
ConstArrayPortalFromTexture(const PointerType begin, const PointerType end)
: BeginIterator( begin ),
EndIterator( end )
{ }
/// Copy constructor for any other ConstArrayPortalFromTexture with an iterator
/// type that can be copied to this iterator type. This allows us to do any
/// type casting that the iterators do (like the non-const to const cast).
///
// NOTE(review): when OtherT != T this reads the private members of a
// different class instantiation, which requires a friend declaration that is
// not visible here — confirm this template is only ever used with
// convertible/identical instantiations.
template<typename OtherT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture<OtherT> &src)
: BeginIterator(src.BeginIterator),
EndIterator(src.EndIterator)
{ }
// Cross-instantiation assignment; same private-access caveat as the copy
// constructor above.
template<typename OtherT>
VTKM_EXEC_CONT_EXPORT
ConstArrayPortalFromTexture<T> &operator=(
const ConstArrayPortalFromTexture<OtherT> &src)
{
this->BeginIterator = src.BeginIterator;
this->EndIterator = src.EndIterator;
return *this;
}
/// Number of values spanned by [BeginIterator, EndIterator).
VTKM_EXEC_CONT_EXPORT
vtkm::Id GetNumberOfValues() const {
// Not using std::distance because on CUDA it cannot be used on a device.
return (this->EndIterator - this->BeginIterator);
}
/// Fetch the value at \c index.  On sm_35+ this uses the __ldg intrinsic so
/// the read goes through the read-only data cache; otherwise it is a plain
/// global-memory load.
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
#if __CUDA_ARCH__ >= 350
return __ldg(this->IteratorAt(index).get());
#else
return *this->IteratorAt(index);
#endif
}
// NOTE(review): despite the "Const" in the class name, Set writes through
// the underlying (non-const) device pointer — confirm this is intentional.
VTKM_EXEC_EXPORT
void Set(vtkm::Id index, ValueType value) const {
*this->IteratorAt(index) = value;
}
/// Raw device pointers for interoperating with thrust algorithms.
VTKM_CONT_EXPORT
IteratorType GetIteratorBegin() const { return this->BeginIterator.get(); }
VTKM_CONT_EXPORT
IteratorType GetIteratorEnd() const { return this->EndIterator.get(); }
private:
PointerType BeginIterator;
PointerType EndIterator;
// Pointer arithmetic helper usable in device code.
VTKM_EXEC_EXPORT
PointerType IteratorAt(vtkm::Id index) const {
// Not using std::advance because on CUDA it cannot be used on a device.
return (this->BeginIterator + index);
}
};
}
}
}
} // namespace vtkm::exec::cuda::internal
#endif //vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h

@ -42,11 +42,63 @@
#pragma GCC diagnostic pop
#endif // gcc version >= 4.6
#endif // gcc && !CUDA
#include <boost/utility/enable_if.hpp>
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
// Trait selecting which types may be fetched with the __ldg texture-path
// intrinsic in load_through_texture below.  Primary template: not supported.
//
// NOTE(review): the specializations are keyed on POINTER types (vtkm::Int8*,
// etc.), while load_through_texture queries UseTextureLoad<T> with the plain
// value type — confirm the intended key, otherwise these specializations
// never match and the texture path is never taken.
template<typename T> struct UseTextureLoad {typedef boost::false_type type;};

template<> struct UseTextureLoad<vtkm::Int8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt8*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt16*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Int32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::UInt32*> {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Int32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::UInt32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float32* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Float64* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,2>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float32,4>* > {typedef boost::true_type type; };
template<> struct UseTextureLoad<vtkm::Vec<vtkm::Float64,2>* > {typedef boost::true_type type; };
//this T type is not one that is valid to be loaded through texture memory
// Fallback case: a plain dereference of the device pointer (ordinary
// global-memory load).
template<typename T, typename Enable = void>
struct load_through_texture
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
return *(data.get());
}
};
//this T type is valid to be loaded through texture memory
// Enabled via boost::enable_if on UseTextureLoad<T>.
// NOTE(review): UseTextureLoad's specializations above are keyed on T*, not
// T, so verify this specialization is actually selected for the intended
// value types.
template<typename T>
struct load_through_texture<T, typename ::boost::enable_if< typename UseTextureLoad<T>::type >::type >
{
VTKM_EXEC_EXPORT
static T get(const thrust::system::cuda::pointer<T> data)
{
//only load through a texture if we have sm 35 support
#if __CUDA_ARCH__ >= 350
return __ldg(data.get());
#else
return *(data.get());
#endif
}
};
// Empty base class common to the thrust-backed array portals in this file
// (e.g. ConstArrayPortalFromTexture derives from it).
class ArrayPortalFromThrustBase {};
/// This templated implementation of an ArrayPortal allows you to adapt a pair
@ -168,7 +220,7 @@ public:
VTKM_EXEC_EXPORT
ValueType Get(vtkm::Id index) const {
return *this->IteratorAt(index);
return vtkm::exec::cuda::internal::load_through_texture<ValueType>::get( this->IteratorAt(index) );
}
VTKM_EXEC_EXPORT