From 3c8ce36666c9eef1f69aedf669fea01498942cc4 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 28 Jan 2015 15:54:45 -0500 Subject: [PATCH 01/12] Properly deallocate cuda memory when we are done with it. --- .../cuda/internal/ArrayManagerExecutionThrustDevice.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index a74685b07..77822b31c 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -102,6 +102,11 @@ public: } + ~ArrayManagerExecutionThrustDevice() + { + this->ReleaseResources(); + } + /// Returns the size of the array. /// VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const { @@ -244,11 +249,7 @@ public: ~ArrayManagerExecutionThrustDevice() { - if(this->HaveTextureBound) - { - this->HaveTextureBound = false; - this->InputArrayIterator.UnbindTexture(); - } + this->ReleaseResources(); } /// Returns the size of the array. From c224c2b98a86decd46f72fb967b53388bd092372 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Mon, 9 Feb 2015 14:54:04 -0700 Subject: [PATCH 02/12] Make ArrayHandle work better when uninitialized Fixed a problem where ArrayHandle would cause a crash if you tried to get the control portal on an uninitialized array (it was supposed to throw an exception). Also changed some methods in ArrayHandle so that they work resonably without error when used with an uninitialized array. Specifically, the aforementioned behavior was changed to "allocate" an array of size 0 (that is, call Allocate(0) on the storage object) to return an empty array portal rather than throw an error. Although this use of ArrayHandle can be considered erroneous, it is convenient the get an empty array portal when dealing with a potentially unallocated array rather than create a special condition. --- vtkm/cont/ArrayHandle.h | 26 +++++++++++++++++------ vtkm/cont/testing/UnitTestArrayHandle.cxx | 19 +++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h index 16a5c3270..b3b58f85b 100644 --- a/vtkm/cont/ArrayHandle.h +++ b/vtkm/cont/ArrayHandle.h @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -178,7 +179,8 @@ public: } else { - throw vtkm::cont::ErrorControlBadValue("ArrayHandle contains no data."); + throw vtkm::cont::ErrorControlInternal( + "ArrayHandle::SyncControlArray did not make control array valid."); } } @@ -197,7 +199,8 @@ public: } else { - throw vtkm::cont::ErrorControlBadValue("ArrayHandle contains no data."); + throw vtkm::cont::ErrorControlInternal( + "ArrayHandle::SyncControlArray did not make control array valid."); } } @@ -507,15 +510,26 @@ public: /// VTKM_CONT_EXPORT void SyncControlArray() const { - if ( !this->Internals->UserPortalValid - && !this->Internals->ControlArrayValid) + if (!this->Internals->UserPortalValid + && !this->Internals->ControlArrayValid) { // Need to change some state that does not change the logical state from // an external point of view. InternalStruct *internals = const_cast(this->Internals.get()); - internals->ExecutionArray->RetrieveOutputData(internals->ControlArray); - internals->ControlArrayValid = true; + if (this->Internals->ExecutionArrayValid) + { + internals->ExecutionArray->RetrieveOutputData(internals->ControlArray); + internals->ControlArrayValid = true; + } + else + { + // This array is in the null state (there is nothing allocated), but + // the calling function wants to do something with the array. Put this + // class into a valid state by allocating an array of size 0. + internals->ControlArray.Allocate(0); + internals->ControlArrayValid = true; + } } else { diff --git a/vtkm/cont/testing/UnitTestArrayHandle.cxx b/vtkm/cont/testing/UnitTestArrayHandle.cxx index 8d956566c..6393483f0 100644 --- a/vtkm/cont/testing/UnitTestArrayHandle.cxx +++ b/vtkm/cont/testing/UnitTestArrayHandle.cxx @@ -135,6 +135,25 @@ struct TryArrayHandleType TestValue(index, T()) + T(1)), "Did not get result from in place operation."); } + + std::cout << "Try operations on empty arrays." << std::endl; + // After each operation, reinitialize array in case something gets + // allocated. + arrayHandle = vtkm::cont::ArrayHandle(); + VTKM_TEST_ASSERT(arrayHandle.GetNumberOfValues() == 0, + "Uninitialized array does not report zero values."); + arrayHandle = vtkm::cont::ArrayHandle(); + VTKM_TEST_ASSERT( + arrayHandle.GetPortalConstControl().GetNumberOfValues() == 0, + "Uninitialized array does not give portal with zero values."); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.Shrink(0); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.ReleaseResourcesExecution(); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.ReleaseResources(); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.PrepareForOutput(ARRAY_SIZE, VTKM_DEFAULT_DEVICE_ADAPTER_TAG()); } }; From 2f781dfe7a1c6e68c8fc2af97f8ed64b96b76dd5 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Tue, 10 Feb 2015 15:49:55 -0700 Subject: [PATCH 03/12] Preserve data in ArrayHandle::ReleaseResourcesExecution Previously when ReleaseResourcesExecution was called, the method blindly deleted the execution array regardless of whether there was a valid copy in the control environment. This could potentially lose data. What if someone wanted to conserve memory on the device by clearing the array of an output array? There is also now an internal method that blindly deletes the array. This is good for internal functions that are doing something to invalidate the execution data anyway. --- vtkm/cont/ArrayHandle.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h index b3b58f85b..7da668690 100644 --- a/vtkm/cont/ArrayHandle.h +++ b/vtkm/cont/ArrayHandle.h @@ -174,7 +174,7 @@ public: // If the user writes into the iterator we return, then the execution // array will become invalid. Play it safe and release the execution // resources. (Use the const version to preserve the execution array.) - this->ReleaseResourcesExecution(); + this->ReleaseResourcesExecutionInternal(); return this->Internals->ControlArray.GetPortal(); } else @@ -273,18 +273,18 @@ public: /// VTKM_CONT_EXPORT void ReleaseResourcesExecution() { - if (this->Internals->ExecutionArrayValid) - { - this->Internals->ExecutionArray->ReleaseResources(); - this->Internals->ExecutionArrayValid = false; - } + // Save any data in the execution environment by making sure it is synced + // with the control environment. + this->SyncControlArray(); + + this->ReleaseResourcesExecutionInternal(); } /// Releases all resources in both the control and execution environments. /// VTKM_CONT_EXPORT void ReleaseResources() { - this->ReleaseResourcesExecution(); + this->ReleaseResourcesExecutionInternal(); // Forget about any user iterators. this->Internals->UserPortalValid = false; @@ -541,6 +541,16 @@ public: } } + VTKM_CONT_EXPORT + void ReleaseResourcesExecutionInternal() + { + if (this->Internals->ExecutionArrayValid) + { + this->Internals->ExecutionArray->ReleaseResources(); + this->Internals->ExecutionArrayValid = false; + } + } + boost::shared_ptr Internals; }; From 6141e83be45b20123d0f354e2f179726e7385420 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Tue, 10 Feb 2015 15:58:41 -0700 Subject: [PATCH 04/12] Expose allocation in ArrayHandle. Add an Allocate method in ArrayHandle that basically forwards the alllocate request to the storage object. This allows some measure of control of the array from the control side. You can allocate the array and set values (by getting the control array portal) if you so desire. --- vtkm/cont/ArrayHandle.h | 17 +++++++++++++++++ vtkm/cont/Storage.h | 4 +++- vtkm/cont/testing/UnitTestArrayHandle.cxx | 6 ++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h index 7da668690..f6cdfd90a 100644 --- a/vtkm/cont/ArrayHandle.h +++ b/vtkm/cont/ArrayHandle.h @@ -227,6 +227,23 @@ public: } } + /// \brief Allocates an array large enough to hold the given number of values. + /// + /// The allocation may be done on an already existing array, but can wipe out + /// any data already in the array. This method can throw + /// ErrorControlOutOfMemory if the array cannot be allocated or + /// ErrorControlBadValue if the allocation is not feasible (for example, the + /// array storage is read-only). + /// + VTKM_CONT_EXPORT + void Allocate(vtkm::Id numberOfValues) + { + this->ReleaseResourcesExecutionInternal(); + this->Internals->UserPortalValid = false; + this->Internals->ControlArray.Allocate(numberOfValues); + this->Internals->ControlArrayValid = true; + } + /// \brief Reduces the size of the array without changing its values. /// /// This method allows you to resize the array without reallocating it. The diff --git a/vtkm/cont/Storage.h b/vtkm/cont/Storage.h index c72022448..b36866884 100644 --- a/vtkm/cont/Storage.h +++ b/vtkm/cont/Storage.h @@ -129,7 +129,9 @@ public: /// /// The allocation may be done on an already existing array, but can wipe out /// any data already in the array. This method can throw - /// ErrorControlOutOfMemory if the array cannot be allocated. + /// ErrorControlOutOfMemory if the array cannot be allocated or + /// ErrorControlBadValue if the allocation is not feasible (for example, the + /// array storage is read-only). /// VTKM_CONT_EXPORT void Allocate(vtkm::Id numberOfValues); diff --git a/vtkm/cont/testing/UnitTestArrayHandle.cxx b/vtkm/cont/testing/UnitTestArrayHandle.cxx index 6393483f0..a8ee60c35 100644 --- a/vtkm/cont/testing/UnitTestArrayHandle.cxx +++ b/vtkm/cont/testing/UnitTestArrayHandle.cxx @@ -113,6 +113,12 @@ struct TryArrayHandleType "Array size did not shrink correctly."); CheckArray(arrayHandle); + std::cout << "Try reallocating array." << std::endl; + arrayHandle.Allocate(ARRAY_SIZE*2); + VTKM_TEST_ASSERT(arrayHandle.GetNumberOfValues() == ARRAY_SIZE*2, + "Array size did not allocate correctly."); + // No point in checking values. This method can invalidate them. + std::cout << "Try in place operation." << std::endl; { typedef typename vtkm::cont::ArrayHandle::template From baf941beb1d0bdbdfdefd20f2de291b4f7fdf862 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Wed, 11 Feb 2015 09:45:36 -0700 Subject: [PATCH 05/12] Fix the date manipulation on the check copyright script When a new file is added to VTK-m, the copyright statement should go at the top of the file. The copyright contains a date. What should that date be? I usually set the date to the current year so older files will have an older copyright whereas newer files will have a newer one. The check copyright script needs to be flexible on the date. There was an error in the script that was copied over from Dax. It was checking for the year 2011, the start of the Dax project, and replacing that in the text. VTK-m started in 2014, so the script really needs to check for that year instead. --- CMake/VTKmCheckCopyright.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMake/VTKmCheckCopyright.cmake b/CMake/VTKmCheckCopyright.cmake index 4304bc27a..378876b58 100644 --- a/CMake/VTKmCheckCopyright.cmake +++ b/CMake/VTKmCheckCopyright.cmake @@ -89,7 +89,7 @@ function (get_year var) set(${var} "${result}" PARENT_SCOPE) endfunction (get_year) -set(copyright_file_year 2011) +set(copyright_file_year 2014) get_year(current_year) # Escapes ';' characters (list delimiters) and splits the given string into From 9b4997362128640a196318f947bdb6f0559c41ba Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 5 Mar 2015 10:22:59 -0500 Subject: [PATCH 06/12] Use __ldg instead of texture object. --- .../ArrayManagerExecutionThrustDevice.h | 48 +-- vtkm/cont/cuda/internal/MakeThrustIterator.h | 4 +- .../cuda/internal/ArrayPortalFromTexture.h | 379 ++---------------- 3 files changed, 45 insertions(+), 386 deletions(-) diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index 77822b31c..cb7fed1d8 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -55,17 +55,24 @@ namespace internal { template struct UseTexturePortal {typedef boost::false_type type;}; -//Currently disabled as we are still tracking down issues with Texture -//Memory. The major issue is that in testing it is slower than classic arrays -#ifdef VTKM_USE_TEXTURE_MEM template<> struct UseTexturePortal {typedef boost::true_type type; }; template<> struct UseTexturePortal {typedef boost::true_type type; }; template<> struct UseTexturePortal {typedef boost::true_type type; }; template<> struct UseTexturePortal {typedef boost::true_type type; }; template<> struct UseTexturePortal {typedef boost::true_type type; }; template<> struct UseTexturePortal {typedef boost::true_type type; }; + +template<> struct UseTexturePortal > {typedef boost::true_type type; }; +template<> struct UseTexturePortal > {typedef boost::true_type type; }; +template<> struct UseTexturePortal > {typedef boost::true_type type; }; +template<> struct UseTexturePortal > {typedef boost::true_type type; }; + template<> struct UseTexturePortal {typedef boost::true_type type; }; -#endif +template<> struct UseTexturePortal {typedef boost::true_type type; }; + +template<> struct UseTexturePortal > {typedef boost::true_type type; }; +template<> struct UseTexturePortal > {typedef boost::true_type type; }; +template<> struct UseTexturePortal > {typedef boost::true_type type; }; /// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c @@ -222,27 +229,19 @@ template class ArrayManagerExecutionThrustDevice::type >::type > { - //we need a way to detect that we are using FERMI or lower and disable - //the usage of texture iterator. The __CUDA_ARCH__ define is only around - //for device code so that can't be used. I expect that we will have to devise - //some form of Try/Compile with CUDA or just offer this as an advanced CMake - //option. We could also try and see if a runtime switch is possible. - public: typedef T ValueType; typedef vtkm::cont::internal::Storage ContainerType; typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType; - typedef ::vtkm::exec::cuda::internal::DaxTexObjInputIterator TextureIteratorType; - typedef ::vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< TextureIteratorType > PortalConstType; + typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalConstType; + VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice(): NumberOfValues(0), ArrayBegin(), - ArrayEnd(), - HaveTextureBound(false), - InputArrayIterator() + ArrayEnd() { } @@ -336,25 +335,14 @@ public: VTKM_CONT_EXPORT PortalConstType GetPortalConst() const { - if(!this->HaveTextureBound) - { - this->HaveTextureBound = true; - this->InputArrayIterator.BindTexture(ArrayBegin,this->NumberOfValues); - } - - //if we have a texture iterator bound use that - return PortalConstType(this->InputArrayIterator, this->NumberOfValues); + return PortalConstType(this->ArrayBegin, this->ArrayEnd); } /// Frees all memory. /// - VTKM_CONT_EXPORT void ReleaseResources() { - if(this->HaveTextureBound) - { - this->HaveTextureBound = false; - this->InputArrayIterator.UnbindTexture(); - } + VTKM_CONT_EXPORT void ReleaseResources() + { ::thrust::system::cuda::free( this->ArrayBegin ); this->ArrayBegin = ::thrust::system::cuda::pointer(); this->ArrayEnd = ::thrust::system::cuda::pointer(); @@ -370,8 +358,6 @@ private: vtkm::Id NumberOfValues; ::thrust::system::cuda::pointer ArrayBegin; ::thrust::system::cuda::pointer ArrayEnd; - mutable bool HaveTextureBound; - mutable TextureIteratorType InputArrayIterator; }; diff --git a/vtkm/cont/cuda/internal/MakeThrustIterator.h b/vtkm/cont/cuda/internal/MakeThrustIterator.h index e59d3cdd1..a3dbcb8e3 100644 --- a/vtkm/cont/cuda/internal/MakeThrustIterator.h +++ b/vtkm/cont/cuda/internal/MakeThrustIterator.h @@ -154,7 +154,7 @@ struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< { typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType; typedef ThrustIteratorDeviceTextureTag Tag; - typedef typename PortalType::IteratorType IteratorType; + typedef thrust::system::cuda::pointer< const T > IteratorType; }; template @@ -195,7 +195,7 @@ VTKM_CONT_EXPORT static typename IteratorTraits::IteratorType MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag) { - return portal.GetIteratorBegin(); + return MakeDevicePtr(portal.GetIteratorBegin()); } } // namespace detail diff --git a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h b/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h index d21fcd6f1..8d4fdd086 100644 --- a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h +++ b/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h @@ -74,375 +74,44 @@ #endif // gcc && !CUDA -namespace -{ - -/** - * \brief Type selection (IF ? ThenType : ElseType) - */ -template -struct If -{ - /// Conditional type result - typedef ThenType Type; // true -}; - -template -struct If -{ - typedef ElseType Type; // false -}; - -/****************************************************************************** -* Size and alignment -******************************************************************************/ - -/// Structure alignment -template -struct AlignBytes -{ - struct Pad - { - T val; - char byte; - }; - - enum - { - /// The alignment of T in bytes - ALIGN_BYTES = sizeof(Pad) - sizeof(T) - }; -}; - -// Specializations where host C++ compilers (e.g., Windows) may disagree with device C++ compilers (EDG) - -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -#ifdef _WIN32 - template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; - template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -#endif -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; - -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -#ifndef _WIN32 - template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; - template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -#endif -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; - - -/// Unit-words of data movement -template -struct UnitWord -{ - enum { - ALIGN_BYTES = AlignBytes::ALIGN_BYTES - }; - - template - struct IsMultiple - { - enum { - UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, - IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) - }; - }; - - /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - unsigned int, - typename If::IS_MULTIPLE, - unsigned short, - unsigned char>::Type>::Type ShuffleWord; - - /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - unsigned long long, - ShuffleWord>::Type VolatileWord; - - /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - ulonglong2, - VolatileWord>::Type DeviceWord; - - /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - uint4, - typename If::IS_MULTIPLE, - uint2, - ShuffleWord>::Type>::Type TextureWord; -}; - -} - - namespace vtkm { namespace exec { namespace cuda { namespace internal { -template < - typename T, - typename Offset = ptrdiff_t> -class DaxTexObjInputIterator +template +class ConstArrayPortalFromTexture : public ArrayPortalFromThrustBase { public: - // Required iterator traits - typedef DaxTexObjInputIterator self_type; ///< My own type - typedef Offset difference_type; ///< Type to express the result of subtracting one iterator from another - typedef T value_type; ///< The type of the element the iterator can point to - typedef T* pointer; ///< The type of a pointer to an element the iterator can point to - typedef T reference; ///< The type of a reference to an element the iterator can point to - - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename ::thrust::detail::iterator_facade_category< - ::thrust::device_system_tag, - ::thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category - -private: - - // Largest texture word we can use in device - typedef typename UnitWord::TextureWord TextureWord; - - // Number of texture words per T - enum { TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; - -private: - - const T* ptr; - difference_type tex_offset; - cudaTextureObject_t tex_obj; - -public: - - /// Constructor - __host__ __device__ __forceinline__ DaxTexObjInputIterator() - : - ptr(NULL), - tex_offset(0), - tex_obj(0) - {} - - /// Use this iterator to bind \p ptr with a texture reference - cudaError_t BindTexture( - const ::thrust::system::cuda::pointer ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment - size_t numElements, ///< Number of elements in the range - size_t tex_offset = 0) ///< Offset (in items) from \p ptr denoting the position of the iterator - { - this->ptr = ptr.get(); - this->tex_offset = tex_offset; - - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - cudaResourceDesc res_desc; - cudaTextureDesc tex_desc; - memset(&res_desc, 0, sizeof(cudaResourceDesc)); - memset(&tex_desc, 0, sizeof(cudaTextureDesc)); - res_desc.resType = cudaResourceTypeLinear; - res_desc.res.linear.devPtr = (void*)ptr.get(); - res_desc.res.linear.desc = channel_desc; - res_desc.res.linear.sizeInBytes = numElements * sizeof(T); - tex_desc.readMode = cudaReadModeElementType; - - return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); - } - - /// Unbind this iterator from its texture reference - cudaError_t UnbindTexture() - { - return cudaDestroyTextureObject(tex_obj); - } - - /// Postfix increment - __host__ __device__ __forceinline__ self_type operator++(int) - { - self_type retval = *this; - tex_offset++; - return retval; - } - - /// Prefix increment - __host__ __device__ __forceinline__ self_type operator++() - { - tex_offset++; - return *this; - } - - /// Postfix decrement - __host__ __device__ __forceinline__ self_type operator--(int) - { - self_type retval = *this; - tex_offset--; - return retval; - } - - /// Prefix decrement - __host__ __device__ __forceinline__ self_type operator--() - { - tex_offset--; - return *this; - } - - /// Indirection - __host__ __device__ __forceinline__ reference operator*() const - { -#ifndef DAX_CUDA_COMPILATION - // Simply dereference the pointer on the host - return ptr[tex_offset]; -#else - // Move array of uninitialized words, then alias and assign to return value - TextureWord words[TEXTURE_MULTIPLE]; - - #pragma unroll - for (int i = 0; i < TEXTURE_MULTIPLE; ++i) - { - words[i] = tex1Dfetch( - tex_obj, - (tex_offset * TEXTURE_MULTIPLE) + i); - } - - // Load from words - return *reinterpret_cast(words); -#endif - } - - /// Addition - template - __host__ __device__ __forceinline__ self_type operator+(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_obj = tex_obj; - retval.tex_offset = tex_offset + n; - return retval; - } - - /// Addition assignment - template - __host__ __device__ __forceinline__ self_type& operator+=(Distance n) - { - tex_offset += n; - return *this; - } - - /// Subtraction - template - __host__ __device__ __forceinline__ self_type operator-(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_obj = tex_obj; - retval.tex_offset = tex_offset - n; - return retval; - } - - /// Subtraction assignment - template - __host__ __device__ __forceinline__ self_type& operator-=(Distance n) - { - tex_offset -= n; - return *this; - } - - /// Distance - __host__ __device__ __forceinline__ difference_type operator-(self_type other) const - { - return tex_offset - other.tex_offset; - } - - /// Array subscript - template - __host__ __device__ __forceinline__ reference operator[](Distance n) const - { - return *(*this + n); - } - - /// Structure dereference - __host__ __device__ __forceinline__ pointer operator->() - { - return &(*(*this)); - } - - /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) const - { - return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj)); - } - - /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) const - { - return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj)); - } - - /// less than - __host__ __device__ __forceinline__ bool operator<(const self_type& rhs) - { - return (tex_offset < rhs.tex_offset); - } - - /// ostream operator - friend std::ostream& operator<<(std::ostream& os, const self_type& itr) - { - return os; - } - -}; - - -template -class ConstArrayPortalFromTexture -{ -public: - - typedef typename TextureIterator::value_type ValueType; - typedef TextureIterator IteratorType; + typedef T ValueType; + typedef typename thrust::system::cuda::pointer< T > PointerType; + typedef const T* IteratorType; VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { } VTKM_CONT_EXPORT - ConstArrayPortalFromTexture(IteratorType begin, ptrdiff_t size) - : Length(size), - BeginIterator(begin), - EndIterator(begin+size) - { } + ConstArrayPortalFromTexture(const PointerType begin, const PointerType end) + : BeginIterator( begin ), + EndIterator( end ) + { } /// Copy constructor for any other ConstArrayPortalFromTexture with an iterator /// type that can be copied to this iterator type. This allows us to do any /// type casting that the iterators do (like the non-const to const cast). /// - template + template VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture &src) - : Length(src.Length), - BeginIterator(src.BeginIterator), + ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture &src) + : BeginIterator(src.BeginIterator), EndIterator(src.EndIterator) { } - template + template VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture &operator=( - const ConstArrayPortalFromTexture &src) + ConstArrayPortalFromTexture &operator=( + const ConstArrayPortalFromTexture &src) { - this->Length = src.Length; this->BeginIterator = src.BeginIterator; this->EndIterator = src.EndIterator; return *this; @@ -450,12 +119,17 @@ public: VTKM_EXEC_CONT_EXPORT vtkm::Id GetNumberOfValues() const { - return static_cast(this->Length); + // Not using std::distance because on CUDA it cannot be used on a device. + return (this->EndIterator - this->BeginIterator); } VTKM_EXEC_EXPORT ValueType Get(vtkm::Id index) const { +#if __CUDA_ARCH__ >= 350 + return __ldg(this->IteratorAt(index).get()); +#else return *this->IteratorAt(index); +#endif } VTKM_EXEC_EXPORT @@ -464,18 +138,17 @@ public: } VTKM_CONT_EXPORT - IteratorType GetIteratorBegin() const { return this->BeginIterator; } + IteratorType GetIteratorBegin() const { return this->BeginIterator.get(); } VTKM_CONT_EXPORT - IteratorType GetIteratorEnd() const { return this->EndIterator; } + IteratorType GetIteratorEnd() const { return this->EndIterator.get(); } private: - ptrdiff_t Length; - IteratorType BeginIterator; - IteratorType EndIterator; + PointerType BeginIterator; + PointerType EndIterator; VTKM_EXEC_EXPORT - IteratorType IteratorAt(vtkm::Id index) const { + PointerType IteratorAt(vtkm::Id index) const { // Not using std::advance because on CUDA it cannot be used on a device. return (this->BeginIterator + index); } From 63b1f03187481dbb2ecf0b93d451deb54d4b3850 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 9 Mar 2015 16:34:48 -0400 Subject: [PATCH 07/12] Simplify the implementation of loading through textures. We don't need this super complicated system for texture loading. --- .../ArrayManagerExecutionThrustDevice.h | 170 +----------------- vtkm/cont/cuda/internal/MakeThrustIterator.h | 17 -- .../cuda/internal/ArrayPortalFromTexture.h | 163 ----------------- .../cuda/internal/ArrayPortalFromThrust.h | 54 +++++- 4 files changed, 54 insertions(+), 350 deletions(-) delete mode 100644 vtkm/exec/cuda/internal/ArrayPortalFromTexture.h diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index cb7fed1d8..e70bc9070 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -44,37 +44,12 @@ #endif // gcc && !CUDA #include -#include - -#include namespace vtkm { namespace cont { namespace cuda { namespace internal { -template struct UseTexturePortal {typedef boost::false_type type;}; - -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; - -template<> struct UseTexturePortal > {typedef boost::true_type type; }; -template<> struct UseTexturePortal > {typedef boost::true_type type; }; -template<> struct UseTexturePortal > {typedef boost::true_type type; }; -template<> struct UseTexturePortal > {typedef boost::true_type type; }; - -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; - -template<> struct UseTexturePortal > {typedef boost::true_type type; }; -template<> struct UseTexturePortal > {typedef boost::true_type type; }; -template<> struct UseTexturePortal > {typedef boost::true_type type; }; - - /// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c /// ArrayManagerExecution class for a thrust device adapter that is designed /// for the cuda backend which has separate memory spaces for host and device. This @@ -84,15 +59,9 @@ template<> struct UseTexturePortal > {typedef boost:: /// This array manager should only be used with the cuda device adapter, /// since in the future it will take advantage of texture memory and /// the unique memory access patterns of cuda systems. -template +template class ArrayManagerExecutionThrustDevice { - //we need a way to detect that we are using FERMI or lower and disable - //the usage of texture iterator. The __CUDA_ARCH__ define is only around - //for device code so that can't be used. I expect that we will have to devise - //some form of Try/Compile with CUDA or just offer this as an advanced CMake - //option. We could also try and see if a runtime switch is possible. - public: typedef T ValueType; @@ -224,143 +193,6 @@ private: }; -/// This is a specialization that is used to enable texture memory iterators -template -class ArrayManagerExecutionThrustDevice::type >::type > -{ -public: - typedef T ValueType; - - typedef vtkm::cont::internal::Storage ContainerType; - - typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType; - typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalConstType; - - - VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice(): - NumberOfValues(0), - ArrayBegin(), - ArrayEnd() - { - - } - - ~ArrayManagerExecutionThrustDevice() - { - this->ReleaseResources(); - } - - /// Returns the size of the array. - /// - VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const { - return this->NumberOfValues; - } - - /// Allocates the appropriate size of the array and copies the given data - /// into the array. - /// - template - VTKM_CONT_EXPORT void LoadDataForInput(PortalControl arrayPortal) - { - //don't bind to the texture yet, as we could have allocate the array - //on a previous call with AllocateArrayForOutput and now are directly - //calling get portal const - try - { - this->NumberOfValues = arrayPortal.GetNumberOfValues(); - this->ArrayBegin = ::thrust::system::cuda::malloc( static_cast(this->NumberOfValues) ); - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - - ::thrust::copy(arrayPortal.GetRawIterator(), - arrayPortal.GetRawIterator() + this->NumberOfValues, - this->ArrayBegin); - } - catch (std::bad_alloc error) - { - throw vtkm::cont::ErrorControlOutOfMemory(error.what()); - } - } - - /// Allocates the appropriate size of the array and copies the given data - /// into the array. - /// - template - VTKM_CONT_EXPORT void LoadDataForInPlace(PortalControl arrayPortal) - { - this->LoadDataForInput(arrayPortal); - } - - /// Allocates the array to the given size. - /// - VTKM_CONT_EXPORT void AllocateArrayForOutput( - ContainerType &vtkmNotUsed(container), - vtkm::Id numberOfValues) - { - if(this->NumberOfValues > 0) - { - ::thrust::system::cuda::free( this->ArrayBegin ); - } - this->NumberOfValues = numberOfValues; - this->ArrayBegin = ::thrust::system::cuda::malloc( this->NumberOfValues ); - this->ArrayEnd = this->ArrayBegin + numberOfValues; - } - - /// Allocates enough space in \c controlArray and copies the data in the - /// device vector into it. - /// - VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const - { - controlArray.Allocate(this->NumberOfValues); - ::thrust::copy(this->ArrayBegin, - this->ArrayEnd, - controlArray.GetPortal().GetRawIterator()); - } - - /// Resizes the device vector. - /// - VTKM_CONT_EXPORT void Shrink(vtkm::Id numberOfValues) - { - // The operation will succeed even if this assertion fails, but this - // is still supposed to be a precondition to Shrink. - VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues); - this->NumberOfValues = numberOfValues; - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - } - - VTKM_CONT_EXPORT PortalType GetPortal() - { - return PortalType(this->ArrayBegin, this->ArrayEnd); - } - - VTKM_CONT_EXPORT PortalConstType GetPortalConst() const - { - return PortalConstType(this->ArrayBegin, this->ArrayEnd); - } - - - /// Frees all memory. - /// - VTKM_CONT_EXPORT void ReleaseResources() - { - ::thrust::system::cuda::free( this->ArrayBegin ); - this->ArrayBegin = ::thrust::system::cuda::pointer(); - this->ArrayEnd = ::thrust::system::cuda::pointer(); - } - -private: - // Not implemented - ArrayManagerExecutionThrustDevice( - ArrayManagerExecutionThrustDevice &); - void operator=( - ArrayManagerExecutionThrustDevice &); - - vtkm::Id NumberOfValues; - ::thrust::system::cuda::pointer ArrayBegin; - ::thrust::system::cuda::pointer ArrayEnd; -}; - - } } } diff --git a/vtkm/cont/cuda/internal/MakeThrustIterator.h b/vtkm/cont/cuda/internal/MakeThrustIterator.h index a3dbcb8e3..976fe46b9 100644 --- a/vtkm/cont/cuda/internal/MakeThrustIterator.h +++ b/vtkm/cont/cuda/internal/MakeThrustIterator.h @@ -24,7 +24,6 @@ #include #include -#include // Disable GCC warnings we check vtkmfor but Thrust does not. #if defined(__GNUC__) && !defined(VTKM_CUDA) @@ -59,7 +58,6 @@ namespace detail { // Tags to specify what type of thrust iterator to use. struct ThrustIteratorTransformTag { }; struct ThrustIteratorDevicePtrTag { }; -struct ThrustIteratorDeviceTextureTag { }; // Traits to help classify what thrust iterators will be used. template @@ -149,13 +147,6 @@ struct IteratorTraits typedef typename IteratorChooser::Type IteratorType; }; -template -struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > > -{ - typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType; - typedef ThrustIteratorDeviceTextureTag Tag; - typedef thrust::system::cuda::pointer< const T > IteratorType; -}; template VTKM_CONT_EXPORT static @@ -190,14 +181,6 @@ MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDevicePtrTag) return MakeDevicePtr(portal.GetIteratorBegin()); } -template -VTKM_CONT_EXPORT static -typename IteratorTraits::IteratorType -MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag) -{ - return MakeDevicePtr(portal.GetIteratorBegin()); -} - } // namespace detail diff --git a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h b/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h deleted file mode 100644 index 8d4fdd086..000000000 --- a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h +++ /dev/null @@ -1,163 +0,0 @@ -//============================================================================ -// Copyright (c) Kitware, Inc. -// All rights reserved. -// See LICENSE.txt for details. -// This software is distributed WITHOUT ANY WARRANTY; without even -// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -// PURPOSE. See the above copyright notice for more information. -// -// Copyright 2014 Sandia Corporation. -// Copyright 2014 UT-Battelle, LLC. -// Copyright 2014. Los Alamos National Security -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National -// Laboratory (LANL), the U.S. Government retains certain rights in -// this software. -//============================================================================ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#ifndef vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h -#define vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h - -#include - -#include - -// Disable GCC warnings we check vtkmfor but Thrust does not. -#if defined(__GNUC__) && !defined(VTKM_CUDA) -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) -#pragma GCC diagnostic push -#endif // gcc version >= 4.6 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 2) -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif // gcc version >= 4.2 -#endif // gcc && !CUDA - -#include -#include - -#if defined(__GNUC__) && !defined(VTKM_CUDA) -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) -#pragma GCC diagnostic pop -#endif // gcc version >= 4.6 -#endif // gcc && !CUDA - - -namespace vtkm { -namespace exec { -namespace cuda { -namespace internal { - -template -class ConstArrayPortalFromTexture : public ArrayPortalFromThrustBase -{ -public: - - typedef T ValueType; - typedef typename thrust::system::cuda::pointer< T > PointerType; - typedef const T* IteratorType; - - VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { } - - VTKM_CONT_EXPORT - ConstArrayPortalFromTexture(const PointerType begin, const PointerType end) - : BeginIterator( begin ), - EndIterator( end ) - { } - - /// Copy constructor for any other ConstArrayPortalFromTexture with an iterator - /// type that can be copied to this iterator type. This allows us to do any - /// type casting that the iterators do (like the non-const to const cast). - /// - template - VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture &src) - : BeginIterator(src.BeginIterator), - EndIterator(src.EndIterator) - { } - - template - VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture &operator=( - const ConstArrayPortalFromTexture &src) - { - this->BeginIterator = src.BeginIterator; - this->EndIterator = src.EndIterator; - return *this; - } - - VTKM_EXEC_CONT_EXPORT - vtkm::Id GetNumberOfValues() const { - // Not using std::distance because on CUDA it cannot be used on a device. - return (this->EndIterator - this->BeginIterator); - } - - VTKM_EXEC_EXPORT - ValueType Get(vtkm::Id index) const { -#if __CUDA_ARCH__ >= 350 - return __ldg(this->IteratorAt(index).get()); -#else - return *this->IteratorAt(index); -#endif - } - - VTKM_EXEC_EXPORT - void Set(vtkm::Id index, ValueType value) const { - *this->IteratorAt(index) = value; - } - - VTKM_CONT_EXPORT - IteratorType GetIteratorBegin() const { return this->BeginIterator.get(); } - - VTKM_CONT_EXPORT - IteratorType GetIteratorEnd() const { return this->EndIterator.get(); } - -private: - PointerType BeginIterator; - PointerType EndIterator; - - VTKM_EXEC_EXPORT - PointerType IteratorAt(vtkm::Id index) const { - // Not using std::advance because on CUDA it cannot be used on a device. - return (this->BeginIterator + index); - } -}; - -} -} -} -} // namespace vtkm::exec::cuda::internal - - -#endif //vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h diff --git a/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h b/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h index 788e255e2..9c3d9ffc5 100644 --- a/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h +++ b/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h @@ -42,11 +42,63 @@ #pragma GCC diagnostic pop #endif // gcc version >= 4.6 #endif // gcc && !CUDA + +#include + namespace vtkm { namespace exec { namespace cuda { namespace internal { +template struct UseTextureLoad {typedef boost::false_type type;}; + +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; + +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; + +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; + +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; + +//this T type is not one that is valid to be loaded through texture memory +template +struct load_through_texture +{ + VTKM_EXEC_EXPORT + static T get(const thrust::system::cuda::pointer data) + { + return *(data.get()); + } +}; + +//this T type is valid to be loaded through texture memory +template +struct load_through_texture::type >::type > +{ + VTKM_EXEC_EXPORT + static T get(const thrust::system::cuda::pointer data) + { + //only load through a texture if we have sm 35 support +#if __CUDA_ARCH__ >= 350 + return __ldg(data.get()); +#else + return *(data.get()); +#endif + } +}; + + class ArrayPortalFromThrustBase {}; /// This templated implementation of an ArrayPortal allows you to adapt a pair @@ -168,7 +220,7 @@ public: VTKM_EXEC_EXPORT ValueType Get(vtkm::Id index) const { - return *this->IteratorAt(index); + return vtkm::exec::cuda::internal::load_through_texture::get( this->IteratorAt(index) ); } VTKM_EXEC_EXPORT From 86dc8f1d382b8edb1a2e664cd0a70d955ee56d8b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 9 Mar 2015 16:48:57 -0400 Subject: [PATCH 08/12] Move back to thrust::cuda::vector to properly handle allocating uint8's Our approach of using the underlying allocator inside thrust was a bad approach, for some reason it fails to properly allocate uint8's or int8's on the correct boundaries. I expect that this logic is somewhere else in the code and instead we should use thrust::system::cuda::vector which does this properly. --- .../ArrayManagerExecutionThrustDevice.h | 61 +++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index e70bc9070..106771ef8 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -23,6 +23,8 @@ #include #include +#include + // Disable GCC warnings we check vtkmfor but Thrust does not. #if defined(__GNUC__) && !defined(VTKM_CUDA) #if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) @@ -34,7 +36,7 @@ #endif // gcc version >= 4.2 #endif // gcc && !CUDA -#include +#include #include #if defined(__GNUC__) && !defined(VTKM_CUDA) @@ -68,12 +70,10 @@ public: typedef vtkm::cont::internal::Storage ContainerType; typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType; - typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< T > PortalConstType; + typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< const T > PortalConstType; VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice(): - NumberOfValues(0), - ArrayBegin(), - ArrayEnd() + Array() { } @@ -86,7 +86,7 @@ public: /// Returns the size of the array. /// VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const { - return this->NumberOfValues; + return this->Array.size(); } /// Allocates the appropriate size of the array and copies the given data @@ -100,13 +100,8 @@ public: //calling get portal const try { - this->NumberOfValues = arrayPortal.GetNumberOfValues(); - this->ArrayBegin = ::thrust::system::cuda::malloc( static_cast(this->NumberOfValues) ); - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - - ::thrust::copy(arrayPortal.GetRawIterator(), - arrayPortal.GetRawIterator() + this->NumberOfValues, - this->ArrayBegin); + this->Array.assign(arrayPortal.GetRawIterator(), + arrayPortal.GetRawIterator() + arrayPortal.GetNumberOfValues()); } catch (std::bad_alloc error) { @@ -129,13 +124,16 @@ public: ContainerType &vtkmNotUsed(container), vtkm::Id numberOfValues) { - if(this->NumberOfValues > 0) + try { - ::thrust::system::cuda::free( this->ArrayBegin ); + this->Array.resize(numberOfValues); } - this->NumberOfValues = numberOfValues; - this->ArrayBegin = ::thrust::system::cuda::malloc( this->NumberOfValues ); - this->ArrayEnd = this->ArrayBegin + numberOfValues; + catch (std::bad_alloc error) + { + throw vtkm::cont::ErrorControlOutOfMemory(error.what()); + } + + } /// Allocates enough space in \c controlArray and copies the data in the @@ -143,9 +141,9 @@ public: /// VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const { - controlArray.Allocate(this->NumberOfValues); - ::thrust::copy(this->ArrayBegin, - this->ArrayEnd, + controlArray.Allocate(this->Array.size()); + ::thrust::copy( this->Array.data(), + this->Array.data() + this->Array.size(), controlArray.GetPortal().GetRawIterator()); } @@ -155,19 +153,21 @@ public: { // The operation will succeed even if this assertion fails, but this // is still supposed to be a precondition to Shrink. - VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues); - this->NumberOfValues = numberOfValues; - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; + VTKM_ASSERT_CONT(numberOfValues <= this->Array.size()); + + this->Array.resize(numberOfValues); } VTKM_CONT_EXPORT PortalType GetPortal() { - return PortalType(this->ArrayBegin, this->ArrayEnd); + return PortalType( this->Array.data(), + this->Array.data() + this->Array.size()); } VTKM_CONT_EXPORT PortalConstType GetPortalConst() const { - return PortalConstType(this->ArrayBegin, this->ArrayEnd); + return PortalConstType( this->Array.data(), + this->Array.data() + this->Array.size()); } @@ -175,9 +175,8 @@ public: /// VTKM_CONT_EXPORT void ReleaseResources() { - ::thrust::system::cuda::free( this->ArrayBegin ); - this->ArrayBegin = ::thrust::system::cuda::pointer(); - this->ArrayEnd = ::thrust::system::cuda::pointer(); + this->Array.clear(); + this->Array.shrink_to_fit(); } private: @@ -187,9 +186,7 @@ private: void operator=( ArrayManagerExecutionThrustDevice &); - vtkm::Id NumberOfValues; - ::thrust::system::cuda::pointer ArrayBegin; - ::thrust::system::cuda::pointer ArrayEnd; + ::thrust::system::cuda::vector Array; }; From f45b3363e1dc9b42f98bce5ca797a77c2d23a9db Mon Sep 17 00:00:00 2001 From: Dave Pugmire Date: Wed, 25 Mar 2015 09:56:48 -0400 Subject: [PATCH 09/12] comments --- vtkm/cont/DataSet.h | 1 + 1 file changed, 1 insertion(+) diff --git a/vtkm/cont/DataSet.h b/vtkm/cont/DataSet.h index 8177c123d..4ea333b54 100644 --- a/vtkm/cont/DataSet.h +++ b/vtkm/cont/DataSet.h @@ -151,6 +151,7 @@ public: ExplicitConnectivity conn; RegularConnectivity3D reg; + //TODO: Logical structure: vtkm::Extents? Use EAVL logicalStructure? //traditional data-model vtkm::cont::ArrayHandle > Points; From 8f3ed89e92fefc91c04998adc0e852b55fe0add7 Mon Sep 17 00:00:00 2001 From: Dave Pugmire Date: Wed, 25 Mar 2015 09:57:42 -0400 Subject: [PATCH 10/12] comments --- vtkm/cont/testing/UnitTestDataSet.cxx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vtkm/cont/testing/UnitTestDataSet.cxx b/vtkm/cont/testing/UnitTestDataSet.cxx index e2e3ad6eb..2fea57ee3 100644 --- a/vtkm/cont/testing/UnitTestDataSet.cxx +++ b/vtkm/cont/testing/UnitTestDataSet.cxx @@ -27,6 +27,13 @@ #include #include +/* +call notes. +wrap execution portal with restrictors. (abandone the fixed length array). +compile time polymorphic types. + +*/ + static const int LEN_IDS = 6; class CellType : public vtkm::worklet::WorkletMapTopology From a4f740b9337ce30fa4fbe39fb82ca7744a1a4ec7 Mon Sep 17 00:00:00 2001 From: Dave Pugmire Date: Wed, 25 Mar 2015 09:57:58 -0400 Subject: [PATCH 11/12] comments --- vtkm/cont/arg/TransportTagTopologyIn.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vtkm/cont/arg/TransportTagTopologyIn.h b/vtkm/cont/arg/TransportTagTopologyIn.h index 956b2d0f7..ba2dbebe4 100644 --- a/vtkm/cont/arg/TransportTagTopologyIn.h +++ b/vtkm/cont/arg/TransportTagTopologyIn.h @@ -48,6 +48,8 @@ struct Transport Date: Wed, 15 Apr 2015 10:45:39 -0400 Subject: [PATCH 12/12] move connectivity into their own files --- vtkm/cont/CMakeLists.txt | 2 + vtkm/cont/DataSet.h | 115 +------------------------- vtkm/cont/ExplicitConnectivity.h | 53 ++++++++++++ vtkm/cont/RegularConnectivity.h | 95 +++++++++++++++++++++ vtkm/cont/testing/UnitTestDataSet.cxx | 4 + 5 files changed, 156 insertions(+), 113 deletions(-) create mode 100644 vtkm/cont/ExplicitConnectivity.h create mode 100644 vtkm/cont/RegularConnectivity.h diff --git a/vtkm/cont/CMakeLists.txt b/vtkm/cont/CMakeLists.txt index 098b9982b..2f355a718 100644 --- a/vtkm/cont/CMakeLists.txt +++ b/vtkm/cont/CMakeLists.txt @@ -34,6 +34,7 @@ set(headers DeviceAdapterSerial.h DynamicArrayHandle.h DynamicPointCoordinates.h + ExplicitConnectivity.h Error.h ErrorControl.h ErrorControlAssert.h @@ -45,6 +46,7 @@ set(headers PointCoordinatesArray.h PointCoordinatesListTag.h PointCoordinatesUniform.h + RegularConnectivity.h Storage.h StorageBasic.h StorageImplicit.h diff --git a/vtkm/cont/DataSet.h b/vtkm/cont/DataSet.h index 4ea333b54..a45d6969b 100644 --- a/vtkm/cont/DataSet.h +++ b/vtkm/cont/DataSet.h @@ -6,123 +6,12 @@ #include #include #include +#include +#include namespace vtkm { namespace cont { -class ExplicitConnectivity -{ -public: - ExplicitConnectivity() {} - - vtkm::Id GetNumberOfElements() - { - return Shapes.GetNumberOfValues(); - } - vtkm::Id GetNumberOfIndices(vtkm::Id index) - { - return NumIndices.GetPortalControl().Get(index); - } - vtkm::Id GetElementShapeType(vtkm::Id index) - { - return Shapes.GetPortalControl().Get(index); - } - template - void GetIndices(vtkm::Id index, vtkm::Vec &ids) - { - int n = GetNumberOfIndices(index); - int start = MapCellToConnectivityIndex.GetPortalControl().Get(index); - for (int i=0; i - void AddShape(vtkm::CellType cellType, int numVertices, vtkm::Vec &ids) - { - ///\todo: how do I modify an array handle? - } - - vtkm::cont::ArrayHandle Shapes; - vtkm::cont::ArrayHandle NumIndices; - vtkm::cont::ArrayHandle Connectivity; - vtkm::cont::ArrayHandle MapCellToConnectivityIndex; -}; - -class RegularConnectivity3D -{ -public: - void SetNodeDimension3D(int node_i, int node_j, int node_k) - { - cellDims[0] = node_i-1; - cellDims[1] = node_j-1; - cellDims[2] = node_k-1; - nodeDims[0] = node_i; - nodeDims[1] = node_j; - nodeDims[2] = node_k; - } - - vtkm::Id GetNumberOfElements() - { - return cellDims[0]*cellDims[1]*cellDims[2]; - } - vtkm::Id GetNumberOfIndices(vtkm::Id) - { - return 8; - } - vtkm::Id GetElementShapeType(vtkm::Id) - { - return VTKM_VOXEL; - } - template - void GetIndices(vtkm::Id index, vtkm::Vec &ids) - { - int i,j,k; - CalculateLogicalCellIndices3D(index, i,j,k); - ///\todo: assert ItemTupleLength >= 8, or return early? - ids[0] = CalculateNodeIndex3D(i, j, k); - if (ItemTupleLength <= 1) return; - ids[1] = ids[0] + 1; - if (ItemTupleLength <= 2) return; - ids[2] = ids[0] + nodeDims[0]; - if (ItemTupleLength <= 3) return; - ids[3] = ids[2] + 1; - if (ItemTupleLength <= 4) return; - ids[4] = ids[0] + nodeDims[0]*nodeDims[1]; - if (ItemTupleLength <= 5) return; - ids[5] = ids[4] + 1; - if (ItemTupleLength <= 6) return; - ids[6] = ids[4] + nodeDims[0]; - if (ItemTupleLength <= 7) return; - ids[7] = ids[6] + 1; - } -private: - int cellDims[3]; - int nodeDims[3]; - int CalculateCellIndex3D(int i, int j, int k) - { - return (k * cellDims[1] + j) * cellDims[0] + i; - } - int CalculateNodeIndex3D(int i, int j, int k) - { - return (k * nodeDims[1] + j) * nodeDims[0] + i; - } - void CalculateLogicalCellIndices3D(int index, int &i, int &j, int &k) - { - int cellDims01 = cellDims[0] * cellDims[1]; - k = index / cellDims01; - int indexij = index % cellDims01; - j = indexij / cellDims[0]; - i = indexij % cellDims[0]; - } - void CalculateLogicalNodeIndices3D(int index, int &i, int &j, int &k) - { - int nodeDims01 = nodeDims[0] * nodeDims[1]; - k = index / nodeDims01; - int indexij = index % nodeDims01; - j = indexij / nodeDims[0]; - i = indexij % nodeDims[0]; - } -}; - class DataSet { public: diff --git a/vtkm/cont/ExplicitConnectivity.h b/vtkm/cont/ExplicitConnectivity.h new file mode 100644 index 000000000..45c534452 --- /dev/null +++ b/vtkm/cont/ExplicitConnectivity.h @@ -0,0 +1,53 @@ +#ifndef vtk_m_cont_ExplicitConnectivity_h +#define vtk_m_cont_ExplicitConnectivity_h + +#include +#include +#include +#include +#include + +namespace vtkm { +namespace cont { + +class ExplicitConnectivity +{ +public: + ExplicitConnectivity() {} + + vtkm::Id GetNumberOfElements() + { + return Shapes.GetNumberOfValues(); + } + vtkm::Id GetNumberOfIndices(vtkm::Id index) + { + return NumIndices.GetPortalControl().Get(index); + } + vtkm::Id GetElementShapeType(vtkm::Id index) + { + return Shapes.GetPortalControl().Get(index); + } + template + void GetIndices(vtkm::Id index, vtkm::Vec &ids) + { + int n = GetNumberOfIndices(index); + int start = MapCellToConnectivityIndex.GetPortalControl().Get(index); + for (int i=0; i + void AddShape(vtkm::CellType cellType, int numVertices, vtkm::Vec &ids) + { + ///\todo: how do I modify an array handle? + } + + vtkm::cont::ArrayHandle Shapes; + vtkm::cont::ArrayHandle NumIndices; + vtkm::cont::ArrayHandle Connectivity; + vtkm::cont::ArrayHandle MapCellToConnectivityIndex; +}; + +} +} // namespace vtkm::cont + +#endif //vtk_m_cont_ExplicitConnectivity_h diff --git a/vtkm/cont/RegularConnectivity.h b/vtkm/cont/RegularConnectivity.h new file mode 100644 index 000000000..ece30a58a --- /dev/null +++ b/vtkm/cont/RegularConnectivity.h @@ -0,0 +1,95 @@ +#ifndef vtk_m_cont_RegularConnectivity_h +#define vtk_m_cont_RegularConnectivity_h + +#include +#include +#include +#include +#include + +namespace vtkm { +namespace cont { + +class RegularConnectivity +{ +public: + void SetNodeDimension(int node_i, int node_j, int node_k) + { + cellDims[0] = node_i-1; + cellDims[1] = node_j-1; + cellDims[2] = node_k-1; + nodeDims[0] = node_i; + nodeDims[1] = node_j; + nodeDims[2] = node_k; + } + + vtkm::Id GetNumberOfElements() + { + return cellDims[0]*cellDims[1]*cellDims[2]; + } + vtkm::Id GetNumberOfIndices(vtkm::Id) + { + return 8; + } + vtkm::Id GetElementShapeType(vtkm::Id) + { + return VTKM_VOXEL; + } + template + void GetIndices(vtkm::Id index, vtkm::Vec &ids) + { + int i,j,k; + CalculateLogicalCellIndices3D(index, i,j,k); + ///\todo: assert ItemTupleLength >= 8, or return early? + ids[0] = CalculateNodeIndex3D(i, j, k); + if (ItemTupleLength <= 1) return; + ids[1] = ids[0] + 1; + if (ItemTupleLength <= 2) return; + ids[2] = ids[0] + nodeDims[0]; + if (ItemTupleLength <= 3) return; + ids[3] = ids[2] + 1; + if (ItemTupleLength <= 4) return; + ids[4] = ids[0] + nodeDims[0]*nodeDims[1]; + if (ItemTupleLength <= 5) return; + ids[5] = ids[4] + 1; + if (ItemTupleLength <= 6) return; + ids[6] = ids[4] + nodeDims[0]; + if (ItemTupleLength <= 7) return; + ids[7] = ids[6] + 1; + } +private: + int cellDims[3]; + int nodeDims[3]; + int CalculateCellIndex(int i, int j, int k) + { + return (k * cellDims[1] + j) * cellDims[0] + i; + } + int CalculateNodeIndex(int i, int j, int k) + { + return (k * nodeDims[1] + j) * nodeDims[0] + i; + } + void CalculateLogicalCellIndices(int index, int &i, int &j, int &k) + { + int cellDims01 = cellDims[0] * cellDims[1]; + k = index / cellDims01; + int indexij = index % cellDims01; + j = indexij / cellDims[0]; + i = indexij % cellDims[0]; + } + void CalculateLogicalNodeIndices(int index, int &i, int &j, int &k) + { + int nodeDims01 = nodeDims[0] * nodeDims[1]; + k = index / nodeDims01; + int indexij = index % nodeDims01; + j = indexij / nodeDims[0]; + i = indexij % nodeDims[0]; + } +}; + +//TODO: +//Add specialized 1D and 2D versions. + +} +} // namespace vtkm::cont + +#endif //vtk_m_cont_RegularConnectivity_h diff --git a/vtkm/cont/testing/UnitTestDataSet.cxx b/vtkm/cont/testing/UnitTestDataSet.cxx index 2fea57ee3..9f2ad03a4 100644 --- a/vtkm/cont/testing/UnitTestDataSet.cxx +++ b/vtkm/cont/testing/UnitTestDataSet.cxx @@ -18,6 +18,10 @@ // this software. //============================================================================ +//#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL +//#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_CUDA +//#include + #include #include #include