diff --git a/CMake/VTKmCheckCopyright.cmake b/CMake/VTKmCheckCopyright.cmake index 4304bc27a..378876b58 100644 --- a/CMake/VTKmCheckCopyright.cmake +++ b/CMake/VTKmCheckCopyright.cmake @@ -89,7 +89,7 @@ function (get_year var) set(${var} "${result}" PARENT_SCOPE) endfunction (get_year) -set(copyright_file_year 2011) +set(copyright_file_year 2014) get_year(current_year) # Escapes ';' characters (list delimiters) and splits the given string into diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h index 16a5c3270..f6cdfd90a 100644 --- a/vtkm/cont/ArrayHandle.h +++ b/vtkm/cont/ArrayHandle.h @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -173,12 +174,13 @@ public: // If the user writes into the iterator we return, then the execution // array will become invalid. Play it safe and release the execution // resources. (Use the const version to preserve the execution array.) - this->ReleaseResourcesExecution(); + this->ReleaseResourcesExecutionInternal(); return this->Internals->ControlArray.GetPortal(); } else { - throw vtkm::cont::ErrorControlBadValue("ArrayHandle contains no data."); + throw vtkm::cont::ErrorControlInternal( + "ArrayHandle::SyncControlArray did not make control array valid."); } } @@ -197,7 +199,8 @@ public: } else { - throw vtkm::cont::ErrorControlBadValue("ArrayHandle contains no data."); + throw vtkm::cont::ErrorControlInternal( + "ArrayHandle::SyncControlArray did not make control array valid."); } } @@ -224,6 +227,23 @@ public: } } + /// \brief Allocates an array large enough to hold the given number of values. + /// + /// The allocation may be done on an already existing array, but can wipe out + /// any data already in the array. This method can throw + /// ErrorControlOutOfMemory if the array cannot be allocated or + /// ErrorControlBadValue if the allocation is not feasible (for example, the + /// array storage is read-only). + /// + VTKM_CONT_EXPORT + void Allocate(vtkm::Id numberOfValues) + { + this->ReleaseResourcesExecutionInternal(); + this->Internals->UserPortalValid = false; + this->Internals->ControlArray.Allocate(numberOfValues); + this->Internals->ControlArrayValid = true; + } + /// \brief Reduces the size of the array without changing its values. /// /// This method allows you to resize the array without reallocating it. The @@ -270,18 +290,18 @@ public: /// VTKM_CONT_EXPORT void ReleaseResourcesExecution() { - if (this->Internals->ExecutionArrayValid) - { - this->Internals->ExecutionArray->ReleaseResources(); - this->Internals->ExecutionArrayValid = false; - } + // Save any data in the execution environment by making sure it is synced + // with the control environment. + this->SyncControlArray(); + + this->ReleaseResourcesExecutionInternal(); } /// Releases all resources in both the control and execution environments. /// VTKM_CONT_EXPORT void ReleaseResources() { - this->ReleaseResourcesExecution(); + this->ReleaseResourcesExecutionInternal(); // Forget about any user iterators. this->Internals->UserPortalValid = false; @@ -507,15 +527,26 @@ public: /// VTKM_CONT_EXPORT void SyncControlArray() const { - if ( !this->Internals->UserPortalValid - && !this->Internals->ControlArrayValid) + if (!this->Internals->UserPortalValid + && !this->Internals->ControlArrayValid) { // Need to change some state that does not change the logical state from // an external point of view. InternalStruct *internals = const_cast(this->Internals.get()); - internals->ExecutionArray->RetrieveOutputData(internals->ControlArray); - internals->ControlArrayValid = true; + if (this->Internals->ExecutionArrayValid) + { + internals->ExecutionArray->RetrieveOutputData(internals->ControlArray); + internals->ControlArrayValid = true; + } + else + { + // This array is in the null state (there is nothing allocated), but + // the calling function wants to do something with the array. Put this + // class into a valid state by allocating an array of size 0. + internals->ControlArray.Allocate(0); + internals->ControlArrayValid = true; + } } else { @@ -527,6 +558,16 @@ public: } } + VTKM_CONT_EXPORT + void ReleaseResourcesExecutionInternal() + { + if (this->Internals->ExecutionArrayValid) + { + this->Internals->ExecutionArray->ReleaseResources(); + this->Internals->ExecutionArrayValid = false; + } + } + boost::shared_ptr Internals; }; diff --git a/vtkm/cont/Storage.h b/vtkm/cont/Storage.h index c72022448..b36866884 100644 --- a/vtkm/cont/Storage.h +++ b/vtkm/cont/Storage.h @@ -129,7 +129,9 @@ public: /// /// The allocation may be done on an already existing array, but can wipe out /// any data already in the array. This method can throw - /// ErrorControlOutOfMemory if the array cannot be allocated. + /// ErrorControlOutOfMemory if the array cannot be allocated or + /// ErrorControlBadValue if the allocation is not feasible (for example, the + /// array storage is read-only). /// VTKM_CONT_EXPORT void Allocate(vtkm::Id numberOfValues); diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index a74685b07..106771ef8 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -23,6 +23,8 @@ #include #include +#include + // Disable GCC warnings we check vtkmfor but Thrust does not. #if defined(__GNUC__) && !defined(VTKM_CUDA) #if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) @@ -34,7 +36,7 @@ #endif // gcc version >= 4.2 #endif // gcc && !CUDA -#include +#include #include #if defined(__GNUC__) && !defined(VTKM_CUDA) @@ -44,30 +46,12 @@ #endif // gcc && !CUDA #include -#include - -#include namespace vtkm { namespace cont { namespace cuda { namespace internal { -template struct UseTexturePortal {typedef boost::false_type type;}; - -//Currently disabled as we are still tracking down issues with Texture -//Memory. The major issue is that in testing it is slower than classic arrays -#ifdef VTKM_USE_TEXTURE_MEM -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -template<> struct UseTexturePortal {typedef boost::true_type type; }; -#endif - - /// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c /// ArrayManagerExecution class for a thrust device adapter that is designed /// for the cuda backend which has separate memory spaces for host and device. This @@ -77,35 +61,32 @@ template<> struct UseTexturePortal {typedef boost::true_type type /// This array manager should only be used with the cuda device adapter, /// since in the future it will take advantage of texture memory and /// the unique memory access patterns of cuda systems. -template +template class ArrayManagerExecutionThrustDevice { - //we need a way to detect that we are using FERMI or lower and disable - //the usage of texture iterator. The __CUDA_ARCH__ define is only around - //for device code so that can't be used. I expect that we will have to devise - //some form of Try/Compile with CUDA or just offer this as an advanced CMake - //option. We could also try and see if a runtime switch is possible. - public: typedef T ValueType; typedef vtkm::cont::internal::Storage ContainerType; typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType; - typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< T > PortalConstType; + typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< const T > PortalConstType; VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice(): - NumberOfValues(0), - ArrayBegin(), - ArrayEnd() + Array() { } + ~ArrayManagerExecutionThrustDevice() + { + this->ReleaseResources(); + } + /// Returns the size of the array. /// VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const { - return this->NumberOfValues; + return this->Array.size(); } /// Allocates the appropriate size of the array and copies the given data @@ -119,13 +100,8 @@ public: //calling get portal const try { - this->NumberOfValues = arrayPortal.GetNumberOfValues(); - this->ArrayBegin = ::thrust::system::cuda::malloc( static_cast(this->NumberOfValues) ); - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - - ::thrust::copy(arrayPortal.GetRawIterator(), - arrayPortal.GetRawIterator() + this->NumberOfValues, - this->ArrayBegin); + this->Array.assign(arrayPortal.GetRawIterator(), + arrayPortal.GetRawIterator() + arrayPortal.GetNumberOfValues()); } catch (std::bad_alloc error) { @@ -148,13 +124,16 @@ public: ContainerType &vtkmNotUsed(container), vtkm::Id numberOfValues) { - if(this->NumberOfValues > 0) + try { - ::thrust::system::cuda::free( this->ArrayBegin ); + this->Array.resize(numberOfValues); } - this->NumberOfValues = numberOfValues; - this->ArrayBegin = ::thrust::system::cuda::malloc( this->NumberOfValues ); - this->ArrayEnd = this->ArrayBegin + numberOfValues; + catch (std::bad_alloc error) + { + throw vtkm::cont::ErrorControlOutOfMemory(error.what()); + } + + } /// Allocates enough space in \c controlArray and copies the data in the @@ -162,9 +141,9 @@ public: /// VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const { - controlArray.Allocate(this->NumberOfValues); - ::thrust::copy(this->ArrayBegin, - this->ArrayEnd, + controlArray.Allocate(this->Array.size()); + ::thrust::copy( this->Array.data(), + this->Array.data() + this->Array.size(), controlArray.GetPortal().GetRawIterator()); } @@ -174,19 +153,21 @@ public: { // The operation will succeed even if this assertion fails, but this // is still supposed to be a precondition to Shrink. - VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues); - this->NumberOfValues = numberOfValues; - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; + VTKM_ASSERT_CONT(numberOfValues <= this->Array.size()); + + this->Array.resize(numberOfValues); } VTKM_CONT_EXPORT PortalType GetPortal() { - return PortalType(this->ArrayBegin, this->ArrayEnd); + return PortalType( this->Array.data(), + this->Array.data() + this->Array.size()); } VTKM_CONT_EXPORT PortalConstType GetPortalConst() const { - return PortalConstType(this->ArrayBegin, this->ArrayEnd); + return PortalConstType( this->Array.data(), + this->Array.data() + this->Array.size()); } @@ -194,9 +175,8 @@ public: /// VTKM_CONT_EXPORT void ReleaseResources() { - ::thrust::system::cuda::free( this->ArrayBegin ); - this->ArrayBegin = ::thrust::system::cuda::pointer(); - this->ArrayEnd = ::thrust::system::cuda::pointer(); + this->Array.clear(); + this->Array.shrink_to_fit(); } private: @@ -206,171 +186,7 @@ private: void operator=( ArrayManagerExecutionThrustDevice &); - vtkm::Id NumberOfValues; - ::thrust::system::cuda::pointer ArrayBegin; - ::thrust::system::cuda::pointer ArrayEnd; -}; - - -/// This is a specialization that is used to enable texture memory iterators -template -class ArrayManagerExecutionThrustDevice::type >::type > -{ - //we need a way to detect that we are using FERMI or lower and disable - //the usage of texture iterator. The __CUDA_ARCH__ define is only around - //for device code so that can't be used. I expect that we will have to devise - //some form of Try/Compile with CUDA or just offer this as an advanced CMake - //option. We could also try and see if a runtime switch is possible. - -public: - typedef T ValueType; - - typedef vtkm::cont::internal::Storage ContainerType; - - typedef vtkm::exec::cuda::internal::ArrayPortalFromThrust< T > PortalType; - typedef ::vtkm::exec::cuda::internal::DaxTexObjInputIterator TextureIteratorType; - typedef ::vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< TextureIteratorType > PortalConstType; - - VTKM_CONT_EXPORT ArrayManagerExecutionThrustDevice(): - NumberOfValues(0), - ArrayBegin(), - ArrayEnd(), - HaveTextureBound(false), - InputArrayIterator() - { - - } - - ~ArrayManagerExecutionThrustDevice() - { - if(this->HaveTextureBound) - { - this->HaveTextureBound = false; - this->InputArrayIterator.UnbindTexture(); - } - } - - /// Returns the size of the array. - /// - VTKM_CONT_EXPORT vtkm::Id GetNumberOfValues() const { - return this->NumberOfValues; - } - - /// Allocates the appropriate size of the array and copies the given data - /// into the array. - /// - template - VTKM_CONT_EXPORT void LoadDataForInput(PortalControl arrayPortal) - { - //don't bind to the texture yet, as we could have allocate the array - //on a previous call with AllocateArrayForOutput and now are directly - //calling get portal const - try - { - this->NumberOfValues = arrayPortal.GetNumberOfValues(); - this->ArrayBegin = ::thrust::system::cuda::malloc( static_cast(this->NumberOfValues) ); - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - - ::thrust::copy(arrayPortal.GetRawIterator(), - arrayPortal.GetRawIterator() + this->NumberOfValues, - this->ArrayBegin); - } - catch (std::bad_alloc error) - { - throw vtkm::cont::ErrorControlOutOfMemory(error.what()); - } - } - - /// Allocates the appropriate size of the array and copies the given data - /// into the array. - /// - template - VTKM_CONT_EXPORT void LoadDataForInPlace(PortalControl arrayPortal) - { - this->LoadDataForInput(arrayPortal); - } - - /// Allocates the array to the given size. - /// - VTKM_CONT_EXPORT void AllocateArrayForOutput( - ContainerType &vtkmNotUsed(container), - vtkm::Id numberOfValues) - { - if(this->NumberOfValues > 0) - { - ::thrust::system::cuda::free( this->ArrayBegin ); - } - this->NumberOfValues = numberOfValues; - this->ArrayBegin = ::thrust::system::cuda::malloc( this->NumberOfValues ); - this->ArrayEnd = this->ArrayBegin + numberOfValues; - } - - /// Allocates enough space in \c controlArray and copies the data in the - /// device vector into it. - /// - VTKM_CONT_EXPORT void RetrieveOutputData(ContainerType &controlArray) const - { - controlArray.Allocate(this->NumberOfValues); - ::thrust::copy(this->ArrayBegin, - this->ArrayEnd, - controlArray.GetPortal().GetRawIterator()); - } - - /// Resizes the device vector. - /// - VTKM_CONT_EXPORT void Shrink(vtkm::Id numberOfValues) - { - // The operation will succeed even if this assertion fails, but this - // is still supposed to be a precondition to Shrink. - VTKM_ASSERT_CONT(numberOfValues <= this->NumberOfValues); - this->NumberOfValues = numberOfValues; - this->ArrayEnd = this->ArrayBegin + this->NumberOfValues; - } - - VTKM_CONT_EXPORT PortalType GetPortal() - { - return PortalType(this->ArrayBegin, this->ArrayEnd); - } - - VTKM_CONT_EXPORT PortalConstType GetPortalConst() const - { - if(!this->HaveTextureBound) - { - this->HaveTextureBound = true; - this->InputArrayIterator.BindTexture(ArrayBegin,this->NumberOfValues); - } - - //if we have a texture iterator bound use that - return PortalConstType(this->InputArrayIterator, this->NumberOfValues); - } - - - /// Frees all memory. - /// - VTKM_CONT_EXPORT void ReleaseResources() { - if(this->HaveTextureBound) - { - this->HaveTextureBound = false; - this->InputArrayIterator.UnbindTexture(); - } - ::thrust::system::cuda::free( this->ArrayBegin ); - this->ArrayBegin = ::thrust::system::cuda::pointer(); - this->ArrayEnd = ::thrust::system::cuda::pointer(); - } - -private: - // Not implemented - ArrayManagerExecutionThrustDevice( - ArrayManagerExecutionThrustDevice &); - void operator=( - ArrayManagerExecutionThrustDevice &); - - vtkm::Id NumberOfValues; - ::thrust::system::cuda::pointer ArrayBegin; - ::thrust::system::cuda::pointer ArrayEnd; - mutable bool HaveTextureBound; - mutable TextureIteratorType InputArrayIterator; + ::thrust::system::cuda::vector Array; }; diff --git a/vtkm/cont/cuda/internal/MakeThrustIterator.h b/vtkm/cont/cuda/internal/MakeThrustIterator.h index e59d3cdd1..976fe46b9 100644 --- a/vtkm/cont/cuda/internal/MakeThrustIterator.h +++ b/vtkm/cont/cuda/internal/MakeThrustIterator.h @@ -24,7 +24,6 @@ #include #include -#include // Disable GCC warnings we check vtkmfor but Thrust does not. #if defined(__GNUC__) && !defined(VTKM_CUDA) @@ -59,7 +58,6 @@ namespace detail { // Tags to specify what type of thrust iterator to use. struct ThrustIteratorTransformTag { }; struct ThrustIteratorDevicePtrTag { }; -struct ThrustIteratorDeviceTextureTag { }; // Traits to help classify what thrust iterators will be used. template @@ -149,13 +147,6 @@ struct IteratorTraits typedef typename IteratorChooser::Type IteratorType; }; -template -struct IteratorTraits< vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > > -{ - typedef vtkm::exec::cuda::internal::ConstArrayPortalFromTexture< T > PortalType; - typedef ThrustIteratorDeviceTextureTag Tag; - typedef typename PortalType::IteratorType IteratorType; -}; template VTKM_CONT_EXPORT static @@ -190,14 +181,6 @@ MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDevicePtrTag) return MakeDevicePtr(portal.GetIteratorBegin()); } -template -VTKM_CONT_EXPORT static -typename IteratorTraits::IteratorType -MakeIteratorBegin(PortalType portal, detail::ThrustIteratorDeviceTextureTag) -{ - return portal.GetIteratorBegin(); -} - } // namespace detail diff --git a/vtkm/cont/testing/UnitTestArrayHandle.cxx b/vtkm/cont/testing/UnitTestArrayHandle.cxx index bf2cd5856..6f63a07f6 100644 --- a/vtkm/cont/testing/UnitTestArrayHandle.cxx +++ b/vtkm/cont/testing/UnitTestArrayHandle.cxx @@ -113,6 +113,12 @@ struct TryArrayHandleType "Array size did not shrink correctly."); CheckArray(arrayHandle); + std::cout << "Try reallocating array." << std::endl; + arrayHandle.Allocate(ARRAY_SIZE*2); + VTKM_TEST_ASSERT(arrayHandle.GetNumberOfValues() == ARRAY_SIZE*2, + "Array size did not allocate correctly."); + // No point in checking values. This method can invalidate them. + std::cout << "Try in place operation." << std::endl; { typedef typename vtkm::cont::ArrayHandle::template @@ -135,6 +141,25 @@ struct TryArrayHandleType TestValue(index, T()) + T(1)), "Did not get result from in place operation."); } + + std::cout << "Try operations on empty arrays." << std::endl; + // After each operation, reinitialize array in case something gets + // allocated. + arrayHandle = vtkm::cont::ArrayHandle(); + VTKM_TEST_ASSERT(arrayHandle.GetNumberOfValues() == 0, + "Uninitialized array does not report zero values."); + arrayHandle = vtkm::cont::ArrayHandle(); + VTKM_TEST_ASSERT( + arrayHandle.GetPortalConstControl().GetNumberOfValues() == 0, + "Uninitialized array does not give portal with zero values."); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.Shrink(0); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.ReleaseResourcesExecution(); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.ReleaseResources(); + arrayHandle = vtkm::cont::ArrayHandle(); + arrayHandle.PrepareForOutput(ARRAY_SIZE, VTKM_DEFAULT_DEVICE_ADAPTER_TAG()); } }; diff --git a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h b/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h deleted file mode 100644 index d21fcd6f1..000000000 --- a/vtkm/exec/cuda/internal/ArrayPortalFromTexture.h +++ /dev/null @@ -1,490 +0,0 @@ -//============================================================================ -// Copyright (c) Kitware, Inc. -// All rights reserved. -// See LICENSE.txt for details. -// This software is distributed WITHOUT ANY WARRANTY; without even -// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -// PURPOSE. See the above copyright notice for more information. -// -// Copyright 2014 Sandia Corporation. -// Copyright 2014 UT-Battelle, LLC. -// Copyright 2014. Los Alamos National Security -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National -// Laboratory (LANL), the U.S. Government retains certain rights in -// this software. -//============================================================================ - -/****************************************************************************** - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#ifndef vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h -#define vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h - -#include - -#include - -// Disable GCC warnings we check vtkmfor but Thrust does not. -#if defined(__GNUC__) && !defined(VTKM_CUDA) -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) -#pragma GCC diagnostic push -#endif // gcc version >= 4.6 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 2) -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif // gcc version >= 4.2 -#endif // gcc && !CUDA - -#include -#include - -#if defined(__GNUC__) && !defined(VTKM_CUDA) -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 6) -#pragma GCC diagnostic pop -#endif // gcc version >= 4.6 -#endif // gcc && !CUDA - - -namespace -{ - -/** - * \brief Type selection (IF ? ThenType : ElseType) - */ -template -struct If -{ - /// Conditional type result - typedef ThenType Type; // true -}; - -template -struct If -{ - typedef ElseType Type; // false -}; - -/****************************************************************************** -* Size and alignment -******************************************************************************/ - -/// Structure alignment -template -struct AlignBytes -{ - struct Pad - { - T val; - char byte; - }; - - enum - { - /// The alignment of T in bytes - ALIGN_BYTES = sizeof(Pad) - sizeof(T) - }; -}; - -// Specializations where host C++ compilers (e.g., Windows) may disagree with device C++ compilers (EDG) - -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -#ifdef _WIN32 - template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; - template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -#endif -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 8 }; }; - -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -#ifndef _WIN32 - template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; - template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -#endif -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; -template <> struct AlignBytes { enum { ALIGN_BYTES = 16 }; }; - - -/// Unit-words of data movement -template -struct UnitWord -{ - enum { - ALIGN_BYTES = AlignBytes::ALIGN_BYTES - }; - - template - struct IsMultiple - { - enum { - UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, - IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) - }; - }; - - /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - unsigned int, - typename If::IS_MULTIPLE, - unsigned short, - unsigned char>::Type>::Type ShuffleWord; - - /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - unsigned long long, - ShuffleWord>::Type VolatileWord; - - /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - ulonglong2, - VolatileWord>::Type DeviceWord; - - /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T - typedef typename If::IS_MULTIPLE, - uint4, - typename If::IS_MULTIPLE, - uint2, - ShuffleWord>::Type>::Type TextureWord; -}; - -} - - -namespace vtkm { -namespace exec { -namespace cuda { -namespace internal { - -template < - typename T, - typename Offset = ptrdiff_t> -class DaxTexObjInputIterator -{ -public: - - // Required iterator traits - typedef DaxTexObjInputIterator self_type; ///< My own type - typedef Offset difference_type; ///< Type to express the result of subtracting one iterator from another - typedef T value_type; ///< The type of the element the iterator can point to - typedef T* pointer; ///< The type of a pointer to an element the iterator can point to - typedef T reference; ///< The type of a reference to an element the iterator can point to - - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename ::thrust::detail::iterator_facade_category< - ::thrust::device_system_tag, - ::thrust::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category - -private: - - // Largest texture word we can use in device - typedef typename UnitWord::TextureWord TextureWord; - - // Number of texture words per T - enum { TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; - -private: - - const T* ptr; - difference_type tex_offset; - cudaTextureObject_t tex_obj; - -public: - - /// Constructor - __host__ __device__ __forceinline__ DaxTexObjInputIterator() - : - ptr(NULL), - tex_offset(0), - tex_obj(0) - {} - - /// Use this iterator to bind \p ptr with a texture reference - cudaError_t BindTexture( - const ::thrust::system::cuda::pointer ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment - size_t numElements, ///< Number of elements in the range - size_t tex_offset = 0) ///< Offset (in items) from \p ptr denoting the position of the iterator - { - this->ptr = ptr.get(); - this->tex_offset = tex_offset; - - cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); - cudaResourceDesc res_desc; - cudaTextureDesc tex_desc; - memset(&res_desc, 0, sizeof(cudaResourceDesc)); - memset(&tex_desc, 0, sizeof(cudaTextureDesc)); - res_desc.resType = cudaResourceTypeLinear; - res_desc.res.linear.devPtr = (void*)ptr.get(); - res_desc.res.linear.desc = channel_desc; - res_desc.res.linear.sizeInBytes = numElements * sizeof(T); - tex_desc.readMode = cudaReadModeElementType; - - return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); - } - - /// Unbind this iterator from its texture reference - cudaError_t UnbindTexture() - { - return cudaDestroyTextureObject(tex_obj); - } - - /// Postfix increment - __host__ __device__ __forceinline__ self_type operator++(int) - { - self_type retval = *this; - tex_offset++; - return retval; - } - - /// Prefix increment - __host__ __device__ __forceinline__ self_type operator++() - { - tex_offset++; - return *this; - } - - /// Postfix decrement - __host__ __device__ __forceinline__ self_type operator--(int) - { - self_type retval = *this; - tex_offset--; - return retval; - } - - /// Prefix decrement - __host__ __device__ __forceinline__ self_type operator--() - { - tex_offset--; - return *this; - } - - /// Indirection - __host__ __device__ __forceinline__ reference operator*() const - { -#ifndef DAX_CUDA_COMPILATION - // Simply dereference the pointer on the host - return ptr[tex_offset]; -#else - // Move array of uninitialized words, then alias and assign to return value - TextureWord words[TEXTURE_MULTIPLE]; - - #pragma unroll - for (int i = 0; i < TEXTURE_MULTIPLE; ++i) - { - words[i] = tex1Dfetch( - tex_obj, - (tex_offset * TEXTURE_MULTIPLE) + i); - } - - // Load from words - return *reinterpret_cast(words); -#endif - } - - /// Addition - template - __host__ __device__ __forceinline__ self_type operator+(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_obj = tex_obj; - retval.tex_offset = tex_offset + n; - return retval; - } - - /// Addition assignment - template - __host__ __device__ __forceinline__ self_type& operator+=(Distance n) - { - tex_offset += n; - return *this; - } - - /// Subtraction - template - __host__ __device__ __forceinline__ self_type operator-(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_obj = tex_obj; - retval.tex_offset = tex_offset - n; - return retval; - } - - /// Subtraction assignment - template - __host__ __device__ __forceinline__ self_type& operator-=(Distance n) - { - tex_offset -= n; - return *this; - } - - /// Distance - __host__ __device__ __forceinline__ difference_type operator-(self_type other) const - { - return tex_offset - other.tex_offset; - } - - /// Array subscript - template - __host__ __device__ __forceinline__ reference operator[](Distance n) const - { - return *(*this + n); - } - - /// Structure dereference - __host__ __device__ __forceinline__ pointer operator->() - { - return &(*(*this)); - } - - /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) const - { - return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj)); - } - - /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) const - { - return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj)); - } - - /// less than - __host__ __device__ __forceinline__ bool operator<(const self_type& rhs) - { - return (tex_offset < rhs.tex_offset); - } - - /// ostream operator - friend std::ostream& operator<<(std::ostream& os, const self_type& itr) - { - return os; - } - -}; - - -template -class ConstArrayPortalFromTexture -{ -public: - - typedef typename TextureIterator::value_type ValueType; - typedef TextureIterator IteratorType; - - VTKM_EXEC_CONT_EXPORT ConstArrayPortalFromTexture() { } - - VTKM_CONT_EXPORT - ConstArrayPortalFromTexture(IteratorType begin, ptrdiff_t size) - : Length(size), - BeginIterator(begin), - EndIterator(begin+size) - { } - - /// Copy constructor for any other ConstArrayPortalFromTexture with an iterator - /// type that can be copied to this iterator type. This allows us to do any - /// type casting that the iterators do (like the non-const to const cast). - /// - template - VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture(const ConstArrayPortalFromTexture &src) - : Length(src.Length), - BeginIterator(src.BeginIterator), - EndIterator(src.EndIterator) - { } - - template - VTKM_EXEC_CONT_EXPORT - ConstArrayPortalFromTexture &operator=( - const ConstArrayPortalFromTexture &src) - { - this->Length = src.Length; - this->BeginIterator = src.BeginIterator; - this->EndIterator = src.EndIterator; - return *this; - } - - VTKM_EXEC_CONT_EXPORT - vtkm::Id GetNumberOfValues() const { - return static_cast(this->Length); - } - - VTKM_EXEC_EXPORT - ValueType Get(vtkm::Id index) const { - return *this->IteratorAt(index); - } - - VTKM_EXEC_EXPORT - void Set(vtkm::Id index, ValueType value) const { - *this->IteratorAt(index) = value; - } - - VTKM_CONT_EXPORT - IteratorType GetIteratorBegin() const { return this->BeginIterator; } - - VTKM_CONT_EXPORT - IteratorType GetIteratorEnd() const { return this->EndIterator; } - -private: - ptrdiff_t Length; - IteratorType BeginIterator; - IteratorType EndIterator; - - VTKM_EXEC_EXPORT - IteratorType IteratorAt(vtkm::Id index) const { - // Not using std::advance because on CUDA it cannot be used on a device. - return (this->BeginIterator + index); - } -}; - -} -} -} -} // namespace vtkm::exec::cuda::internal - - -#endif //vtk_m_exec_cuda_internal_ArrayPortalFromTexture_h diff --git a/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h b/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h index 788e255e2..9c3d9ffc5 100644 --- a/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h +++ b/vtkm/exec/cuda/internal/ArrayPortalFromThrust.h @@ -42,11 +42,63 @@ #pragma GCC diagnostic pop #endif // gcc version >= 4.6 #endif // gcc && !CUDA + +#include + namespace vtkm { namespace exec { namespace cuda { namespace internal { +template struct UseTextureLoad {typedef boost::false_type type;}; + +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; + +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; + +template<> struct UseTextureLoad {typedef boost::true_type type; }; +template<> struct UseTextureLoad {typedef boost::true_type type; }; + +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; +template<> struct UseTextureLoad* > {typedef boost::true_type type; }; + +//this T type is not one that is valid to be loaded through texture memory +template +struct load_through_texture +{ + VTKM_EXEC_EXPORT + static T get(const thrust::system::cuda::pointer data) + { + return *(data.get()); + } +}; + +//this T type is valid to be loaded through texture memory +template +struct load_through_texture::type >::type > +{ + VTKM_EXEC_EXPORT + static T get(const thrust::system::cuda::pointer data) + { + //only load through a texture if we have sm 35 support +#if __CUDA_ARCH__ >= 350 + return __ldg(data.get()); +#else + return *(data.get()); +#endif + } +}; + + class ArrayPortalFromThrustBase {}; /// This templated implementation of an ArrayPortal allows you to adapt a pair @@ -168,7 +220,7 @@ public: VTKM_EXEC_EXPORT ValueType Get(vtkm::Id index) const { - return *this->IteratorAt(index); + return vtkm::exec::cuda::internal::load_through_texture::get( this->IteratorAt(index) ); } VTKM_EXEC_EXPORT