vtk-m/vtkm/internal/VariantImplDetail.h.in
Kenneth Moreland 65f6d2aa67 Fix issue with union placeholder on Intel compiler
We have run into an issue with some Intel compilers where if a `union`
contains a `struct` that has some padding for byte alignment, the value
copy might skip over that padding even when the `union` contains a different
type where those bytes are valid. This breaks the value copy of our
`Variant` class.

This is not a unique problem. We have seen the same thing in other
compilers and already have a workaround for when this happens. The
workaround creates a special struct that has no padding placed at the front
of the `union`. The Intel compiler adds a fun twist in that this
placeholder structure only works if the alignment is at least as high as
the struct that follows it.

To get around this problem, make the alignment of the placeholder `struct`
at large as possible for the size of the `union`.

It was taking too long to compile with a device. Redesign the
placeholder to be simpler and compile faster.
2023-09-26 07:25:25 -04:00

483 lines
16 KiB
C

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
$# This file uses the pyexpander macro processing utility to build the
$# FunctionInterface facilities that use a variable number of arguments.
$# Information, documentation, and downloads for pyexpander can be found at:
$#
$# http://pyexpander.sourceforge.net/
$#
$# To build the source code, execute the following (after installing
$# pyexpander, of course):
$#
$# expander.py VariantDetail.h.in > VariantDetail.h
$#
$# Ignore the following comment. It is meant for the generated file.
// **** DO NOT EDIT THIS FILE!!! ****
// This file is automatically generated by VariantDetail.h.in
#if !defined(VTK_M_DEVICE) || !defined(VTK_M_NAMESPACE)
#error VarianImplDetail.h must be included from VariantImpl.h
// Some defines to make my IDE happy.
#define VTK_M_DEVICE
#define VTK_M_NAMESPACE tmp
#endif
#include <vtkm/List.h>
#include <vtkm/Types.h>
#include <vtkm/internal/Assume.h>
#include <vtkmstd/is_trivial.h>
#include <algorithm>
#include <cstddef>
#include <type_traits>
$py(max_expanded=8)\
$# Python commands used in template expansion.
$py(
def type_list(num_params):
if num_params < 0:
return ''
result = 'T0'
for param in range(1, num_params + 1):
result += ', T%d' % param
return result
def typename_list(num_params):
if num_params < 0:
return ''
result = 'typename T0'
for param in range(1, num_params + 1):
result += ', typename T%d' % param
return result
)\
$#
$extend(type_list, typename_list)\
namespace vtkm
{
namespace VTK_M_NAMESPACE
{
namespace detail
{
// --------------------------------------------------------------------------------
// Helper classes to determine if all Variant types are trivial.
template <typename... Ts>
using AllTriviallyCopyable = vtkm::ListAll<vtkm::List<Ts...>, vtkmstd::is_trivially_copyable>;
// Single argument version of is_trivially_constructible
template <typename T>
using Constructible = vtkmstd::is_trivially_constructible<T>;
template <typename... Ts>
using AllTriviallyConstructible = vtkm::ListAll<vtkm::List<Ts...>, Constructible>;
template <typename... Ts>
using AllTriviallyDestructible =
vtkm::ListAll<vtkm::List<Ts...>, vtkmstd::is_trivially_destructible>;
// --------------------------------------------------------------------------------
// Helper functions to determine the maximum type size.
#if defined(VTKM_GCC) && (__GNUC__ == 5)
// GCC5 gives an error with `sizeof(Ts)...` for an unexpanded parameter pack.
template <typename T0>
constexpr std::size_t MaxSizeOf()
{
return sizeof(T0);
}
template <typename T0, typename T1, typename... Ts>
constexpr std::size_t MaxSizeOf()
{
return std::max(sizeof(T0), MaxSizeOf<T1, Ts...>());
}
#else
template <typename... Ts>
constexpr std::size_t MaxSizeOf()
{
return std::max({ sizeof(Ts)... });
}
#endif
// --------------------------------------------------------------------------------
// Helper functions to determine the maximum alignment size.
template <typename... Ts>
constexpr std::size_t MaxAlignmentOf()
{
return std::max({ std::alignment_of<Ts>::value... });
}
// --------------------------------------------------------------------------------
// Placeholder for a fully used structure of the given type.
// This placeholder is used for compilers that do not correctly copy `struct`s
// in `union`s where some of the `struct`s have padding. This is added to the
// front of the `union` for the compiler to pick up and use.
//
// It is normally sufficient to have a full `struct`, but we have also encountered
// compilers that only use it if the alignment is at least as large. But we
// don't want the alignment too large because it can add unwanted padding
// elsewhere. Also, adding `alignas` did not work to resolve this problem for
// the compiler.
template <std::size_t Alignment>
struct TypeForAlignmentImpl;
template <>
struct TypeForAlignmentImpl<8>
{
using type = vtkm::Int64;
};
template <>
struct TypeForAlignmentImpl<4>
{
using type = vtkm::Int32;
};
template <>
struct TypeForAlignmentImpl<2>
{
using type = vtkm::Int16;
};
template <>
struct TypeForAlignmentImpl<1>
{
using type = vtkm::Int8;
};
template <std::size_t Alignment>
using TypeForAlignment = typename TypeForAlignmentImpl<Alignment>::type;
template <std::size_t Size, typename Word, bool = (Size >= 4)>
struct SizedPlaceholderImpl;
template <std::size_t Size, typename Word>
struct SizedPlaceholderImpl<Size, Word, true>
{
Word A;
Word B;
Word C;
Word D;
SizedPlaceholderImpl<Size - 4, Word> E;
};
template <typename Word>
struct SizedPlaceholderImpl<4, Word, true>
{
Word A;
Word B;
Word C;
Word D;
};
template <std::size_t Size, typename Word>
struct SizedPlaceholderImpl<Size, Word, false>
{
Word A;
SizedPlaceholderImpl<Size - 1, Word> B;
};
template <typename Word>
struct SizedPlaceholderImpl<1, Word, false>
{
Word A;
};
template <typename... Ts>
struct SizedPlaceholder
: SizedPlaceholderImpl<(MaxSizeOf<Ts...>() / MaxAlignmentOf<Ts...>()),
TypeForAlignment<MaxAlignmentOf<Ts...>()>>
{
};
// clang-format off
// --------------------------------------------------------------------------------
// Union type used inside of Variant
//
// You may be asking yourself, why not just use an std::aligned_union rather than a real union
// type? That was our first implementation, but the problem is that the std::aligned_union
// reference needs to be recast to the actual type. Typically you would do that with
// reinterpret_cast. However, doing that leads to undefined behavior. The C++ compiler assumes that
// 2 pointers of different types point to different memory (even if it is clear that they are set
// to the same address). That means optimizers can remove code because it "knows" that data in one
// type cannot affect data in another type. (See Shafik Yaghmour's excellent writeup at
// https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8 for more details.) To safely
// change the type of an std::aligned_union, you really have to do an std::memcpy. This is
// problematic for types that cannot be trivially copied. Another problem is that we found that
// device compilers do not optimize the memcpy as well as most CPU compilers. Likely, memcpy is
// used much less frequently on GPU devices.
//
// Part of the trickiness of the union implementation is trying to preserve when the type is
// trivially constructible and copyable. The trick is that if members of the union are not trivial,
// then the default constructors are deleted. To get around that, a non-default constructor is
// added, which we can use to construct the union for non-trivial types. Working with types with
// non-trivial destructors are particularly tricky. Again, if any member of the union has a
// non-trivial destructor, the destructor is deleted. Unlike a constructor, you cannot just say to
// use a different destructor. Thus, we have to define our own destructor for the union.
// Technically, the destructor here does not do anything, but the actual destruction should be
// handled by the Variant class that contains this VariantUnion. We actually need two separate
// implementations of our union, one that defines a destructor and one that use the default
// destructor. If you define your own destructor, you can lose the trivial constructor and trivial
// copy properties.
//
// TD = trivially deconstructible
template <typename T0, typename... Ts>
union VariantUnionTD;
// NTD = non-trivially deconstructible
template <typename T0, typename... Ts>
union VariantUnionNTD;
$for(param_length in range(max_expanded))\
template <$typename_list(param_length)>
union VariantUnionTD<$type_list(param_length)>
{
// Work around issue where some compilers miss initializing some struct members if another entry
// in the varient has a struct with padding. Place an item that requires everthing to be copied.
SizedPlaceholder<$type_list(param_length)> Placeholder;
$for(param_index in range(param_length + 1))\
T$(param_index) V$(param_index);
$endfor\
VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { }
VariantUnionTD() = default;
};
template <$typename_list(param_length)>
union VariantUnionNTD<$type_list(param_length)>
{
// Work around issue where some compilers miss initializing some struct members if another entry
// in the varient has a struct with padding. Place an item that requires everthing to be copied.
SizedPlaceholder<$type_list(param_length)> Placeholder;
$for(param_index in range(param_length + 1))\
T$(param_index) V$(param_index);
$endfor\
VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { }
VariantUnionNTD() = default;
VTK_M_DEVICE ~VariantUnionNTD() { }
};
$endfor\
template <$typename_list(max_expanded), typename... Ts>
union VariantUnionTD<$type_list(max_expanded), Ts...>
{
// Work around issue where CUDA sometimes seems to miss initializing some struct members
// if another entry in the varient has a struct with padding. Place an item that requires
// everthing to be copied.
SizedPlaceholder<$type_list(max_expanded), Ts...> Placeholder;
$for(param_index in range(max_expanded))\
T$(param_index) V$(param_index);
$endfor\
VariantUnionTD<T$(max_expanded), Ts...> Remaining;
VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { }
VariantUnionTD() = default;
};
template <$typename_list(max_expanded), typename... Ts>
union VariantUnionNTD<$type_list(max_expanded), Ts...>
{
// Work around issue where CUDA sometimes seems to miss initializing some struct members
// if another entry in the varient has a struct with padding. Place an item that requires
// everthing to be copied.
SizedPlaceholder<$type_list(max_expanded), Ts...> Placeholder;
$for(param_index in range(max_expanded))\
T$(param_index) V$(param_index);
$endfor\
VariantUnionNTD<T$(max_expanded), Ts...> Remaining;
VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { }
VariantUnionNTD() = default;
VTK_M_DEVICE ~VariantUnionNTD() { }
};
//clang-format on
template <bool TrivialConstructor, typename... Ts>
struct VariantUnionFinder;
template <typename... Ts>
struct VariantUnionFinder<true, Ts...>
{
using type = VariantUnionTD<Ts...>;
};
template <typename... Ts>
struct VariantUnionFinder<false, Ts...>
{
using type = VariantUnionNTD<Ts...>;
};
template <typename... Ts>
using VariantUnion =
typename VariantUnionFinder<AllTriviallyDestructible<Ts...>::value, Ts...>::type;
// --------------------------------------------------------------------------------
// Methods to get values out of the variant union
template <vtkm::IdComponent I, typename UnionType>
struct VariantUnionGetImpl;
$for(param_index in range(max_expanded))\
template <typename UnionType>
struct VariantUnionGetImpl<$(param_index), UnionType>
{
using ReturnType = decltype(std::declval<UnionType>().V$(param_index));
VTK_M_DEVICE static ReturnType& Get(UnionType& storage) noexcept
{
return storage.V$(param_index);
}
VTK_M_DEVICE static const ReturnType& Get(const UnionType& storage) noexcept
{
return storage.V$(param_index);
}
};
$endfor\
template <vtkm::IdComponent I, typename UnionType>
struct VariantUnionGetImpl
{
VTKM_STATIC_ASSERT(I >= $(max_expanded));
using RecursiveGet = VariantUnionGetImpl<I - $(max_expanded), decltype(std::declval<UnionType&>().Remaining)>;
using ReturnType = typename RecursiveGet::ReturnType;
VTK_M_DEVICE static ReturnType& Get(UnionType& storage) noexcept
{
return RecursiveGet::Get(storage.Remaining);
}
VTK_M_DEVICE static const ReturnType& Get(const UnionType& storage) noexcept
{
return RecursiveGet::Get(storage.Remaining);
}
};
template <vtkm::IdComponent I, typename UnionType>
VTK_M_DEVICE auto VariantUnionGet(UnionType& storage) noexcept
-> decltype(VariantUnionGetImpl<I, typename std::decay<UnionType>::type>::Get(storage))&
{
return VariantUnionGetImpl<I, typename std::decay<UnionType>::type>::Get(storage);
}
// --------------------------------------------------------------------------------
// Internal implementation of CastAndCall for Variant
template <std::size_t NumCases>
struct VariantCases
{
template <typename Functor, typename UnionType, typename... Args>
VTK_M_DEVICE static
#ifdef VTKM_HIP
// this is a temporary solution to improve Kokkos/HIP compile times for
// ConnectivityTracer in Rendering.
//
// This function currently gets inlined many times, which dramatically increases
// both compile time and the size of the resulting code-object
__attribute__((noinline))
#else
inline
#endif
auto CastAndCall(
vtkm::IdComponent index,
Functor&& f,
UnionType& storage,
Args&&... args) noexcept(noexcept(f(storage.V0, args...)))
-> decltype(f(storage.V0, args...))
{
VTKM_ASSERT((index >= 0) && (index < static_cast<vtkm::IdComponent>(NumCases)));
switch (index)
{
$for(param_index in range(max_expanded))\
case $(param_index):
// If you get a compile error here, it probably means that you have called
// Variant::CastAndCall with a functor that does not accept one of the types in the
// Variant. The functor you provide must be callable with all types in the Variant, not
// just the one that it currently holds.
return f(storage.V$(param_index), std::forward<Args>(args)...);
$endfor\
default:
return VariantCases<NumCases - $(max_expanded)>::template CastAndCall(
index - $(max_expanded), std::forward<Functor>(f), storage.Remaining, std::forward<Args>(args)...);
}
}
};
template<>
struct VariantCases<1>
{
template <typename Functor, typename UnionType, typename... Args>
VTK_M_DEVICE static inline auto CastAndCall(
vtkm::IdComponent index,
Functor&& f,
UnionType& storage,
Args&&... args) noexcept(noexcept(f(storage.V0, args...)))
-> decltype(f(storage.V0, args...))
{
// Assume index is 0. Saves us some conditionals.
VTKM_ASSERT(index == 0);
(void)index;
return f(storage.V0, std::forward<Args>(args)...);
}
};
$for(case_index in range(2, max_expanded + 1))\
template<>
struct VariantCases<$(case_index)>
{
template <typename Functor, typename UnionType, typename... Args>
VTK_M_DEVICE static
#ifdef VTKM_HIP
// this is a temporary solution to improve Kokkos/HIP compile times for
// ConnectivityTracer in Rendering.
//
// This function currently gets inlined many times, which dramatically increases
// both compile time and the size of the resulting code-object
__attribute__((noinline))
#else
inline
#endif
auto CastAndCall(
vtkm::IdComponent index,
Functor&& f,
UnionType& storage,
Args&&... args) noexcept(noexcept(f(storage.V0, args...)))
-> decltype(f(storage.V0, args...))
{
// Assume index is 0. Saves us some conditionals.
VTKM_ASSERT((index >= 0) && (index < $(case_index)));
switch (index)
{
default:
$for(param_index in range(case_index))\
case $(param_index):
// If you get a compile error here, it probably means that you have called
// Variant::CastAndCall with a functor that does not accept one of the types in the
// Variant. The functor you provide must be callable with all types in the Variant, not
// just the one that it currently holds.
return f(storage.V$(param_index), std::forward<Args>(args)...);
$endfor\
}
}
};
$endfor
template <std::size_t UnionSize, typename Functor, typename UnionType, typename... Args>
VTK_M_DEVICE inline auto VariantCastAndCallImpl(
vtkm::IdComponent index,
Functor&& f,
UnionType& storage,
Args&&... args) noexcept(noexcept(f(storage.V0, args...)))
-> decltype(f(storage.V0, args...))
{
return VariantCases<UnionSize>::CastAndCall(
index, std::forward<Functor>(f), storage, std::forward<Args>(args)...);
}
}
}
} // vtkm::VTK_M_NAMESPACE::detail