From de28a435104cc637b6c5b0f0756d2e2c63dd94c8 Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Tue, 28 Mar 2023 14:32:25 -0600 Subject: [PATCH] Add an entry to VariantUnion to help compiler copy structs I've been seeing errors in a nightly build that compiles for CUDA Pascal using GCC5. The issue is that one of the `ArrayHandleMultiplexer` tests is failing to copy an implicit array correctly. I think the problem is that in this test the first and second type of the `Variant` are the same size, but the first type has some padding in the middle whereas the second type does not. When using this second type, the values in the same position of the padding of the first type don't seem to be initialized properly in the kernel invocation. My nonexhaustive experiment shows that things work OK as long as the first type is large enough and has no fillers. Enforce this by adding an internal entry to the union that is completely full. --- docs/changelog/fix-nightly-gcc5-cuda.md | 70 ++++++++++++ vtkm/internal/VariantImplDetail.h | 143 ++++++++++++++++++++++++ vtkm/internal/VariantImplDetail.h.in | 73 ++++++++++++ 3 files changed, 286 insertions(+) create mode 100644 docs/changelog/fix-nightly-gcc5-cuda.md diff --git a/docs/changelog/fix-nightly-gcc5-cuda.md b/docs/changelog/fix-nightly-gcc5-cuda.md new file mode 100644 index 000000000..86a949704 --- /dev/null +++ b/docs/changelog/fix-nightly-gcc5-cuda.md @@ -0,0 +1,70 @@ +# Fixed issue with trivial variant copies + +A rare error occurred with trivial copies of variants. The problem is likely +a compiler bug, and has so far only been observed when passing the variant +to a CUDA kernel when compiling with GCC 5. + +The problem was caused by structures with padding. `struct` objects in +C/C++ are frequently padded with unused memory to align all entries +properly. For example, consider the following simple `struct`. + +``` cpp +struct FooHasPadding +{ + vtkm::Int32 A; + // Padding here. + vtkm::Int64 C; +}; +``` + +Because the `C` member is a 64-bit integer, it needs to be aligned on +8-byte (i.e., 64-bit) address locations. For this to work, the C++ compiler +adds 4 bytes of padding between `A` and `C` so that an array of +`FooHasPadding`s will have the `C` member always on an 8-byte boundary. + +Now consider a second `struct` that is similar to the first but has a valid +member where the padding would be. + +``` cpp +struct BarNoPadding +{ + vtkm::Int32 A; + vtkm::Int32 B; + vtkm::Int64 C; +}; +``` + +This structure does not need padding because the `A` and `B` members +combine to fill the 8 bytes that `C` needs for the alignment. Both +`FooHasPadding` and `BarNoPadding` fill 16 bytes of memory. The `A` and `C` +members are at the same offsets, respectively, for the two structures. The +`B` member happens to reside just where the padding is for `FooHasPadding`. + +Now, let's say we create a `vtkm::exec::Variant`. +Internally, the `Variant` class holds a union that looks roughly like the +following. + +``` cpp +union VariantUnion +{ + FooHasPadding V0; + BarNoPadding V1; +}; +``` + +This is a perfectly valid use of a `union`. We just need to keep track of +which type of object is in it (which the `Variant` object does for you). + +The problem appeared to occur when `VariantUnion` contained a +`BarNoPadding` and was passed from the host to the device via an argument +to a global function. The compiler must notice that the first type +(`FooHasPadding`) is the "biggest" and uses that for trivial copies (which +just copy bytes like `memcpy`). Since it's using `FooHasPadding` as its +prototype for the byte copy, and accidentally skips over padded regions that +are valid when the `union` contains a `BarNoPadding`. This appears to be a +compiler bug. (At least, I cannot find a reason why this is encroaching +undefined behavior.) + +The solution adds a new, unused type to the internal `union` for `Variant` +that is an object as large as the largest entry in the union and contains +no padding. diff --git a/vtkm/internal/VariantImplDetail.h b/vtkm/internal/VariantImplDetail.h index 896c75ed5..712b35f1c 100644 --- a/vtkm/internal/VariantImplDetail.h +++ b/vtkm/internal/VariantImplDetail.h @@ -24,6 +24,7 @@ #include +#include #include @@ -51,6 +52,58 @@ template using AllTriviallyDestructible = vtkm::ListAll, vtkmstd::is_trivially_destructible>; +// -------------------------------------------------------------------------------- +// Helper functions to determine the maximum type size. +#if defined(VTKM_GCC) && (__GNUC__ == 5) +// GCC5 gives an error with `sizeof(Ts)...` for an unexpanded parameter pack. +template +constexpr std::size_t MaxSizeOf() +{ + return sizeof(T0); +} +template +constexpr std::size_t MaxSizeOf() +{ + return std::max(sizeof(T0), MaxSizeOf()); +} +#else +template +constexpr std::size_t MaxSizeOf() +{ + return std::max({ sizeof(Ts)... }); +} +#endif + +// -------------------------------------------------------------------------------- +// Placeholder for a fullly used structure of the given type. +template 8)> +struct SizedPlaceholder +{ + VTKM_STATIC_ASSERT(Size > 0); + vtkm::Int8 A; + SizedPlaceholder B; +}; + +template <> +struct SizedPlaceholder<1, false> +{ + vtkm::Int8 A; +}; + +template +struct SizedPlaceholder +{ + vtkm::Int8 A; + vtkm::Int8 B; + vtkm::Int8 C; + vtkm::Int8 D; + vtkm::Int8 E; + vtkm::Int8 F; + vtkm::Int8 G; + vtkm::Int8 H; + SizedPlaceholder I; +}; + // clang-format off // -------------------------------------------------------------------------------- @@ -94,6 +147,11 @@ union VariantUnionNTD; template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { } VariantUnionTD() = default; @@ -101,6 +159,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { } VariantUnionNTD() = default; @@ -110,6 +173,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { } @@ -118,6 +186,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { } @@ -128,6 +201,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -137,6 +215,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -148,6 +231,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -158,6 +246,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -170,6 +263,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -181,6 +279,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -194,6 +297,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -206,6 +314,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -220,6 +333,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -233,6 +351,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -248,6 +371,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -262,6 +390,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -279,6 +412,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -296,6 +434,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; diff --git a/vtkm/internal/VariantImplDetail.h.in b/vtkm/internal/VariantImplDetail.h.in index 546cde5fb..bdca13e8c 100644 --- a/vtkm/internal/VariantImplDetail.h.in +++ b/vtkm/internal/VariantImplDetail.h.in @@ -36,6 +36,7 @@ $# Ignore the following comment. It is meant for the generated file. #include +#include #include $py(max_expanded=8)\ @@ -84,6 +85,58 @@ template using AllTriviallyDestructible = vtkm::ListAll, vtkmstd::is_trivially_destructible>; +// -------------------------------------------------------------------------------- +// Helper functions to determine the maximum type size. +#if defined(VTKM_GCC) && (__GNUC__ == 5) +// GCC5 gives an error with `sizeof(Ts)...` for an unexpanded parameter pack. +template +constexpr std::size_t MaxSizeOf() +{ + return sizeof(T0); +} +template +constexpr std::size_t MaxSizeOf() +{ + return std::max(sizeof(T0), MaxSizeOf()); +} +#else +template +constexpr std::size_t MaxSizeOf() +{ + return std::max({ sizeof(Ts)... }); +} +#endif + +// -------------------------------------------------------------------------------- +// Placeholder for a fullly used structure of the given type. +template 8)> +struct SizedPlaceholder +{ + VTKM_STATIC_ASSERT(Size > 0); + vtkm::Int8 A; + SizedPlaceholder B; +}; + +template <> +struct SizedPlaceholder<1, false> +{ + vtkm::Int8 A; +}; + +template +struct SizedPlaceholder +{ + vtkm::Int8 A; + vtkm::Int8 B; + vtkm::Int8 C; + vtkm::Int8 D; + vtkm::Int8 E; + vtkm::Int8 F; + vtkm::Int8 G; + vtkm::Int8 H; + SizedPlaceholder I; +}; + // clang-format off // -------------------------------------------------------------------------------- @@ -128,6 +181,11 @@ $for(param_length in range(max_expanded))\ template <$typename_list(param_length)> union VariantUnionTD<$type_list(param_length)> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(param_length + 1))\ T$(param_index) V$(param_index); $endfor\ @@ -137,6 +195,11 @@ $endfor\ template <$typename_list(param_length)> union VariantUnionNTD<$type_list(param_length)> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(param_length + 1))\ T$(param_index) V$(param_index); $endfor\ @@ -150,6 +213,11 @@ $endfor\ template <$typename_list(max_expanded), typename... Ts> union VariantUnionTD<$type_list(max_expanded), Ts...> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(max_expanded))\ T$(param_index) V$(param_index); $endfor\ @@ -162,6 +230,11 @@ $endfor\ template <$typename_list(max_expanded), typename... Ts> union VariantUnionNTD<$type_list(max_expanded), Ts...> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(max_expanded))\ T$(param_index) V$(param_index); $endfor\