diff --git a/docs/changelog/fix-nightly-gcc5-cuda.md b/docs/changelog/fix-nightly-gcc5-cuda.md new file mode 100644 index 000000000..86a949704 --- /dev/null +++ b/docs/changelog/fix-nightly-gcc5-cuda.md @@ -0,0 +1,70 @@ +# Fixed issue with trivial variant copies + +A rare error occurred with trivial copies of variants. The problem is likely +a compiler bug, and has so far only been observed when passing the variant +to a CUDA kernel when compiling with GCC 5. + +The problem was caused by structures with padding. `struct` objects in +C/C++ are frequently padded with unused memory to align all entries +properly. For example, consider the following simple `struct`. + +``` cpp +struct FooHasPadding +{ + vtkm::Int32 A; + // Padding here. + vtkm::Int64 C; +}; +``` + +Because the `C` member is a 64-bit integer, it needs to be aligned on +8-byte (i.e., 64-bit) address locations. For this to work, the C++ compiler +adds 4 bytes of padding between `A` and `C` so that an array of +`FooHasPadding`s will have the `C` member always on an 8-byte boundary. + +Now consider a second `struct` that is similar to the first but has a valid +member where the padding would be. + +``` cpp +struct BarNoPadding +{ + vtkm::Int32 A; + vtkm::Int32 B; + vtkm::Int64 C; +}; +``` + +This structure does not need padding because the `A` and `B` members +combine to fill the 8 bytes that `C` needs for the alignment. Both +`FooHasPadding` and `BarNoPadding` fill 16 bytes of memory. The `A` and `C` +members are at the same offsets, respectively, for the two structures. The +`B` member happens to reside just where the padding is for `FooHasPadding`. + +Now, let's say we create a `vtkm::exec::Variant`. +Internally, the `Variant` class holds a union that looks roughly like the +following. + +``` cpp +union VariantUnion +{ + FooHasPadding V0; + BarNoPadding V1; +}; +``` + +This is a perfectly valid use of a `union`. We just need to keep track of +which type of object is in it (which the `Variant` object does for you). + +The problem appeared to occur when `VariantUnion` contained a +`BarNoPadding` and was passed from the host to the device via an argument +to a global function. The compiler must notice that the first type +(`FooHasPadding`) is the "biggest" and uses that for trivial copies (which +just copy bytes like `memcpy`). Since it's using `FooHasPadding` as its +prototype for the byte copy, and accidentally skips over padded regions that +are valid when the `union` contains a `BarNoPadding`. This appears to be a +compiler bug. (At least, I cannot find a reason why this is encroaching +undefined behavior.) + +The solution adds a new, unused type to the internal `union` for `Variant` +that is an object as large as the largest entry in the union and contains +no padding. diff --git a/vtkm/internal/VariantImplDetail.h b/vtkm/internal/VariantImplDetail.h index 896c75ed5..712b35f1c 100644 --- a/vtkm/internal/VariantImplDetail.h +++ b/vtkm/internal/VariantImplDetail.h @@ -24,6 +24,7 @@ #include +#include #include @@ -51,6 +52,58 @@ template using AllTriviallyDestructible = vtkm::ListAll, vtkmstd::is_trivially_destructible>; +// -------------------------------------------------------------------------------- +// Helper functions to determine the maximum type size. +#if defined(VTKM_GCC) && (__GNUC__ == 5) +// GCC5 gives an error with `sizeof(Ts)...` for an unexpanded parameter pack. +template +constexpr std::size_t MaxSizeOf() +{ + return sizeof(T0); +} +template +constexpr std::size_t MaxSizeOf() +{ + return std::max(sizeof(T0), MaxSizeOf()); +} +#else +template +constexpr std::size_t MaxSizeOf() +{ + return std::max({ sizeof(Ts)... }); +} +#endif + +// -------------------------------------------------------------------------------- +// Placeholder for a fullly used structure of the given type. +template 8)> +struct SizedPlaceholder +{ + VTKM_STATIC_ASSERT(Size > 0); + vtkm::Int8 A; + SizedPlaceholder B; +}; + +template <> +struct SizedPlaceholder<1, false> +{ + vtkm::Int8 A; +}; + +template +struct SizedPlaceholder +{ + vtkm::Int8 A; + vtkm::Int8 B; + vtkm::Int8 C; + vtkm::Int8 D; + vtkm::Int8 E; + vtkm::Int8 F; + vtkm::Int8 G; + vtkm::Int8 H; + SizedPlaceholder I; +}; + // clang-format off // -------------------------------------------------------------------------------- @@ -94,6 +147,11 @@ union VariantUnionNTD; template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { } VariantUnionTD() = default; @@ -101,6 +159,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { } VariantUnionNTD() = default; @@ -110,6 +173,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; VTK_M_DEVICE VariantUnionTD(vtkm::internal::NullType) { } @@ -118,6 +186,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; VTK_M_DEVICE VariantUnionNTD(vtkm::internal::NullType) { } @@ -128,6 +201,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -137,6 +215,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -148,6 +231,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -158,6 +246,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -170,6 +263,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -181,6 +279,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -194,6 +297,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -206,6 +314,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -220,6 +333,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -233,6 +351,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -248,6 +371,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -262,6 +390,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -279,6 +412,11 @@ union VariantUnionNTD template union VariantUnionTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; @@ -296,6 +434,11 @@ union VariantUnionTD template union VariantUnionNTD { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + T0 V0; T1 V1; T2 V2; diff --git a/vtkm/internal/VariantImplDetail.h.in b/vtkm/internal/VariantImplDetail.h.in index 546cde5fb..bdca13e8c 100644 --- a/vtkm/internal/VariantImplDetail.h.in +++ b/vtkm/internal/VariantImplDetail.h.in @@ -36,6 +36,7 @@ $# Ignore the following comment. It is meant for the generated file. #include +#include #include $py(max_expanded=8)\ @@ -84,6 +85,58 @@ template using AllTriviallyDestructible = vtkm::ListAll, vtkmstd::is_trivially_destructible>; +// -------------------------------------------------------------------------------- +// Helper functions to determine the maximum type size. +#if defined(VTKM_GCC) && (__GNUC__ == 5) +// GCC5 gives an error with `sizeof(Ts)...` for an unexpanded parameter pack. +template +constexpr std::size_t MaxSizeOf() +{ + return sizeof(T0); +} +template +constexpr std::size_t MaxSizeOf() +{ + return std::max(sizeof(T0), MaxSizeOf()); +} +#else +template +constexpr std::size_t MaxSizeOf() +{ + return std::max({ sizeof(Ts)... }); +} +#endif + +// -------------------------------------------------------------------------------- +// Placeholder for a fullly used structure of the given type. +template 8)> +struct SizedPlaceholder +{ + VTKM_STATIC_ASSERT(Size > 0); + vtkm::Int8 A; + SizedPlaceholder B; +}; + +template <> +struct SizedPlaceholder<1, false> +{ + vtkm::Int8 A; +}; + +template +struct SizedPlaceholder +{ + vtkm::Int8 A; + vtkm::Int8 B; + vtkm::Int8 C; + vtkm::Int8 D; + vtkm::Int8 E; + vtkm::Int8 F; + vtkm::Int8 G; + vtkm::Int8 H; + SizedPlaceholder I; +}; + // clang-format off // -------------------------------------------------------------------------------- @@ -128,6 +181,11 @@ $for(param_length in range(max_expanded))\ template <$typename_list(param_length)> union VariantUnionTD<$type_list(param_length)> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(param_length + 1))\ T$(param_index) V$(param_index); $endfor\ @@ -137,6 +195,11 @@ $endfor\ template <$typename_list(param_length)> union VariantUnionNTD<$type_list(param_length)> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(param_length + 1))\ T$(param_index) V$(param_index); $endfor\ @@ -150,6 +213,11 @@ $endfor\ template <$typename_list(max_expanded), typename... Ts> union VariantUnionTD<$type_list(max_expanded), Ts...> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(max_expanded))\ T$(param_index) V$(param_index); $endfor\ @@ -162,6 +230,11 @@ $endfor\ template <$typename_list(max_expanded), typename... Ts> union VariantUnionNTD<$type_list(max_expanded), Ts...> { + // Work around issue where CUDA sometimes seems to miss initializing some struct members + // if another entry in the varient has a struct with padding. Place an item that requires + // everthing to be copied. + SizedPlaceholder()> Placeholder; + $for(param_index in range(max_expanded))\ T$(param_index) V$(param_index); $endfor\