From 047d79672a2724fe6a767bf6166da1aba1de862d Mon Sep 17 00:00:00 2001
From: Kenneth Moreland <kmorel@sandia.gov>
Date: Mon, 15 Feb 2021 12:38:21 -0700
Subject: [PATCH] Fix CUDA compilation error with Lagrangian filter

CUDA architecture has a limited amount of memory available for
constants. The CUDA compiler uses this space to hold constants for some
optimizations. However, for large kernels, the number of constants
needed might be larger than the constant space available. For these
conditions, you have to disable this form of optimization with the
`-Xptxas --disable-optimizer-constants` flags.

Currently, the only file that seems to have this issue is the test for
the Lagrangian filter. Someone should take a closer look to see if this
filter in particular is making an unnecessarily large worklet/kernel. (In
particular, why does the Lagrangian filter have a larger kernel than the
streamline and stream surface filters?)

If this problem occurs more often, we might need to add some way to
configure it in the build.
---
 examples/lagrangian/CMakeLists.txt | 13 +++++++++++++
 vtkm/filter/testing/CMakeLists.txt | 19 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/examples/lagrangian/CMakeLists.txt b/examples/lagrangian/CMakeLists.txt
index c87652ea4..72e3326df 100644
--- a/examples/lagrangian/CMakeLists.txt
+++ b/examples/lagrangian/CMakeLists.txt
@@ -12,6 +12,19 @@ cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
 #Find the VTK-m package
 find_package(VTKm REQUIRED QUIET)
 
+if (TARGET vtkm::cuda OR TARGET vtkm::kokkos_cuda)
+  # Constant memory on CUDA devices is scarce, and the CUDA compiler spends part of it
+  # on constants it generates for certain optimizations. A sufficiently large kernel can
+  # need more of these constants than the available constant space holds. When that
+  # happens, this optimization must be switched off by passing
+  # -Xptxas --disable-optimizer-constants to the compiler.
+  # TODO: Find a more elegant way to do this. Either work around the underlying problem
+  # or give vtkm_library/vtkm_unit_tests a general flag for sources with "large" kernels.
+  set_source_files_properties(
+    lagrangian.cxx
+    PROPERTIES COMPILE_OPTIONS "-Xptxas;--disable-optimizer-constants")
+endif()
+
 add_executable(Lagrangian lagrangian.cxx ABCfield.h)
 target_link_libraries(Lagrangian PRIVATE vtkm_filter)
 vtkm_add_target_information(Lagrangian
diff --git a/vtkm/filter/testing/CMakeLists.txt b/vtkm/filter/testing/CMakeLists.txt
index bdb9763b1..538297194 100644
--- a/vtkm/filter/testing/CMakeLists.txt
+++ b/vtkm/filter/testing/CMakeLists.txt
@@ -94,6 +94,25 @@ if (VTKm_ENABLE_RENDERING)
   )
 endif()
 
+if (TARGET vtkm::cuda OR TARGET vtkm::kokkos_cuda)
+  # Constant memory on CUDA devices is scarce, and the CUDA compiler spends part of it
+  # on constants it generates for certain optimizations. A sufficiently large kernel can
+  # need more of these constants than the available constant space holds. When that
+  # happens, this optimization must be switched off by passing
+  # -Xptxas --disable-optimizer-constants to the compiler.
+  # TODO: Find a more elegant way to do this. Either work around the underlying problem
+  # or give vtkm_library/vtkm_unit_tests a general flag for sources with "large" kernels.
+  set(large_kernel_sources
+    RegressionTestStreamline.cxx
+    UnitTestLagrangianFilter.cxx
+    UnitTestStreamlineFilter.cxx
+    UnitTestStreamSurfaceFilter.cxx)
+  # Attach the flags to the listed source files only.
+  set_source_files_properties(
+    ${large_kernel_sources}
+    PROPERTIES COMPILE_OPTIONS "-Xptxas;--disable-optimizer-constants")
+endif()
+
 vtkm_unit_tests(
   SOURCES ${unit_tests}
   LIBRARIES ${libraries}