diff --git a/vtkm/cont/openmp/internal/FunctorsOpenMP.h b/vtkm/cont/openmp/internal/FunctorsOpenMP.h index bf548b249..3fdc5b938 100644 --- a/vtkm/cont/openmp/internal/FunctorsOpenMP.h +++ b/vtkm/cont/openmp/internal/FunctorsOpenMP.h @@ -333,6 +333,11 @@ struct ReduceHelper bool doParallel = false; vtkm::Id numThreads = 0; + + vtkm::cont::RuntimeDeviceInformation{} + .GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP()) + .GetThreads(numThreads); + std::unique_ptr threadData; VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(f) shared( @@ -342,9 +347,6 @@ struct ReduceHelper VTKM_OPENMP_DIRECTIVE(single) { - vtkm::cont::RuntimeDeviceInformation{} - .GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP()) - .GetThreads(numThreads); if (numVals >= numThreads * 2) { doParallel = true; @@ -534,15 +536,16 @@ void ReduceByKeyHelper(KeysInArray keysInArray, internal::WrappedBinaryOperator f(functor); vtkm::Id outIdx = 0; + vtkm::Id numThreads = 0; + + vtkm::cont::RuntimeDeviceInformation{} + .GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP()) + .GetThreads(numThreads); VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(keysIn, valuesIn, keysOut, valuesOut, f) - shared(outIdx) VTKM_OPENMP_SHARED_CONST(numValues)) + shared(numThreads, outIdx) VTKM_OPENMP_SHARED_CONST(numValues)) { int tid = omp_get_thread_num(); - vtkm::Id numThreads = 0; - vtkm::cont::RuntimeDeviceInformation{} - .GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP()) - .GetThreads(numThreads); // Determine bounds for this thread's scan operation: vtkm::Id chunkSize = (numValues + numThreads - 1) / numThreads; diff --git a/vtkm/cont/openmp/internal/RuntimeDeviceConfigurationOpenMP.h b/vtkm/cont/openmp/internal/RuntimeDeviceConfigurationOpenMP.h index c23ee4037..e4e400df8 100644 --- a/vtkm/cont/openmp/internal/RuntimeDeviceConfigurationOpenMP.h +++ b/vtkm/cont/openmp/internal/RuntimeDeviceConfigurationOpenMP.h @@ -86,10 +86,18 @@ private: VTKM_CONT vtkm::Id InitializeHardwareMaxThreads() const { vtkm::Id count = 0; - VTKM_OPENMP_DIRECTIVE(parallel) + + if (omp_in_parallel()) { - VTKM_OPENMP_DIRECTIVE(atomic) - ++count; + count = omp_get_num_threads(); + } + else + { + VTKM_OPENMP_DIRECTIVE(parallel) + { + VTKM_OPENMP_DIRECTIVE(atomic) + ++count; + } } return count; }