mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
Merge topic 'fix-openmp-reduction-with-few-threads'
738c05ae8 fixes OMP reduction when using OMP_NUM_THREADS lt 4
Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !2053
This commit is contained in:
commit
d2ebecdb27
@ -368,10 +368,13 @@ struct ReduceHelper
|
||||
// Use the first (numThreads*2) values for initializing:
|
||||
ReturnType accum = f(data[2 * tid], data[2 * tid + 1]);
|
||||
|
||||
vtkm::Id i = numThreads * 2;
|
||||
const vtkm::Id unrollEnd = ((numVals / 4) * 4) - 4;
|
||||
const vtkm::Id offset = numThreads * 2;
|
||||
const vtkm::Id end = std::max(vtkm::Id(((numVals / 4) * 4) - 4), offset);
|
||||
const vtkm::Id unrollEnd = end - ((end - offset) % 4);
|
||||
|
||||
vtkm::Id i = offset;
|
||||
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||
for (i = numThreads * 2; i < unrollEnd; i += 4)
|
||||
for (i = offset; i < unrollEnd; i += 4)
|
||||
{
|
||||
const auto t1 = f(data[i], data[i + 1]);
|
||||
const auto t2 = f(data[i + 2], data[i + 3]);
|
||||
|
Loading…
Reference in New Issue
Block a user