mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-16 17:22:55 +00:00
fixes OMP reduction when using OMP_NUM_THREADS lt 4
This fixes the following tests, which started failing because one of the Docker runners in the new CI sets `OMP_NUM_THREADS=3`: 1. `UnitTestOpenMPDeviceAdapter` 2. `UnitTestMeshQualityFilter` The optimized reduction implementation for _OpenMP_ unrolls the reduce loop into iterations of four elements. The last iteration of the loop might run past the loop's end element (when the number of elements covered by the loop is not a multiple of four). This commit fixes this by rounding the end element of the OpenMP unrolled reduce loop down so that the unrolled range, measured from the loop's starting offset, is a multiple of four. Signed-off-by: Vicente Adolfo Bolea Sanchez <vicente.bolea@kitware.com>
This commit is contained in:
parent
f6970314a1
commit
738c05ae87
@ -368,10 +368,13 @@ struct ReduceHelper
|
||||
// Use the first (numThreads*2) values for initializing:
|
||||
ReturnType accum = f(data[2 * tid], data[2 * tid + 1]);
|
||||
|
||||
vtkm::Id i = numThreads * 2;
|
||||
const vtkm::Id unrollEnd = ((numVals / 4) * 4) - 4;
|
||||
const vtkm::Id offset = numThreads * 2;
|
||||
const vtkm::Id end = std::max(vtkm::Id(((numVals / 4) * 4) - 4), offset);
|
||||
const vtkm::Id unrollEnd = end - ((end - offset) % 4);
|
||||
|
||||
vtkm::Id i = offset;
|
||||
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||
for (i = numThreads * 2; i < unrollEnd; i += 4)
|
||||
for (i = offset; i < unrollEnd; i += 4)
|
||||
{
|
||||
const auto t1 = f(data[i], data[i + 1]);
|
||||
const auto t2 = f(data[i + 2], data[i + 3]);
|
||||
|
Loading…
Reference in New Issue
Block a user