Fix CUDA shfl usage.

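There was a bug in the implementations of CountSetBits and
BitFieldToUnorderedSet: the reduction loop added the value returned by
shfl_down unconditionally. The shuffled value is now accumulated only
when activeRank + delta < activeSize. A regression case that was
failing on CUDA is added to the tests for both algorithms.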
This commit is contained in:
Allison Vacanti 2019-07-11 12:41:43 -04:00
parent 57440239ef
commit 112024dae2
2 changed files with 32 additions and 2 deletions

@ -320,7 +320,11 @@ private:
vtkm::Int32 rVal = this->LocalPopCount;
for (int delta = 1; delta < activeSize; delta *= 2)
{
rVal += activeLanes.shfl_down(rVal, delta);
const vtkm::Int32 shflVal = activeLanes.shfl_down(rVal, delta);
if (activeRank + delta < activeSize)
{
rVal += shflVal;
}
}
if (activeRank == 0)
@ -511,7 +515,11 @@ private:
vtkm::Int32 rVal = this->LocalPopCount;
for (int delta = 1; delta < activeSize; delta *= 2)
{
rVal += activeLanes.shfl_down(rVal, delta);
const vtkm::Int32 shflVal = activeLanes.shfl_down(rVal, delta);
if (activeRank + delta < activeSize)
{
rVal += shflVal;
}
}
if (activeRank == 0)

@ -2483,6 +2483,17 @@ private:
testRandomMask(0xffffffff);
testRandomMask(0x1c0fd395);
testRandomMask(0xdeadbeef);
// This case was causing issues on CUDA:
{
BitField bits;
Algorithm::Fill(bits, false, 32 * 32);
auto portal = bits.GetPortalControl();
portal.SetWord(2, 0x00100000ul);
portal.SetWord(8, 0x00100010ul);
portal.SetWord(11, 0x10000000ul);
testIndexArray(bits);
}
}
static VTKM_CONT void TestCountSetBits()
@ -2562,6 +2573,17 @@ private:
testRandomMask(0xffffffff);
testRandomMask(0x1c0fd395);
testRandomMask(0xdeadbeef);
// This case was causing issues on CUDA:
{
BitField bits;
Algorithm::Fill(bits, false, 32 * 32);
auto portal = bits.GetPortalControl();
portal.SetWord(2, 0x00100000ul);
portal.SetWord(8, 0x00100010ul);
portal.SetWord(11, 0x10000000ul);
verifyPopCount(bits);
}
}
template <typename WordType>