Data Race Resolution

Finished the comments on data races and their resolution.
This commit is contained in:
Li-Ta Lo 2020-10-07 11:56:02 -06:00
parent aed7e02da9
commit 1185f70bc9
4 changed files with 56 additions and 45 deletions

@ -34,7 +34,6 @@ public:
using ExecutionSignature = void(WorkIndex, _1, _2, _3, _4); using ExecutionSignature = void(WorkIndex, _1, _2, _3, _4);
// TODO: Use Scatter?
template <typename InPortalType, typename AtomicCompInOut> template <typename InPortalType, typename AtomicCompInOut>
VTKM_EXEC void operator()(vtkm::Id index, VTKM_EXEC void operator()(vtkm::Id index,
vtkm::Id start, vtkm::Id start,
@ -84,7 +83,6 @@ public:
vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, numIndicesArray.GetNumberOfValues()), vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, numIndicesArray.GetNumberOfValues()),
componentsOut); componentsOut);
// TODO: give the reason that single pass algorithm works.
vtkm::cont::Invoker invoke; vtkm::cont::Invoker invoke;
invoke( invoke(
detail::GraphGraft{}, indexOffsetsArray, numIndicesArray, connectivityArray, componentsOut); detail::GraphGraft{}, indexOffsetsArray, numIndicesArray, connectivityArray, componentsOut);

@ -59,6 +59,8 @@ public:
for (int j = minIndices[1]; j <= maxIndices[1]; j++) for (int j = minIndices[1]; j <= maxIndices[1]; j++)
{ {
for (int i = minIndices[0]; i <= maxIndices[0]; i++) for (int i = minIndices[0]; i <= maxIndices[0]; i++)
{
if (thisColor == neighborColor.Get(i, j, k))
{ {
// We need to reload thisComp and thatComp every iteration since // We need to reload thisComp and thatComp every iteration since
// they might have been changed by Unite(), both as a result of // they might have been changed by Unite(), both as a result of
@ -66,8 +68,7 @@ public:
// in findRoot(). // in findRoot().
auto thisComp = neighborComp.Get(0, 0, 0); auto thisComp = neighborComp.Get(0, 0, 0);
auto thatComp = neighborComp.Get(i, j, k); auto thatComp = neighborComp.Get(i, j, k);
if (thisColor == neighborColor.Get(i, j, k))
{
// Merge the two components one way or the other, the order will // Merge the two components one way or the other, the order will
// be resolved by Unite(). // be resolved by Unite().
UnionFind::Unite(compOut, thisComp, thatComp); UnionFind::Unite(compOut, thisComp, thatComp);
@ -103,7 +104,6 @@ public:
Algorithm::Copy(vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, pixels.GetNumberOfValues()), Algorithm::Copy(vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, pixels.GetNumberOfValues()),
componentsOut); componentsOut);
// TODO: give the reason that single pass algorithm works.
vtkm::cont::Invoker invoke; vtkm::cont::Invoker invoke;
invoke(detail::ImageGraft{}, input, componentsOut, pixels, componentsOut); invoke(detail::ImageGraft{}, input, componentsOut, pixels, componentsOut);
invoke(PointerJumping{}, componentsOut); invoke(PointerJumping{}, componentsOut);

@ -84,23 +84,22 @@ public:
class Renumber class Renumber
{ {
public: public:
// FIXME: const correctness for input. Or make it single InOut argument. static void Run(vtkm::cont::ArrayHandle<vtkm::Id>& componentsInOut)
static void Run(vtkm::cont::ArrayHandle<vtkm::Id>& components)
{ {
using Algorithm = vtkm::cont::Algorithm; using Algorithm = vtkm::cont::Algorithm;
// FIXME: we should able to apply findRoot to each pixel and use some kind // FIXME: we should be able to apply findRoot to each pixel and use some kind
// of atomic operation to get the number of unique components without the // of atomic operation to get the number of unique components without the
// cost of copying and sorting. This might be able to be extended to also // cost of copying and sorting. This might be able to be extended to also
// work for the renumbering (replacing InnerJoin) through atomic increment. // work for the renumbering (replacing InnerJoin) through atomic increment.
vtkm::cont::ArrayHandle<vtkm::Id> uniqueComponents; vtkm::cont::ArrayHandle<vtkm::Id> uniqueComponents;
Algorithm::Copy(components, uniqueComponents); Algorithm::Copy(componentsInOut, uniqueComponents);
Algorithm::Sort(uniqueComponents); Algorithm::Sort(uniqueComponents);
Algorithm::Unique(uniqueComponents); Algorithm::Unique(uniqueComponents);
vtkm::cont::ArrayHandle<vtkm::Id> ids; vtkm::cont::ArrayHandle<vtkm::Id> ids;
Algorithm::Copy(vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, components.GetNumberOfValues()), Algorithm::Copy(
ids); vtkm::cont::ArrayHandleCounting<vtkm::Id>(0, 1, componentsInOut.GetNumberOfValues()), ids);
vtkm::cont::ArrayHandle<vtkm::Id> uniqueColor; vtkm::cont::ArrayHandle<vtkm::Id> uniqueColor;
Algorithm::Copy( Algorithm::Copy(
@ -109,10 +108,15 @@ public:
vtkm::cont::ArrayHandle<vtkm::Id> cellColors; vtkm::cont::ArrayHandle<vtkm::Id> cellColors;
vtkm::cont::ArrayHandle<vtkm::Id> pixelIdsOut; vtkm::cont::ArrayHandle<vtkm::Id> pixelIdsOut;
InnerJoin::Run( InnerJoin::Run(componentsInOut,
components, ids, uniqueComponents, uniqueColor, cellColors, pixelIdsOut, components); ids,
uniqueComponents,
uniqueColor,
cellColors,
pixelIdsOut,
componentsInOut);
Algorithm::SortByKey(pixelIdsOut, components); Algorithm::SortByKey(pixelIdsOut, componentsInOut);
} }
}; };
} }

@ -59,49 +59,58 @@ public:
// rooted forest structure of Union-Find at the expense of duplicated (but // rooted forest structure of Union-Find at the expense of duplicated (but
// benign) work. // benign) work.
// Case 2, T0 calling Unite(u, v), T1 calling Unite(u, w) and T2 calling // Case 2, T0 calling Unite(u, v) and T1 calling Unite(u, w) concurrently.
// Unite(v, s) concurrently.
// Problem I: There is a potential write after read data race. After T0 // Problem I: There is a potential write after read data race. After T0
// calls findRoot for u and v, T1 might have called parents.Set(root_u, root_w) // calls findRoot() for u but before actually updating the parent of root_u,
// thus changed root_u to root_w thus making root_u "obsolete" before T0 calls // T1 might have changed root_u to root_w and made root_u "obsolete".
// parents.Set() on root_u/root_v.
// When the root of the tree to be attached to (e.g. root_u, when root_u < root_v) // When the root of the tree to be attached to (e.g. root_u, when root_u < root_v)
// is changed, there is no hazard, since we are just attaching a tree to a // is changed, there is no hazard, since we are just attaching a tree to a
// now a non-root node, root_u, (thus, root_w <- root_u <- root_v) and three // now a non-root node, root_u, (thus, root_w <- root_u <- root_v).
// components merged.
// However, when the root of the attaching tree (root_v) is changed, it // However, when the root of the attaching tree (root_v) is changed, it
// means that the root_u has been attached to yet some other root_s and became // means that the root_u has been attached to yet some other root_s and became
// a non-root node. If we are now attaching this non-root node to root_w we // a non-root node. If we are now attaching this non-root node to root_w we
// would leave root_s behind and undoing previous work. // would leave root_s behind and undoing previous work.
// Resolution: TDB, mostly some reason for CompareAndSwap // Atomic Load/Store with memory_order_acquire are no able to detect this
// data race. While Load see all previous Stores by other threds, it can not
// be aware of any Store after the Load.
// Resolution: Use atomic Compare and Swap in a loop when updating root_u.
// CAS will check if root of u has been updated by some other thread between
// findRoot(u) is called and when root of u is going to be updated. This is
// done by comparing the root_u = findRoot(u) to the current value at
// parents[root_u]. If they are the same, no data race has happened and the
// value in parents[root_u] is updated. However, if root_u != parent[root_u],
// it means parent[root_u] has been updated by some other thread. CAS returns
// the new value of parent[root_u] (root_s in the Problem description) which
// we can use as the new root_u. We keep retrying until there is no more
// data race and root_u == root_w i.e. they are in the same component.
// Case 3. There is a potential concurrent write data race as it is possible // Problem II: There is a potential concurrent write data race as it is
// for two threads to try to change the same old root to different new roots, // possible for the two threads to try to change the same old root to
// e.g. threadA calls parents.Set(root, rootA) while threadB calls // different new roots, e.g. T0 calls parents.Set(u, v) while T1 calls
// parents(root, rootB) where rootB < root and rootA < root (but the order // parents.Set(u, w) where v < u and w < u (but the order of v and w is
// of rootA and rootB is unspecified.) Each thread assumes success while // unspecified.) Each thread assumes success while the outcome is actually
// the outcome is actually unspecified. // unspecified.
// Resolution: An atomic Compare and Swap is suggested in SV Janati et. al. // Resolution: Use an atomic Compare and Swap is suggested in SV Janati et. al.
// as well as J. Jaiganesht et. al. to resolve the data race. The CAS // as well as J. Jaiganesht et. al. to resolve the data race. The CAS
// checks if the old root is the same as what we expected. If so, there is // checks if the old root is the same as what we expected. If so, there is
// no data race, CAS will set the root to the desired value. The return value // no data race, CAS will set the root to the desired new value. The return
// from CAS will equal to our expected old root and signifies a successful // value from CAS will equal to our expected old root and signifies a
// write which terminates the while loop. // successful write which terminates the while loop.
// If the old root is not what we expected, it was updated by some other // If the old root is not what we expected, it has been updated by some
// thread and the update by this thread fails. The root as updated by // other thread and the update by this thread fails. The root as updated by
// the other thread is returned. This returned value would not equal to our desired new // the other thread is returned. This returned value would not equal to
// root, signifying the need to retry with the while loop. On the other // our desired new root, signifying the need to retry with the while loop.
// hand, // We can use this return "new root" as is without calling findRoot() to
// Why simple Load/Store as provide by AtomicArray Get/Set do not work. // find the "new root". The while loop terminates when both u and v have
// the same root (thus united).
auto root_u = UnionFind::findRoot(parents, u); auto root_u = UnionFind::findRoot(parents, u);
auto root_v = UnionFind::findRoot(parents, v); auto root_v = UnionFind::findRoot(parents, v);
while (root_u != root_v) while (root_u != root_v)
{ {
// FIXME: we might be executing the loop one extra time. // FIXME: we might be executing the loop one extra time than necessary.
// Nota Bene: VTKm's CompareAndSwap has a different order of parameters // Nota Bene: VTKm's CompareAndSwap has a different order of parameters
// than normal practice, it is (index, new, expected) rather than // than common practice, it is (index, new, expected) rather than
// (index, expected, new). // (index, expected, new).
if (root_u < root_v) if (root_u < root_v)
root_v = parents.CompareAndSwap(root_v, root_u, root_v); root_v = parents.CompareAndSwap(root_v, root_u, root_v);