vtk-m/vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h
2019-04-17 10:57:13 -06:00

262 lines
8.1 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <omp.h>
#include <iterator>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace sort
{
namespace quick
{
template <typename IterType, typename RawBinaryCompare>
struct QuickSorter
{
using BinaryCompare = vtkm::cont::internal::WrappedBinaryOperator<bool, RawBinaryCompare>;
using ValueType = typename std::iterator_traits<IterType>::value_type;
IterType Data;
BinaryCompare Compare;
vtkm::Id SerialSize;
QuickSorter(IterType iter, RawBinaryCompare comp)
: Data(iter)
, Compare(comp)
, SerialSize(0)
{
}
void Execute(const vtkm::Id2 range)
{
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
this->Prepare(range);
this->Sort(range);
}
}
}
private:
void Prepare(const vtkm::Id2 /*range*/)
{
// Rough benchmarking on an 4-core+4HT processor shows that this sort is
// most efficient (within 5% of TBB sort) when we switch to a serial
// implementation once a partition is less than 32K keys
this->SerialSize = 32768;
}
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Pair<vtkm::Id, ValueType>& v1,
const vtkm::Pair<vtkm::Id, ValueType>& v2,
const vtkm::Pair<vtkm::Id, ValueType>& v3) const
{
if (this->Compare(v1.second, v2.second))
{ // v1 < v2
if (this->Compare(v1.second, v3.second))
{ // v1 < v3
if (this->Compare(v2.second, v3.second))
{ // v1 < v2 < v3
return v2;
}
else // v3 < v2
{ // v1 < v3 < v2
return v3;
}
}
else // v3 < v1
{ // v3 < v1 < v2
return v1;
}
}
else
{ // v2 < v1
if (this->Compare(v2.second, v3.second))
{ // v2 < v3
if (this->Compare(v1.second, v3.second))
{ // v2 < v1 < v3
return v1;
}
else
{ // v2 < v3 < v1
return v3;
}
}
else
{ // v3 < v2 < v1
return v2;
}
}
}
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Id ids[3]) const
{
return this->MedianOf3(vtkm::Pair<vtkm::Id, ValueType>(ids[0], this->Data[ids[0]]),
vtkm::Pair<vtkm::Id, ValueType>(ids[1], this->Data[ids[1]]),
vtkm::Pair<vtkm::Id, ValueType>(ids[2], this->Data[ids[2]]));
}
vtkm::Pair<vtkm::Id, ValueType> PseudoMedianOf9(const vtkm::Id ids[9]) const
{
return this->MedianOf3(
this->MedianOf3(ids), this->MedianOf3(ids + 3), this->MedianOf3(ids + 6));
}
// Approximate the median of the range and return its index.
vtkm::Pair<vtkm::Id, ValueType> SelectPivot(const vtkm::Id2 range) const
{
const vtkm::Id numVals = range[1] - range[0];
assert(numVals >= 9);
// Pseudorandomize the pivot locations to avoid issues with periodic data
// (evenly sampling inputs with periodic values tends to cause the same
// value to be obtained for all samples)
const vtkm::Id seed = range[0] * 3 / 2 + range[1] * 11 / 3 + numVals * 10 / 7;
const vtkm::Id delta = (numVals / 9) * 4 / 3;
vtkm::Id sampleLocations[9] = {
range[0] + ((seed + 0 * delta) % numVals), range[0] + ((seed + 1 * delta) % numVals),
range[0] + ((seed + 2 * delta) % numVals), range[0] + ((seed + 3 * delta) % numVals),
range[0] + ((seed + 4 * delta) % numVals), range[0] + ((seed + 5 * delta) % numVals),
range[0] + ((seed + 6 * delta) % numVals), range[0] + ((seed + 7 * delta) % numVals),
range[0] + ((seed + 8 * delta) % numVals)
};
return this->PseudoMedianOf9(sampleLocations);
}
// Select a pivot and partition data with it, returning the final location of
// the pivot element(s). We use Bentley-McIlroy three-way partitioning to
// improve handling of duplicate keys, so the pivot "location" is actually
// a range of identical keys, hence the vtkm::Id2 return type, which mark
// the [begin, end) of the pivot range.
vtkm::Id2 PartitionData(const vtkm::Id2 range)
{
using namespace std; // For ADL swap
const vtkm::Pair<vtkm::Id, ValueType> pivotData = this->SelectPivot(range);
const vtkm::Id& origPivotIdx = pivotData.first;
const ValueType& pivotVal = pivotData.second;
// Move the pivot to the end of the block while we partition the rest:
swap(this->Data[origPivotIdx], this->Data[range[1] - 1]);
// Indices of the last partitioned keys:
vtkm::Id2 dataCursors(range[0] - 1, range[1] - 1);
// Indices of the start/end of the keys equal to the pivot:
vtkm::Id2 pivotCursors(dataCursors);
for (;;)
{
// Advance the data cursors past all keys that are already partitioned:
while (this->Compare(this->Data[++dataCursors[0]], pivotVal))
;
while (this->Compare(pivotVal, this->Data[--dataCursors[1]]) && dataCursors[1] > range[0])
;
// Range is partitioned the cursors have crossed:
if (dataCursors[0] >= dataCursors[1])
{
break;
}
// Both dataCursors are pointing at incorrectly partitioned keys. Swap
// them to place them in the proper partitions:
swap(this->Data[dataCursors[0]], this->Data[dataCursors[1]]);
// If the elements we just swapped are actually equivalent to the pivot
// value, move them to the pivot storage locations:
if (!this->Compare(this->Data[dataCursors[0]], pivotVal))
{
++pivotCursors[0];
swap(this->Data[pivotCursors[0]], this->Data[dataCursors[0]]);
}
if (!this->Compare(pivotVal, this->Data[dataCursors[1]]))
{
--pivotCursors[1];
swap(this->Data[pivotCursors[1]], this->Data[dataCursors[1]]);
}
}
// Data is now partitioned as:
// | Equal | Less | Greater | Equal |
// Move the equal keys to the middle for the final partitioning:
// | Less | Equal | Greater |
// First the original pivot value at the end:
swap(this->Data[range[1] - 1], this->Data[dataCursors[0]]);
// Update the cursors to either side of the pivot:
dataCursors = vtkm::Id2(dataCursors[0] - 1, dataCursors[0] + 1);
for (vtkm::Id i = range[0]; i < pivotCursors[0]; ++i, --dataCursors[0])
{
swap(this->Data[i], this->Data[dataCursors[0]]);
}
for (vtkm::Id i = range[1] - 2; i > pivotCursors[1]; --i, ++dataCursors[1])
{
swap(this->Data[i], this->Data[dataCursors[1]]);
}
// Adjust the cursor so we can use them to construct the regions for the
// recursive call:
++dataCursors[0];
return dataCursors;
}
void Sort(const vtkm::Id2 range)
{
const vtkm::Id numVals = range[1] - range[0];
if (numVals <= this->SerialSize)
{
std::sort(this->Data + range[0], this->Data + range[1], this->Compare);
return;
}
const vtkm::Id2 pivots = this->PartitionData(range);
const vtkm::Id2 lhRange = vtkm::Id2(range[0], pivots[0]);
const vtkm::Id2 rhRange = vtkm::Id2(pivots[1], range[1]);
// Intel compilers seem to have trouble following the 'this' pointer
// when launching tasks, resulting in a corrupt task environment.
// Explicitly copying the pointer into a local variable seems to fix this.
auto explicitThis = this;
VTKM_OPENMP_DIRECTIVE(task default(none) firstprivate(rhRange, explicitThis))
{
explicitThis->Sort(rhRange);
}
this->Sort(lhRange);
}
};
}
} // end namespace sort::quick
}
}
} // end namespace vtkm::cont::openmp