Sebastián Barschkis 4ff7c5eed6 Mantaflow [Part 1]: Added preprocessed Mantaflow source files
Includes preprocessed Mantaflow source files for both OpenMP and TBB (if OpenMP is not present, TBB files will be used instead).

These files come directly from the Mantaflow repository. Future updates to the core fluid solver will take place by updating the files.

Reviewed By: sergey, mont29

Maniphest Tasks: T59995

Differential Revision:
2019-12-16 16:27:26 +01:00

1858 lines
49 KiB

// This file is generated using the MantaFlow preprocessor (prep generate).
/******************************************************************************
 *
 * MantaFlow fluid solver framework
 *
 * Multigrid solver
 *
 * This program is free software, distributed under the terms of the
 * Apache License, Version 2.0
 *
 * Copyright 2016, by Florian Ferstl
 *
 * This is an implementation of the solver developed by Dick et al. [1]
 * without topology awareness (= vertex duplication on coarser levels). This
 * simplification allows us to use regular grids for all levels of the multigrid
 * hierarchy and works well for moderately complex domains.
 *
 * [1] Solving the Fluid Pressure Poisson Equation Using Multigrid-Evaluation
 *     and Improvements, C. Dick, M. Rogowsky, R. Westermann, IEEE TVCG 2015
 *
 ******************************************************************************/
#include "multigrid.h"
// Loop IDX over all linear vertex indices of level LVL. Only usable where the
// per-level array `mb` is in scope.
#define FOR_LVL(IDX, LVL) for (int IDX = 0; IDX < mb[LVL].size(); IDX++)

// Declare a Vec3i named VEC and loop it over the inclusive box [MIN, MAX]
// (x fastest, then y, then z). MIN and MAX are evaluated exactly once.
// Note: expands to several statements, so use it inside its own scope and
// follow it with the loop body.
#define FOR_VEC_MINMAX(VEC, MIN, MAX) \
  Vec3i VEC; \
  const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \
  for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \
    for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \
      for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++)

// Same as FOR_VEC_MINMAX, but additionally declares an int LIN that counts the
// visited positions (0, 1, 2, ...) in iteration order.
#define FOR_VECLIN_MINMAX(VEC, LIN, MIN, MAX) \
  Vec3i VEC; \
  int LIN = 0; \
  const Vec3i VEC##__min = (MIN), VEC##__max = (MAX); \
  for (VEC.z = VEC##__min.z; VEC.z <= VEC##__max.z; VEC.z++) \
    for (VEC.y = VEC##__min.y; VEC.y <= VEC##__max.y; VEC.y++) \
      for (VEC.x = VEC##__min.x; VEC.x <= VEC##__max.x; VEC.x++, LIN++)

// Timing instrumentation is compiled out by default; switch the two
// definitions below to enable it.
#define MG_TIMINGS(X)
//#define MG_TIMINGS(X) X
using namespace std;
namespace Manta {
// Helper class for calling mantaflow kernels with a specific number of threads
// Thin wrapper around an element count. Kernels take it as their first
// argument and use size() as the length of their parallel index range.
class ThreadSize {
  IndexInt s;

 public:
  // _s: number of work items the kernel should iterate over.
  ThreadSize(IndexInt _s) : s(_s)
  {
  }

  // Returns the stored number of work items.
  IndexInt size() const
  {
    return s;
  }
};
// ----------------------------------------------------------------------------
// Efficient min heap for <ID, key> pairs with 0<=ID<N and 0<=key<K
// (elements are stored in K buckets, where each bucket is a doubly linked list).
// - if K<<N, all ops are O(1) on avg (worst case O(K)).
// - memory usage O(K+N): (K+N) * 3 * sizeof(int).
class NKMinHeap {
 private:
  // One node of a doubly linked bucket list. Indices refer to mEntries;
  // -1 means "none". For ID nodes, key == -1 means "not in the heap".
  struct Entry {
    int key, prev, next;
    Entry() : key(-1), prev(-1), next(-1)
    {
    }
  };

  int mN, mK, mSize, mMinKey;

  // Double linked lists of IDs, one for each bucket/key.
  // The first K entries are the buckets' head pointers,
  // and the last N entries correspond to the IDs.
  std::vector<Entry> mEntries;

 public:
  // N: number of IDs (valid IDs are 0..N-1), K: number of keys (valid keys are 0..K-1).
  NKMinHeap(int N, int K) : mN(N), mK(K), mSize(0), mMinKey(-1), mEntries(N + K)
  {
  }

  // Number of IDs currently stored in the heap.
  int size() const
  {
    return mSize;
  }

  // Current key of ID, or -1 if ID is not in the heap.
  int getKey(int ID) const
  {
    return mEntries[mK + ID].key;
  }

  // Insert, decrease or increase key (or delete by setting key to -1)
  void setKey(int ID, int key);

  // peek min key (returns ID/key pair)
  std::pair<int, int> peekMin();

  // pop min key (returns ID/key pair)
  std::pair<int, int> popMin();

  void print();  // for debugging
};

void NKMinHeap::setKey(int ID, int key)
{
  assertMsg(0 <= ID && ID < mN, "NKMinHeap::setKey: ID out of range");
  assertMsg(-1 <= key && key < mK, "NKMinHeap::setKey: key out of range");

  const int kid = mK + ID;

  if (mEntries[kid].key == key)
    return;  // nothing changes

  // remove from old key-list if ID existed previously
  if (mEntries[kid].key != -1) {
    int pred = mEntries[kid].prev;
    int succ = mEntries[kid].next;  // can be -1

    mEntries[pred].next = succ;
    if (succ != -1)
      mEntries[succ].prev = pred;

    // if removed key was minimum key, mMinKey may need to be updated
    int removedKey = mEntries[kid].key;
    if (removedKey == mMinKey) {
      if (mSize == 1) {
        mMinKey = -1;
      }
      else {
        // advance to the first non-empty bucket (one must exist since mSize > 1)
        for (; mMinKey < mK; mMinKey++) {
          if (mEntries[mMinKey].next != -1)
            break;
        }
      }
    }

    mSize--;
  }

  // set new key of ID
  mEntries[kid].key = key;

  if (key == -1) {
    // finished if key was set to -1
    mEntries[kid].next = mEntries[kid].prev = -1;
    return;
  }

  // add key
  mSize++;
  if (mMinKey == -1)
    mMinKey = key;
  else
    mMinKey = std::min(mMinKey, key);

  // insert into new key-list (headed by mEntries[key])
  int tmp = mEntries[key].next;

  mEntries[key].next = kid;
  mEntries[kid].prev = key;

  mEntries[kid].next = tmp;
  if (tmp != -1)
    mEntries[tmp].prev = kid;
}

std::pair<int, int> NKMinHeap::peekMin()
{
  if (mSize == 0)
    return std::pair<int, int>(-1, -1);  // error

  const int ID = mEntries[mMinKey].next - mK;
  return std::pair<int, int>(ID, mMinKey);
}

std::pair<int, int> NKMinHeap::popMin()
{
  if (mSize == 0)
    return std::pair<int, int>(-1, -1);  // error

  const int kid = mEntries[mMinKey].next;
  const int ID = kid - mK;
  const int key = mMinKey;

  // remove from key-list
  int pred = mEntries[kid].prev;
  int succ = mEntries[kid].next;  // can be -1

  mEntries[pred].next = succ;
  if (succ != -1)
    mEntries[succ].prev = pred;

  // remove entry
  mEntries[kid] = Entry();
  mSize--;

  // update mMinKey
  if (mSize == 0) {
    mMinKey = -1;
  }
  else {
    for (; mMinKey < mK; mMinKey++) {
      if (mEntries[mMinKey].next != -1)
        break;
    }
  }

  // return result
  return std::pair<int, int>(ID, key);
}

void NKMinHeap::print()
{
  std::cout << "Size: " << mSize << ", MinKey: " << mMinKey << std::endl;
  for (int key = 0; key < mK; key++) {
    if (mEntries[key].next != -1) {
      std::cout << "Key " << key << ": ";
      int kid = mEntries[key].next;
      while (kid != -1) {
        std::cout << kid - mK << " ";
        kid = mEntries[kid].next;
      }
      std::cout << std::endl;
    }
  }
  std::cout << std::endl;
}
// ----------------------------------------------------------------------------
// GridMg methods
// Illustration of 27-point stencil indices
// y | z = -1 z = 0 z = 1
// ^ | 6 7 8, 15 16 17, 24 25 26
// | | 3 4 5, 12 13 14, 21 22 23
// o-> x | 0 1 2, 9 10 11, 18 19 20
// Symmetric storage with only 14 entries per vertex
// y | z = -1 z = 0 z = 1
// ^ | - - -, 2 3 4, 11 12 13
// | | - - -, - 0 1, 8 9 10
// o-> x | - - -, - - -, 5 6 7
// Constructor: selects 2D/3D mode from gridSize.z, sets the stencil sizes and
// ranges, allocates the per-level arrays and precomputes the coarsening paths
// that are later read when the level-1 operator is built.
// NOTE(review): this copy of the function is incomplete -- the initializer list
// stops after its first entry, block braces are absent, and several statements
// are cut off or missing. The notes below mark the visible gaps; restore the
// function from the upstream Mantaflow source before compiling.
GridMg::GridMg(const Vec3i &gridSize)
: mNumPreSmooth(1),
// NOTE(review): initializer list ends with a dangling comma; remaining member
// initializers are not visible here.
MG_TIMINGS(MuTime time;)
// 2D or 3D mode
mIs3D = (gridSize.z > 1);
mDim = mIs3D ? 3 : 2;
mStencilSize = mIs3D ? 14 : 5; // A has a full 27-point stencil on levels > 0
mStencilSize0 = mIs3D ? 4 : 3; // A has a 7-point stencil on level 0
mStencilMin = Vec3i(-1, -1, mIs3D ? -1 : 0);
mStencilMax = Vec3i(1, 1, mIs3D ? 1 : 0);
// Create level 0 (=original grid)
// NOTE(review): mSize.back() is read below but nothing visible has pushed the
// level-0 size yet -- presumably a push of gridSize is missing; confirm.
mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y));
int n = mSize.back().x * mSize.back().y * mSize.back().z;
mA.push_back(std::vector<Real>(n * mStencilSize0));
// NOTE(review): the debMsg call below is cut off (no verbosity argument / closing).
debMsg("GridMg::GridMg level 0: " << mSize[0].x << " x " << mSize[0].y << " x " << mSize[0].z
<< " x ",
// Create coarse levels >0
for (int l = 1; l <= 100; l++) {
// Stop coarsening when the previous level is at most 5 vertices in every
// direction, or has at most 1000 vertices in total.
// NOTE(review): the statements controlled by the two ifs are not visible
// (presumably loop exits); confirm.
if (mSize[l - 1].x <= 5 && mSize[l - 1].y <= 5 && mSize[l - 1].z <= 5)
if (n <= 1000)
// Each coarse level has (size + 2) / 2 vertices per dimension.
mSize.push_back((mSize[l - 1] + 2) / 2);
mPitch.push_back(Vec3i(1, mSize.back().x, mSize.back().x * mSize.back().y));
n = mSize.back().x * mSize.back().y * mSize.back().z;
mA.push_back(std::vector<Real>(n * mStencilSize));
// NOTE(review): this debMsg call is cut off as well.
debMsg("GridMg::GridMg level " << l << ": " << mSize[l].x << " x " << mSize[l].y << " x "
<< mSize[l].z << " x ",
// Additional memory for CG on coarsest level
mCGtmp1.back() = std::vector<double>(n);
mCGtmp2.back() = std::vector<double>(n);
mCGtmp3.back() = std::vector<double>(n);
mCGtmp4.back() = std::vector<double>(n);
MG_TIMINGS(debMsg("GridMg: Allocation done in " << time.update(), 1);)
// Precalculate coarsening paths:
// (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N)
// Offsets of the 7-point stencil; only the first 1 + 2 * mDim entries are used below.
Vec3i p7stencil[7] = {Vec3i(0, 0, 0),
Vec3i(-1, 0, 0),
Vec3i(1, 0, 0),
Vec3i(0, -1, 0),
Vec3i(0, 1, 0),
Vec3i(0, 0, -1),
Vec3i(0, 0, 1)};
Vec3i V(1, 1, 1); // reference coarse grid vertex at (1,1,1)
FOR_VEC_MINMAX(U, V * 2 + mStencilMin, V * 2 + mStencilMax)
for (int i = 0; i < 1 + 2 * mDim; i++) {
Vec3i W = U + p7stencil[i];
FOR_VEC_MINMAX(N, W / 2, (W + 1) / 2)
// s is the 27-point stencil index of N (13 = center); only s >= 13 is kept,
// matching the symmetric storage described above.
int s = dot(N, Vec3i(1, 3, 9));
if (s >= 13) {
CoarseningPath path;
path.N = N - 1; // offset of N on coarse grid
path.U = U - V * 2; // offset of U on fine grid
// NOTE(review): the next line has lost text -- an assignment of `s - 13`
// (coarse stencil index) has been merged into the comment of `path.W`.
path.W = W - V * 2; // offset of W on fine grid = s - 13; // stencil index corresponding to V<-N on coarse grid
path.sf = (i + 1) / 2; // stencil index corresponding to U<-W on coarse grid
// NOTE(review): the restriction-weight assignment target is missing below;
// only its right-hand side survives on the following line.
path.inUStencil = (i % 2 == 0); // fine grid stencil entry stored at U or W? = Real(1) /
Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2))); // restriction weight V<-U
path.iw = Real(1) /
Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight W<-N
// Ordering used to sort the precomputed paths.
// NOTE(review): the operands of the comparisons in the lambda are missing.
auto pathLess = [](const GridMg::CoarseningPath &p1, const GridMg::CoarseningPath &p2) {
if ( ==
return dot(p1.U + 1, Vec3i(1, 3, 9)) < dot(p2.U + 1, Vec3i(1, 3, 9));
return <;
std::sort(mCoarseningPaths0.begin(), mCoarseningPaths0.end(), pathLess);
void GridMg::analyzeStencil(int v,
bool is3D,
bool &isStencilSumNonZero,
bool &isEquationTrivial) const
Vec3i V = vecIdx(v, 0);
// collect stencil entries
Real A[7];
A[0] = mA[0][v * mStencilSize0 + 0];
A[1] = mA[0][v * mStencilSize0 + 1];
A[2] = mA[0][v * mStencilSize0 + 2];
A[3] = is3D ? mA[0][v * mStencilSize0 + 3] : Real(0);
A[4] = V.x != 0 ? mA[0][(v - mPitch[0].x) * mStencilSize0 + 1] : Real(0);
A[5] = V.y != 0 ? mA[0][(v - mPitch[0].y) * mStencilSize0 + 2] : Real(0);
A[6] = V.z != 0 && is3D ? mA[0][(v - mPitch[0].z) * mStencilSize0 + 3] : Real(0);
// compute sum of stencil entries
Real stencilMax = Real(0), stencilSum = Real(0);
for (int i = 0; i < 7; i++) {
stencilSum += A[i];
stencilMax = max(stencilMax, std::abs(A[i]));
// check if sum is numerically zero
isStencilSumNonZero = std::abs(stencilSum / stencilMax) > Real(1E-6);
// check for trivial equation (exact comparisons)
isEquationTrivial = A[0] == Real(1) && A[1] == Real(0) && A[2] == Real(0) && A[3] == Real(0) &&
A[4] == Real(0) && A[5] == Real(0) && A[6] == Real(0);
struct knCopyA : public KernelBase {
knCopyA(std::vector<Real> &sizeRef,
std::vector<Real> &A0,
int stencilSize0,
bool is3D,
const Grid<Real> *pA0,
const Grid<Real> *pAi,
const Grid<Real> *pAj,
const Grid<Real> *pAk)
: KernelBase(sizeRef.size()),
inline void op(IndexInt idx,
std::vector<Real> &sizeRef,
std::vector<Real> &A0,
int stencilSize0,
bool is3D,
const Grid<Real> *pA0,
const Grid<Real> *pAi,
const Grid<Real> *pAj,
const Grid<Real> *pAk) const
A0[idx * stencilSize0 + 0] = (*pA0)[idx];
A0[idx * stencilSize0 + 1] = (*pAi)[idx];
A0[idx * stencilSize0 + 2] = (*pAj)[idx];
if (is3D)
A0[idx * stencilSize0 + 3] = (*pAk)[idx];
inline std::vector<Real> &getArg0()
return sizeRef;
typedef std::vector<Real> type0;
inline std::vector<Real> &getArg1()
return A0;
typedef std::vector<Real> type1;
inline int &getArg2()
return stencilSize0;
typedef int type2;
inline bool &getArg3()
return is3D;
typedef bool type3;
inline const Grid<Real> *getArg4()
return pA0;
typedef Grid<Real> type4;
inline const Grid<Real> *getArg5()
return pAi;
typedef Grid<Real> type5;
inline const Grid<Real> *getArg6()
return pAj;
typedef Grid<Real> type6;
inline const Grid<Real> *getArg7()
return pAk;
typedef Grid<Real> type7;
void runMessage()
debMsg("Executing kernel knCopyA ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &sizeRef;
std::vector<Real> &A0;
int stencilSize0;
bool is3D;
const Grid<Real> *pA0;
const Grid<Real> *pAi;
const Grid<Real> *pAj;
const Grid<Real> *pAk;
struct knActivateVertices : public KernelBase {
knActivateVertices(std::vector<GridMg::VertexType> &type_0,
std::vector<Real> &A0,
bool &nonZeroStencilSumFound,
bool &trivialEquationsFound,
const GridMg &mg)
: KernelBase(type_0.size()),
inline void op(IndexInt idx,
std::vector<GridMg::VertexType> &type_0,
std::vector<Real> &A0,
bool &nonZeroStencilSumFound,
bool &trivialEquationsFound,
const GridMg &mg) const
// active vertices on level 0 are vertices with non-zero diagonal entry in A
type_0[idx] = GridMg::vtInactive;
if (mg.mA[0][idx * mg.mStencilSize0 + 0] != Real(0)) {
type_0[idx] = GridMg::vtActive;
bool isStencilSumNonZero = false, isEquationTrivial = false;
mg.analyzeStencil(int(idx), mg.mIs3D, isStencilSumNonZero, isEquationTrivial);
// Note: nonZeroStencilSumFound and trivialEquationsFound are only
// changed from false to true, and hence there are no race conditions.
if (isStencilSumNonZero)
nonZeroStencilSumFound = true;
// scale down trivial equations
if (isEquationTrivial) {
type_0[idx] = GridMg::vtActiveTrivial;
A0[idx * mg.mStencilSize0 + 0] *= mg.mTrivialEquationScale;
trivialEquationsFound = true;
inline std::vector<GridMg::VertexType> &getArg0()
return type_0;
typedef std::vector<GridMg::VertexType> type0;
inline std::vector<Real> &getArg1()
return A0;
typedef std::vector<Real> type1;
inline bool &getArg2()
return nonZeroStencilSumFound;
typedef bool type2;
inline bool &getArg3()
return trivialEquationsFound;
typedef bool type3;
inline const GridMg &getArg4()
return mg;
typedef GridMg type4;
void runMessage()
debMsg("Executing kernel knActivateVertices ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<GridMg::VertexType> &type_0;
std::vector<Real> &A0;
bool &nonZeroStencilSumFound;
bool &trivialEquationsFound;
const GridMg &mg;
// Sets the system matrix: copies the coefficient grids pA0/pAi/pAj/pAk into the
// level-0 stencil array, classifies the level-0 vertices, and then handles the
// coarse levels. Marks A as set and invalidates any previously set rhs.
// NOTE(review): this copy is incomplete -- block braces are absent, the call
// that takes the "Found constant mode" string has lost its head and trailing
// argument, and the body of the per-level loop shows only the timing macros
// (the calls that generate each coarse grid and operator are presumably
// missing; confirm against the upstream Mantaflow source).
void GridMg::setA(const Grid<Real> *pA0,
const Grid<Real> *pAi,
const Grid<Real> *pAj,
const Grid<Real> *pAk)
MG_TIMINGS(MuTime time;)
// Copy level 0
// (mx[0] is passed only as the size reference of the kernel)
knCopyA(mx[0], mA[0], mStencilSize0, mIs3D, pA0, pAi, pAj, pAk);
// Determine active vertices and scale trivial equations
bool nonZeroStencilSumFound = false;
bool trivialEquationsFound = false;
knActivateVertices(mType[0], mA[0], nonZeroStencilSumFound, trivialEquationsFound, *this);
if (trivialEquationsFound)
debMsg("GridMg::setA: Found at least one trivial equation", 2);
// Sanity check: if all rows of A sum up to 0 --> A doesn't have full rank (opposite direction
// isn't necessarily true)
if (!nonZeroStencilSumFound)
"GridMg::setA: Found constant mode: A*1=0! A does not have full rank and multigrid may "
"not converge. (forgot to fix a pressure value?)",
// Create coarse grids and operators on levels >0
for (int l = 1; l < mA.size(); l++) {
MG_TIMINGS(debMsg("GridMg: Generated level " << l << " in " << time.update(), 1);)
MG_TIMINGS(debMsg("GridMg: Generated operator " << l << " in " << time.update(), 1);)
mIsASet = true;
mIsRhsSet = false; // invalidate rhs
struct knSetRhs : public KernelBase {
knSetRhs(std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg)
: KernelBase(b.size()), b(b), rhs(rhs), mg(mg)
inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) const
b[idx] = rhs[idx];
// scale down trivial equations
if (mg.mType[0][idx] == GridMg::vtActiveTrivial) {
b[idx] *= mg.mTrivialEquationScale;
inline std::vector<Real> &getArg0()
return b;
typedef std::vector<Real> type0;
inline const Grid<Real> &getArg1()
return rhs;
typedef Grid<Real> type1;
inline const GridMg &getArg2()
return mg;
typedef GridMg type2;
void runMessage()
debMsg("Executing kernel knSetRhs ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, b, rhs, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &b;
const Grid<Real> &rhs;
const GridMg &mg;
void GridMg::setRhs(const Grid<Real> &rhs)
assertMsg(mIsASet, "GridMg::setRhs Error: A has not been set.");
knSetRhs(mb[0], rhs, *this);
mIsRhsSet = true;
template<class T> struct knSet : public KernelBase {
knSet(std::vector<T> &data, T value) : KernelBase(data.size()), data(data), value(value)
inline void op(IndexInt idx, std::vector<T> &data, T value) const
data[idx] = value;
inline std::vector<T> &getArg0()
return data;
typedef std::vector<T> type0;
inline T &getArg1()
return value;
typedef T type1;
void runMessage()
debMsg("Executing kernel knSet ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, data, value);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<T> &data;
T value;
template<class T> struct knCopyToVector : public KernelBase {
knCopyToVector(std::vector<T> &dst, const Grid<T> &src)
: KernelBase(dst.size()), dst(dst), src(src)
inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) const
dst[idx] = src[idx];
inline std::vector<T> &getArg0()
return dst;
typedef std::vector<T> type0;
inline const Grid<T> &getArg1()
return src;
typedef Grid<T> type1;
void runMessage()
debMsg("Executing kernel knCopyToVector ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<T> &dst;
const Grid<T> &src;
template<class T> struct knCopyToGrid : public KernelBase {
knCopyToGrid(const std::vector<T> &src, Grid<T> &dst)
: KernelBase(src.size()), src(src), dst(dst)
inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) const
dst[idx] = src[idx];
inline const std::vector<T> &getArg0()
return src;
typedef std::vector<T> type0;
inline Grid<T> &getArg1()
return dst;
typedef Grid<T> type1;
void runMessage()
debMsg("Executing kernel knCopyToGrid ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, src, dst);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
const std::vector<T> &src;
Grid<T> &dst;
template<class T> struct knAddAssign : public KernelBase {
knAddAssign(std::vector<T> &dst, const std::vector<T> &src)
: KernelBase(dst.size()), dst(dst), src(src)
inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) const
dst[idx] += src[idx];
inline std::vector<T> &getArg0()
return dst;
typedef std::vector<T> type0;
inline const std::vector<T> &getArg1()
return src;
typedef std::vector<T> type1;
void runMessage()
debMsg("Executing kernel knAddAssign ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<T> &dst;
const std::vector<T> &src;
// Performs one V-cycle. The initial guess is taken from src if given, otherwise
// zero; the result is written to dst and the level-0 residual norm is returned.
// Requires that both A and the rhs have been set (asserted).
// NOTE(review): this copy is incomplete -- block braces are absent and whole
// statements appear to be missing: nothing visible fills mr[l] before it is
// restricted, nothing happens between the timeR and timeCG timing macros
// (presumably the coarsest-level solve), and the final debMsg inside
// MG_TIMINGS is cut off. Confirm against the upstream Mantaflow source.
Real GridMg::doVCycle(Grid<Real> &dst, const Grid<Real> *src)
MG_TIMINGS(MuTime timeSmooth; MuTime timeCG; MuTime timeI; MuTime timeR; MuTime timeTotal;
MuTime time;)
MG_TIMINGS(timeSmooth.clear(); timeCG.clear(); timeI.clear(); timeR.clear();)
assertMsg(mIsASet && mIsRhsSet, "GridMg::doVCycle Error: A and/or rhs have not been set.");
const int maxLevel = int(mA.size()) - 1;
// Initial guess on level 0: copy of *src, or zero when src is null.
if (src) {
knCopyToVector<Real>(mx[0], *src);
else {
knSet<Real>(mx[0], Real(0));
// Downward sweep: pre-smooth on level l, restrict mr[l] into the rhs of
// level l+1 and start level l+1 from a zero guess.
for (int l = 0; l < maxLevel; l++) {
for (int i = 0; i < mNumPreSmooth; i++) {
smoothGS(l, false);
MG_TIMINGS(timeSmooth += time.update();)
restrict(l + 1, mr[l], mb[l + 1]);
knSet<Real>(mx[l + 1], Real(0));
MG_TIMINGS(timeR += time.update();)
MG_TIMINGS(timeCG += time.update();)
// Upward sweep: interpolate the coarse solution into mr[l], add it to mx[l],
// then post-smooth in reversed colour order.
for (int l = maxLevel - 1; l >= 0; l--) {
interpolate(l, mx[l + 1], mr[l]);
knAddAssign<Real>(mx[l], mr[l]);
MG_TIMINGS(timeI += time.update();)
for (int i = 0; i < mNumPostSmooth; i++) {
smoothGS(l, true);
MG_TIMINGS(timeSmooth += time.update();)
Real res = calcResidualNorm(0);
knCopyToGrid<Real>(mx[0], dst);
MG_TIMINGS(debMsg("GridMg: Finished VCycle in "
<< timeTotal.update() << " (smoothing: " << timeSmooth
<< ", CG: " << timeCG << ", R: " << timeR << ", I: " << timeI << ")",
return res;
struct knActivateCoarseVertices : public KernelBase {
knActivateCoarseVertices(std::vector<GridMg::VertexType> &type, int unused)
: KernelBase(type.size()), type(type), unused(unused)
inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) const
// set all remaining 'free' vertices to 'removed',
if (type[idx] == GridMg::vtFree)
type[idx] = GridMg::vtRemoved;
// then convert 'zero' vertices to 'active' and 'removed' vertices to 'inactive'
if (type[idx] == GridMg::vtZero)
type[idx] = GridMg::vtActive;
if (type[idx] == GridMg::vtRemoved)
type[idx] = GridMg::vtInactive;
inline std::vector<GridMg::VertexType> &getArg0()
return type;
typedef std::vector<GridMg::VertexType> type0;
inline int &getArg1()
return unused;
typedef int type1;
void runMessage()
debMsg("Executing kernel knActivateCoarseVertices ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, type, unused);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<GridMg::VertexType> &type;
int unused;
// Determine active cells on coarse level l from active cells on fine level l-1
// while ensuring a full-rank interpolation operator (see Section 3.3 in [1]).
void GridMg::genCoarseGrid(int l)
// AF_Free: unused/untouched vertices
// AF_Zero: vertices selected for coarser level
// AF_Removed: vertices removed from coarser level
enum activeFlags : char { AF_Removed = 0, AF_Zero = 1, AF_Free = 2 };
// initialize all coarse vertices with 'free'
knSet<VertexType>(mType[l], vtFree);
// initialize min heap of (ID: fine grid vertex, key: #free interpolation vertices) pairs
NKMinHeap heap(int(mb[l - 1].size()),
mIs3D ? 9 : 5); // max 8 (or 4 in 2D) free interpolation vertices
FOR_LVL(v, l - 1)
if (mType[l - 1][v] != vtInactive) {
Vec3i V = vecIdx(v, l - 1);
int fiv = 1 << ((V.x % 2) + (V.y % 2) + (V.z % 2));
heap.setKey(v, fiv);
// process fine vertices in heap consecutively, always choosing the vertex with
// the currently smallest number of free interpolation vertices
while (heap.size() > 0) {
int v = heap.popMin().first;
Vec3i V = vecIdx(v, l - 1);
// loop over associated interpolation vertices of V on coarse level l:
// the first encountered 'free' vertex is set to 'zero',
// all remaining 'free' vertices are set to 'removed'.
bool vdone = false;
FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2)
int i = linIdx(I, l);
if (mType[l][i] == vtFree) {
if (vdone) {
mType[l][i] = vtRemoved;
else {
mType[l][i] = vtZero;
vdone = true;
// update #free interpolation vertices in heap:
// loop over all associated restriction vertices of I on fine level l-1
FOR_VEC_MINMAX(R, vmax(0, I * 2 - 1), vmin(mSize[l - 1] - 1, I * 2 + 1))
int r = linIdx(R, l - 1);
int key = heap.getKey(r);
if (key > 1) {
heap.setKey(r, key - 1);
} // decrease key of r
else if (key > -1) {
heap.setKey(r, -1);
} // removes r from heap
knActivateCoarseVertices(mType[l], 0);
struct knGenCoarseGridOperator : public KernelBase {
knGenCoarseGridOperator(std::vector<Real> &sizeRef,
std::vector<Real> &A,
int l,
const GridMg &mg)
: KernelBase(sizeRef.size()), sizeRef(sizeRef), A(A), l(l), mg(mg)
inline void op(IndexInt idx,
std::vector<Real> &sizeRef,
std::vector<Real> &A,
int l,
const GridMg &mg) const
if (mg.mType[l][idx] == GridMg::vtInactive)
for (int i = 0; i < mg.mStencilSize; i++) {
A[idx * mg.mStencilSize + i] = Real(0);
} // clear stencil
Vec3i V = mg.vecIdx(int(idx), l);
// Calculate the stencil of A_l at V by considering all vertex paths of the form:
// (V) <--restriction-- (U) <--A_{l-1}-- (W) <--interpolation-- (N)
// V and N are vertices on the coarse grid level l,
// U and W are vertices on the fine grid level l-1.
if (l == 1) {
// loop over precomputed paths
for (auto it = mg.mCoarseningPaths0.begin(); it != mg.mCoarseningPaths0.end(); it++) {
Vec3i N = V + it->N;
int n = mg.linIdx(N, l);
if (!mg.inGrid(N, l) || mg.mType[l][n] == GridMg::vtInactive)
Vec3i U = V * 2 + it->U;
int u = mg.linIdx(U, l - 1);
if (!mg.inGrid(U, l - 1) || mg.mType[l - 1][u] == GridMg::vtInactive)
Vec3i W = V * 2 + it->W;
int w = mg.linIdx(W, l - 1);
if (!mg.inGrid(W, l - 1) || mg.mType[l - 1][w] == GridMg::vtInactive)
if (it->inUStencil) {
A[idx * mg.mStencilSize + it->sc] += it->rw *
mg.mA[l - 1][u * mg.mStencilSize0 + it->sf] *
else {
A[idx * mg.mStencilSize + it->sc] += it->rw *
mg.mA[l - 1][w * mg.mStencilSize0 + it->sf] *
else {
// l > 1:
// loop over restriction vertices U on level l-1 associated with V
FOR_VEC_MINMAX(U, vmax(0, V * 2 - 1), vmin(mg.mSize[l - 1] - 1, V * 2 + 1))
int u = mg.linIdx(U, l - 1);
if (mg.mType[l - 1][u] == GridMg::vtInactive)
// restriction weight
Real rw = Real(1) / Real(1 << ((U.x % 2) + (U.y % 2) + (U.z % 2)));
// loop over all stencil neighbors N of V on level l that can be reached via restriction to
// U
FOR_VEC_MINMAX(N, (U - 1) / 2, vmin(mg.mSize[l] - 1, (U + 2) / 2))
int n = mg.linIdx(N, l);
if (mg.mType[l][n] == GridMg::vtInactive)
// stencil entry at V associated to N (coarse grid level l)
Vec3i SC = N - V + mg.mStencilMax;
int sc = SC.x + 3 * SC.y + 9 * SC.z;
if (sc < mg.mStencilSize - 1)
// loop over all vertices W which are in the stencil of A_{l-1} at U
// and which interpolate from N
vmax(0, vmax(U - 1, N * 2 - 1)),
vmin(mg.mSize[l - 1] - 1, vmin(U + 1, N * 2 + 1)))
int w = mg.linIdx(W, l - 1);
if (mg.mType[l - 1][w] == GridMg::vtInactive)
// stencil entry at U associated to W (fine grid level l-1)
Vec3i SF = W - U + mg.mStencilMax;
int sf = SF.x + 3 * SF.y + 9 * SF.z;
Real iw = Real(1) /
Real(1 << ((W.x % 2) + (W.y % 2) + (W.z % 2))); // interpolation weight
if (sf < mg.mStencilSize) {
A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] +=
rw * mg.mA[l - 1][w * mg.mStencilSize + mg.mStencilSize - 1 - sf] * iw;
else {
A[idx * mg.mStencilSize + sc - mg.mStencilSize + 1] +=
rw * mg.mA[l - 1][u * mg.mStencilSize + sf - mg.mStencilSize + 1] * iw;
inline std::vector<Real> &getArg0()
return sizeRef;
typedef std::vector<Real> type0;
inline std::vector<Real> &getArg1()
return A;
typedef std::vector<Real> type1;
inline int &getArg2()
return l;
typedef int type2;
inline const GridMg &getArg3()
return mg;
typedef GridMg type3;
void runMessage()
debMsg("Executing kernel knGenCoarseGridOperator ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, sizeRef, A, l, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &sizeRef;
std::vector<Real> &A;
int l;
const GridMg &mg;
// Calculate A_l on coarse level l from A_{l-1} on fine level l-1 using
// Galerkin-based coarsening, i.e., compute A_l = R * A_{l-1} * I.
void GridMg::genCoraseGridOperator(int l)
// for each coarse grid vertex V
knGenCoarseGridOperator(mx[l], mA[l], l, *this);
struct knSmoothColor : public KernelBase {
knSmoothColor(ThreadSize &numBlocks,
std::vector<Real> &x,
const Vec3i &blockSize,
const std::vector<Vec3i> &colorOffs,
int l,
const GridMg &mg)
: KernelBase(numBlocks.size()),
inline void op(IndexInt idx,
ThreadSize &numBlocks,
std::vector<Real> &x,
const Vec3i &blockSize,
const std::vector<Vec3i> &colorOffs,
int l,
const GridMg &mg) const
Vec3i blockOff(int(idx) % blockSize.x,
(int(idx) % (blockSize.x * blockSize.y)) / blockSize.x,
int(idx) / (blockSize.x * blockSize.y));
for (int off = 0; off < colorOffs.size(); off++) {
Vec3i V = blockOff * 2 + colorOffs[off];
if (!mg.inGrid(V, l))
const int v = mg.linIdx(V, l);
if (mg.mType[l][v] == GridMg::vtInactive)
Real sum = mg.mb[l][v];
if (l == 0) {
int n;
for (int d = 0; d < mg.mDim; d++) {
if (V[d] > 0) {
n = v - mg.mPitch[0][d];
sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] *[0][n];
if (V[d] < mg.mSize[0][d] - 1) {
n = v + mg.mPitch[0][d];
sum -= mg.mA[0][v * mg.mStencilSize0 + d + 1] *[0][n];
x[v] = sum / mg.mA[0][v * mg.mStencilSize0 + 0];
else {
FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax)
if (s == mg.mStencilSize - 1)
Vec3i N = V + S;
int n = mg.linIdx(N, l);
if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) {
if (s < mg.mStencilSize) {
sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] *[l][n];
else {
sum -= mg.mA[l][v * mg.mStencilSize + s - mg.mStencilSize + 1] *[l][n];
x[v] = sum / mg.mA[l][v * mg.mStencilSize + 0];
inline ThreadSize &getArg0()
return numBlocks;
typedef ThreadSize type0;
inline std::vector<Real> &getArg1()
return x;
typedef std::vector<Real> type1;
inline const Vec3i &getArg2()
return blockSize;
typedef Vec3i type2;
inline const std::vector<Vec3i> &getArg3()
return colorOffs;
typedef std::vector<Vec3i> type3;
inline int &getArg4()
return l;
typedef int type4;
inline const GridMg &getArg5()
return mg;
typedef GridMg type5;
void runMessage()
debMsg("Executing kernel knSmoothColor ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, numBlocks, x, blockSize, colorOffs, l, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
ThreadSize &numBlocks;
std::vector<Real> &x;
const Vec3i &blockSize;
const std::vector<Vec3i> &colorOffs;
int l;
const GridMg &mg;
void GridMg::smoothGS(int l, bool reversedOrder)
// Multicolor Gauss-Seidel with two colors for the 5/7-point stencil on level 0
// and with four/eight colors for the 9/27-point stencil on levels > 0
std::vector<std::vector<Vec3i>> colorOffs;
const Vec3i a[8] = {Vec3i(0, 0, 0),
Vec3i(1, 0, 0),
Vec3i(0, 1, 0),
Vec3i(1, 1, 0),
Vec3i(0, 0, 1),
Vec3i(1, 0, 1),
Vec3i(0, 1, 1),
Vec3i(1, 1, 1)};
if (mIs3D) {
if (l == 0)
colorOffs = {{a[0], a[3], a[5], a[6]}, {a[1], a[2], a[4], a[7]}};
colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}, {a[4]}, {a[5]}, {a[6]}, {a[7]}};
else {
if (l == 0)
colorOffs = {{a[0], a[3]}, {a[1], a[2]}};
colorOffs = {{a[0]}, {a[1]}, {a[2]}, {a[3]}};
// Divide grid into 2x2 blocks for parallelization
Vec3i blockSize = (mSize[l] + 1) / 2;
ThreadSize numBlocks(blockSize.x * blockSize.y * blockSize.z);
for (int c = 0; c < colorOffs.size(); c++) {
int color = reversedOrder ? int(colorOffs.size()) - 1 - c : c;
knSmoothColor(numBlocks, mx[l], blockSize, colorOffs[color], l, *this);
struct knCalcResidual : public KernelBase {
knCalcResidual(std::vector<Real> &r, int l, const GridMg &mg)
: KernelBase(r.size()), r(r), l(l), mg(mg)
inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) const
if (mg.mType[l][idx] == GridMg::vtInactive)
Vec3i V = mg.vecIdx(int(idx), l);
Real sum = mg.mb[l][idx];
if (l == 0) {
int n;
for (int d = 0; d < mg.mDim; d++) {
if (V[d] > 0) {
n = int(idx) - mg.mPitch[0][d];
sum -= mg.mA[0][n * mg.mStencilSize0 + d + 1] *[0][n];
if (V[d] < mg.mSize[0][d] - 1) {
n = int(idx) + mg.mPitch[0][d];
sum -= mg.mA[0][idx * mg.mStencilSize0 + d + 1] *[0][n];
sum -= mg.mA[0][idx * mg.mStencilSize0 + 0] *[0][idx];
else {
FOR_VECLIN_MINMAX(S, s, mg.mStencilMin, mg.mStencilMax)
Vec3i N = V + S;
int n = mg.linIdx(N, l);
if (mg.inGrid(N, l) && mg.mType[l][n] != GridMg::vtInactive) {
if (s < mg.mStencilSize) {
sum -= mg.mA[l][n * mg.mStencilSize + mg.mStencilSize - 1 - s] *[l][n];
else {
sum -= mg.mA[l][idx * mg.mStencilSize + s - mg.mStencilSize + 1] *[l][n];
r[idx] = sum;
inline std::vector<Real> &getArg0()
return r;
typedef std::vector<Real> type0;
inline int &getArg1()
return l;
typedef int type1;
inline const GridMg &getArg2()
return mg;
typedef GridMg type2;
void runMessage()
debMsg("Executing kernel knCalcResidual ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, r, l, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &r;
int l;
const GridMg &mg;
void GridMg::calcResidual(int l)
knCalcResidual(mr[l], l, *this);
struct knResidualNormSumSqr : public KernelBase {
knResidualNormSumSqr(const vector<Real> &r, int l, const GridMg &mg)
: KernelBase(r.size()), r(r), l(l), mg(mg), result(Real(0))
inline void op(IndexInt idx, const vector<Real> &r, int l, const GridMg &mg, Real &result)
if (mg.mType[l][idx] == GridMg::vtInactive)
result += r[idx] * r[idx];
inline operator Real()
return result;
inline Real &getRet()
return result;
inline const vector<Real> &getArg0()
return r;
typedef vector<Real> type0;
inline int &getArg1()
return l;
typedef int type1;
inline const GridMg &getArg2()
return mg;
typedef GridMg type2;
void runMessage()
debMsg("Executing kernel knResidualNormSumSqr ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r)
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, r, l, mg, result);
void run()
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this);
knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split)
: KernelBase(o), r(o.r), l(o.l), mg(, result(Real(0))
void join(const knResidualNormSumSqr &o)
result += o.result;
const vector<Real> &r;
int l;
const GridMg &mg;
Real result;
// Returns the square root of the sum of squares that knResidualNormSumSqr
// computes from the residual vector mr[l] of level l.
Real GridMg::calcResidualNorm(int l)
{
  const Real sumOfSquares = knResidualNormSumSqr(mr[l], l, *this);
  return std::sqrt(sumOfSquares);
}
// Standard conjugate gradients with Jacobi preconditioner
// Notes: Always run at double precision. Not parallelized since
// coarsest level is assumed to be small.
void GridMg::solveCG(int l)
auto applyAStencil = [this](int v, int l, const std::vector<double> &vec) -> double {
Vec3i V = vecIdx(v, l);
double sum = 0;
if (l == 0) {
int n;
for (int d = 0; d < mDim; d++) {
if (V[d] > 0) {
n = v - mPitch[0][d];
sum += mA[0][n * mStencilSize0 + d + 1] * vec[n];
if (V[d] < mSize[0][d] - 1) {
n = v + mPitch[0][d];
sum += mA[0][v * mStencilSize0 + d + 1] * vec[n];
sum += mA[0][v * mStencilSize0 + 0] * vec[v];
else {
FOR_VECLIN_MINMAX(S, s, mStencilMin, mStencilMax)
Vec3i N = V + S;
int n = linIdx(N, l);
if (inGrid(N, l) && mType[l][n] != vtInactive) {
if (s < mStencilSize) {
sum += mA[l][n * mStencilSize + mStencilSize - 1 - s] * vec[n];
else {
sum += mA[l][v * mStencilSize + s - mStencilSize + 1] * vec[n];
return sum;
std::vector<double> &z = mCGtmp1[l];
std::vector<double> &p = mCGtmp2[l];
std::vector<double> &x = mCGtmp3[l];
std::vector<double> &r = mCGtmp4[l];
// Initialization:
double alphaTop = 0;
double initialResidual = 0;
FOR_LVL(v, l)
x[v] = mx[l][v];
FOR_LVL(v, l)
if (mType[l][v] == vtInactive)
r[v] = mb[l][v] - applyAStencil(v, l, x);
if (l == 0) {
z[v] = r[v] / mA[0][v * mStencilSize0 + 0];
else {
z[v] = r[v] / mA[l][v * mStencilSize + 0];
initialResidual += r[v] * r[v];
p[v] = z[v];
alphaTop += r[v] * z[v];
initialResidual = std::sqrt(initialResidual);
int iter = 0;
const int maxIter = 10000;
double residual = -1;
// CG iterations
for (; iter < maxIter && initialResidual > 1E-12; iter++) {
double alphaBot = 0;
FOR_LVL(v, l)
if (mType[l][v] == vtInactive)
z[v] = applyAStencil(v, l, p);
alphaBot += p[v] * z[v];
double alpha = alphaTop / alphaBot;
double alphaTopNew = 0;
residual = 0;
FOR_LVL(v, l)
if (mType[l][v] == vtInactive)
x[v] += alpha * p[v];
r[v] -= alpha * z[v];
residual += r[v] * r[v];
if (l == 0)
z[v] = r[v] / mA[0][v * mStencilSize0 + 0];
z[v] = r[v] / mA[l][v * mStencilSize + 0];
alphaTopNew += r[v] * z[v];
residual = std::sqrt(residual);
if (residual / initialResidual < mCoarsestLevelAccuracy)
double beta = alphaTopNew / alphaTop;
alphaTop = alphaTopNew;
FOR_LVL(v, l)
p[v] = z[v] + beta * p[v];
debMsg("GridMg::solveCG i=" << iter << " rel-residual=" << (residual / initialResidual), 5);
FOR_LVL(v, l)
mx[l][v] = Real(x[v]);
if (iter == maxIter) {
debMsg("GridMg::solveCG Warning: Reached maximum number of CG iterations", 1);
else {
debMsg("GridMg::solveCG Info: Reached residual " << residual << " in " << iter
<< " iterations",
struct knRestrict : public KernelBase {
knRestrict(std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, const GridMg &mg)
: KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg)
inline void op(IndexInt idx,
std::vector<Real> &dst,
const std::vector<Real> &src,
int l_dst,
const GridMg &mg) const
if (mg.mType[l_dst][idx] == GridMg::vtInactive)
const int l_src = l_dst - 1;
// Coarse grid vertex
Vec3i V = mg.vecIdx(int(idx), l_dst);
Real sum = Real(0);
FOR_VEC_MINMAX(R, vmax(0, V * 2 - 1), vmin(mg.mSize[l_src] - 1, V * 2 + 1))
int r = mg.linIdx(R, l_src);
if (mg.mType[l_src][r] == GridMg::vtInactive)
// restriction weight
Real rw = Real(1) / Real(1 << ((R.x % 2) + (R.y % 2) + (R.z % 2)));
sum += rw * src[r];
dst[idx] = sum;
inline std::vector<Real> &getArg0()
return dst;
typedef std::vector<Real> type0;
inline const std::vector<Real> &getArg1()
return src;
typedef std::vector<Real> type1;
inline int &getArg2()
return l_dst;
typedef int type2;
inline const GridMg &getArg3()
return mg;
typedef GridMg type3;
void runMessage()
debMsg("Executing kernel knRestrict ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src, l_dst, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &dst;
const std::vector<Real> &src;
int l_dst;
const GridMg &mg;
void GridMg::restrict(int l_dst, const std::vector<Real> &src, std::vector<Real> &dst) const
knRestrict(dst, src, l_dst, *this);
struct knInterpolate : public KernelBase {
knInterpolate(std::vector<Real> &dst, const std::vector<Real> &src, int l_dst, const GridMg &mg)
: KernelBase(dst.size()), dst(dst), src(src), l_dst(l_dst), mg(mg)
inline void op(IndexInt idx,
std::vector<Real> &dst,
const std::vector<Real> &src,
int l_dst,
const GridMg &mg) const
if (mg.mType[l_dst][idx] == GridMg::vtInactive)
const int l_src = l_dst + 1;
Vec3i V = mg.vecIdx(int(idx), l_dst);
Real sum = Real(0);
FOR_VEC_MINMAX(I, V / 2, (V + 1) / 2)
int i = mg.linIdx(I, l_src);
if (mg.mType[l_src][i] != GridMg::vtInactive)
sum += src[i];
// interpolation weight
Real iw = Real(1) / Real(1 << ((V.x % 2) + (V.y % 2) + (V.z % 2)));
dst[idx] = iw * sum;
inline std::vector<Real> &getArg0()
return dst;
typedef std::vector<Real> type0;
inline const std::vector<Real> &getArg1()
return src;
typedef std::vector<Real> type1;
inline int &getArg2()
return l_dst;
typedef int type2;
inline const GridMg &getArg3()
return mg;
typedef GridMg type3;
void runMessage()
debMsg("Executing kernel knInterpolate ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
void operator()(const tbb::blocked_range<IndexInt> &__r) const
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src, l_dst, mg);
void run()
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
std::vector<Real> &dst;
const std::vector<Real> &src;
int l_dst;
const GridMg &mg;
void GridMg::interpolate(int l_dst, const std::vector<Real> &src, std::vector<Real> &dst) const
knInterpolate(dst, src, l_dst, *this);
}; // namespace Manta