- Added OpenMP code, it is enabled by defining PARALLEL=1 for the elbeem

compilation.  Currently, it is not yet active by default, but 
  Genscher wanted to do some tests. 
  It can be used to distribute the computation load onto multiple shared-
  memory CPUs by splitting the domain along the y-axis (assuming a 
  gravity force along z). However, there is no load balancing: so
  if there's fluid only in one of the y-axis halves you will not get 
  a speedup for 2 CPUs.  

- Added a fix for the memory allocation bugs #7120 and #6775. In 
  solver_init.cpp there are now several variables max___MemChunk 
  (line 692+), that set upper limits for various systems. The same
  problem existed for mac & linux, but the limit is higher, so 
  it probably went by undetected. The windows limit is currently 1GB,
  if the strange 700MB limit problems mentioned in the bug regports the 
  bugs persist, this could be further reduced. For 64bit compilations 
  this problem shouldn't exist anyway.
  What's still missing is a display of how much the resolution was 
  reduced to fit into memory...

- And some minor solver code cleanup.
This commit is contained in:
Nils Thuerey 2007-11-21 22:12:16 +00:00
parent 413c24c746
commit 818bfcca63
14 changed files with 159 additions and 35 deletions

@ -154,7 +154,7 @@ typedef struct elbeemMesh {
short volumeInitType;
/* name of the mesh, mostly for debugging */
char *name;
const char *name;
} elbeemMesh;
// API functions

@ -103,7 +103,7 @@ void AttributeList::readMat4Gfx(string name, ntlMat4Gfx defaultValue, string sou
// set that a parameter can be given, and will be ignored...
bool AttributeList::ignoreParameter(string name, string source) {
name=source=(""); // remove warning
name = source = ("");
return false;
}

@ -54,15 +54,84 @@
#define unused_GRID_REGION_END() \
} /* main_region */ \
// end unusedGRID_REGION_END
// -----------------------------------------------------------------------------------
#else // PARALLEL==1
#include "paraloop.h"
//#include "paraloop.h"
#define PERFORM_USQRMAXCHECK USQRMAXCHECK(usqr,ux,uy,uz, calcMaxVlen, calcMxvx,calcMxvy,calcMxvz);
#define LIST_EMPTY(x) calcListEmpty.push_back( x );
#define LIST_FULL(x) calcListFull.push_back( x );
#define FSGR_ADDPART(x) calcListParts.push_back( x );
// parallel region
//was: # pragma omp parallel default(shared)
#if COMPRESSGRIDS!=1
// requires compressed grids...!
ERROR!
#endif
// loop start
#define GRID_REGION_START() \
{ \
\
\
if(mSizez<2) { \
mPanic = 1; \
errFatal("ParaLoop::2D","Not valid...!", SIMWORLD_GENERICERROR); \
} \
\
\
vector<LbmPoint> calcListFull; \
vector<LbmPoint> calcListEmpty; \
vector<ParticleObject> calcListParts; \
LbmFloat calcMxvx, calcMxvy, calcMxvz, calcMaxVlen; \
calcMxvx = calcMxvy = calcMxvz = calcMaxVlen = 0.0; \
calcListEmpty.reserve(mListEmpty.capacity() / omp_get_num_threads() ); \
calcListFull.reserve( mListFull.capacity() / omp_get_num_threads() ); \
calcListParts.reserve(mSizex); \
\
\
const int id = omp_get_thread_num(); \
const int Nthrds = omp_get_num_threads(); \
\
\
\
\
\
int kdir = 1; \
\
int kstart=getForZMinBnd(), kend=getForZMaxBnd(mMaxRefine); \
if(gridLoopBound>0){ kstart=getForZMin1(); kend=getForZMax1(mMaxRefine); } \
LbmFloat *ccel = NULL, *tcel = NULL; \
CellFlagType *pFlagSrc=NULL, *pFlagDst=NULL; \
\
\
if(mLevel[mMaxRefine].setCurr==1) { \
kdir = -1; \
int temp = kend; \
kend = kstart-1; \
kstart = temp-1; \
} \
\
const int Nj = mLevel[mMaxRefine].lSizey; \
int jstart = 0+( id * (Nj / Nthrds) ); \
int jend = 0+( (id+1) * (Nj / Nthrds) ); \
if( ((Nj/Nthrds) *Nthrds) != Nj) { \
errMsg("LbmFsgrSolver","Invalid domain size Nj="<<Nj<<" Nthrds="<<Nthrds); \
} \
\
if(jstart<gridLoopBound) jstart = gridLoopBound; \
if(jend>mLevel[mMaxRefine].lSizey-gridLoopBound) jend = mLevel[mMaxRefine].lSizey-gridLoopBound; \
\
debMsgStd("ParaLoop::OMP",DM_MSG,"Thread:"<<id<<" i:"<<istart<<"-"<<iend<<" j:"<<jstart<<"-"<<jend<<", k:"<<kstart<<"-"<<kend<<" ", 1); \
\
// para GRID LOOP END is parainc3
#endif // PARALLEL==1
@ -101,9 +170,11 @@
// old loop for COMPRESSGRIDS==0
#define old__GRID_LOOP_START() \
for(int k=kstart;k<kend;++k) { \
for(int j=1;j<mLevel[lev].lSizey-1;++j) { \
for(int i=0;i<mLevel[lev].lSizex-2; ) {

@ -22,6 +22,7 @@
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
// hack for MSVC6.0 compiler
#ifdef _MSC_VER

@ -325,7 +325,7 @@ void ParticleTracer::getTriangles(double time, vector<ntlTriangle> *triangles,
// suppress warnings...
vertices = NULL; triangles = NULL;
normals = NULL; objectId = 0;
time = 0.0;
time = 0.;
#else // ELBEEM_PLUGIN
int pcnt = 0;
// currently not used in blender

@ -15,7 +15,6 @@
#include "solver_interface.h"
#include "particletracer.h"
#include "elbeem.h"
#include <stdlib.h> /* exit(3) - also in linux */
#ifdef _WIN32
#else
@ -69,6 +68,7 @@ SimulationObject::~SimulationObject()
/*! init tree for certain geometry init */
/*****************************************************************************/
void SimulationObject::initGeoTree() {
// unused!! overriden by solver interface
if(mpGlob == NULL) {
errFatal("SimulationObject::initGeoTree error","Requires globals!", SIMWORLD_INITERROR);
return;
@ -80,7 +80,7 @@ void SimulationObject::initGeoTree() {
char treeFlag = (1<<(mGeoInitId+4));
mpGiTree = new ntlTree( 20, 4, // warning - fixed values for depth & maxtriangles here...
scene, treeFlag );
exit(1); // unused!? overriden by solver interface
// unused!! overriden by solver interface
}
/*****************************************************************************/
@ -310,7 +310,7 @@ void SimulationObject::step( void )
// dont advance for stopped time
mpLbm->step();
mTime += mpParam->getTimestep();
//if(mTime>0.001) { errMsg("DEBUG!!!!!!!!","quit mlsu..."); exit(1); } // PROFILE DEBUG TEST!
//if(mTime>0.001) { errMsg("DEBUG!!!!!!!!","quit mlsu..."); xit(1); } // PROFILE DEBUG TEST!
}
if(mpLbm->getPanic()) mPanic = true;

@ -101,7 +101,7 @@
// sirdude fix for solaris
#if !defined(linux) && defined(sun)
#ifndef expf
#define expf(a) exp((double)(a))
#define expf(x) exp((double)(x))
#endif
#endif

@ -655,6 +655,7 @@ bool LbmFsgrSolver::initializeSolverMemory()
int orgSz = mSizez;
double sizeReduction = 1.0;
double memEstFromFunc = -1.0;
double memEstFine = -1.0;
string memreqStr("");
bool firstMInit = true;
int minitTries=0;
@ -672,7 +673,7 @@ bool LbmFsgrSolver::initializeSolverMemory()
firstMInit=false;
calculateMemreqEstimate( mSizex, mSizey, mSizez,
mMaxRefine, mFarFieldSize, &memEstFromFunc, &memreqStr );
mMaxRefine, mFarFieldSize, &memEstFromFunc, &memEstFine, &memreqStr );
double memLimit;
string memLimStr("-");
@ -685,13 +686,36 @@ bool LbmFsgrSolver::initializeSolverMemory()
memLimit = 16.0* 1024.0*1024.0*1024.0;
memLimStr = string("16GB");
}
if(memEstFromFunc>memLimit) {
// restrict max. chunk of 1 mem block to 1GB for windos
bool memBlockAllocProblem = false;
double maxWinMemChunk = 1100.*1024.*1024.;
double maxMacMemChunk = 1200.*1024.*1024.;
double maxDefaultMemChunk = 2.*1024.*1024.*1024.;
//std::cerr<<" memEstFine "<< memEstFine <<" maxWin:" <<maxWinMemChunk <<" maxMac:" <<maxMacMemChunk ; // DEBUG
#ifdef WIN32
if(memEstFine> maxWinMemChunk) {
memBlockAllocProblem = true;
}
#endif // WIN32
#ifdef __APPLE__
if(memEstFine> maxMacMemChunk) {
memBlockAllocProblem = true;
}
#endif // Mac
if(sizeof(int)==4 && memEstFine>maxDefaultMemChunk) {
// max memory chunk for 32bit systems 2gig
memBlockAllocProblem = true;
}
if(memEstFromFunc>memLimit || memBlockAllocProblem) {
sizeReduction *= 0.9;
mSizex = (int)(orgSx * sizeReduction);
mSizey = (int)(orgSy * sizeReduction);
mSizez = (int)(orgSz * sizeReduction);
debMsgStd("LbmFsgrSolver::initialize",DM_WARNING,"initGridSizes: memory limit exceeded "<<
//memEstFromFunc<<"/"<<memLimit<<", "<<
//memEstFine<<"/"<<maxWinMemChunk<<", "<<
memreqStr<<"/"<<memLimStr<<", "<<
"retrying: "<<PRINT_VEC(mSizex,mSizey,mSizez)<<" org:"<<PRINT_VEC(orgSx,orgSy,orgSz)
, 3 );
@ -778,10 +802,6 @@ bool LbmFsgrSolver::initializeSolverMemory()
mLevel[ mMaxRefine ].simCellSize = mpParam->getCellSize();
mLevel[ mMaxRefine ].lcellfactor = 1.0;
LONGINT rcellSize = ((mLevel[mMaxRefine].lSizex*mLevel[mMaxRefine].lSizey*mLevel[mMaxRefine].lSizez) *dTotalNum);
// +4 for safety ?
mLevel[ mMaxRefine ].mprsFlags[0] = new CellFlagType[ rcellSize/dTotalNum +4 ];
mLevel[ mMaxRefine ].mprsFlags[1] = new CellFlagType[ rcellSize/dTotalNum +4 ];
ownMemCheck += 2 * sizeof(CellFlagType) * (rcellSize/dTotalNum +4);
#if COMPRESSGRIDS==0
mLevel[ mMaxRefine ].mprsCells[0] = new LbmFloat[ rcellSize +4 ];
@ -789,11 +809,34 @@ bool LbmFsgrSolver::initializeSolverMemory()
ownMemCheck += 2 * sizeof(LbmFloat) * (rcellSize+4);
#else // COMPRESSGRIDS==0
LONGINT compressOffset = (mLevel[mMaxRefine].lSizex*mLevel[mMaxRefine].lSizey*dTotalNum*2);
// D int tmp = ( (rcellSize +compressOffset +4)/(1024*1024) )*4;
// D printf("Debug MEMMMM excee: %d\n", tmp);
mLevel[ mMaxRefine ].mprsCells[1] = new LbmFloat[ rcellSize +compressOffset +4 ];
mLevel[ mMaxRefine ].mprsCells[0] = mLevel[ mMaxRefine ].mprsCells[1]+compressOffset;
ownMemCheck += sizeof(LbmFloat) * (rcellSize +compressOffset +4);
#endif // COMPRESSGRIDS==0
if(!mLevel[ mMaxRefine ].mprsCells[1] || !mLevel[ mMaxRefine ].mprsCells[0]) {
errFatal("LbmFsgrSolver::initialize","Fatal: Couldnt allocate memory (1)! Aborting...",SIMWORLD_INITERROR);
return false;
}
// +4 for safety ?
mLevel[ mMaxRefine ].mprsFlags[0] = new CellFlagType[ rcellSize/dTotalNum +4 ];
mLevel[ mMaxRefine ].mprsFlags[1] = new CellFlagType[ rcellSize/dTotalNum +4 ];
ownMemCheck += 2 * sizeof(CellFlagType) * (rcellSize/dTotalNum +4);
if(!mLevel[ mMaxRefine ].mprsFlags[1] || !mLevel[ mMaxRefine ].mprsFlags[0]) {
errFatal("LbmFsgrSolver::initialize","Fatal: Couldnt allocate memory (2)! Aborting...",SIMWORLD_INITERROR);
#if COMPRESSGRIDS==0
delete[] mLevel[ mMaxRefine ].mprsCells[0];
delete[] mLevel[ mMaxRefine ].mprsCells[1];
#else // COMPRESSGRIDS==0
delete[] mLevel[ mMaxRefine ].mprsCells[1];
#endif // COMPRESSGRIDS==0
return false;
}
LbmFloat lcfdimFac = 8.0;
if(LBMDIM==2) lcfdimFac = 4.0;
for(int i=mMaxRefine-1; i>=0; i--) {

@ -17,7 +17,6 @@
#include "ntl_world.h"
#include "elbeem.h"
#include <stdlib.h> /* getenv(3) - also in linux */
@ -142,7 +141,7 @@ void initGridSizes(int &sizex, int &sizey, int &sizez,
void calculateMemreqEstimate( int resx,int resy,int resz,
int refine, float farfield,
double *reqret, string *reqstr) {
double *reqret, double *reqretFine, string *reqstr) {
// debug estimation?
const bool debugMemEst = true;
// COMPRESSGRIDS define is not available here, make sure it matches
@ -150,6 +149,7 @@ void calculateMemreqEstimate( int resx,int resy,int resz,
// make sure we can handle bid numbers here... all double
double memCnt = 0.0;
double ddTotalNum = (double)dTotalNum;
if(reqretFine) *reqretFine = -1.;
double currResx = (double)resx;
double currResy = (double)resy;
@ -159,10 +159,12 @@ void calculateMemreqEstimate( int resx,int resy,int resz,
if(debugMemEst) debMsgStd("calculateMemreqEstimate",DM_MSG,"res:"<<PRINT_VEC(currResx,currResy,currResz)<<" rcellSize:"<<rcellSize<<" mc:"<<memCnt, 10);
if(!useGridComp) {
memCnt += (double)(sizeof(LbmFloat) * (rcellSize +4.0) *2.0);
if(reqretFine) *reqretFine = (double)(sizeof(LbmFloat) * (rcellSize +4.0) *2.0);
if(debugMemEst) debMsgStd("calculateMemreqEstimate",DM_MSG," no-comp, mc:"<<memCnt, 10);
} else {
double compressOffset = (double)(currResx*currResy*ddTotalNum*2.0);
memCnt += (double)(sizeof(LbmFloat) * (rcellSize+compressOffset +4.0));
if(reqretFine) *reqretFine = (double)(sizeof(LbmFloat) * (rcellSize+compressOffset +4.0));
if(debugMemEst) debMsgStd("calculateMemreqEstimate",DM_MSG," w-comp, mc:"<<memCnt, 10);
}
for(int i=refine-1; i>=0; i--) {

@ -21,7 +21,7 @@
#if LBM_USE_GUI==1
#define USE_GLUTILITIES
// for debug display
#include <GL/gl.h>
//#include <GL/gl.h>
#include "../gui/guifuncs.h"
#endif
@ -596,8 +596,10 @@ class LbmSolverInterface
void initGridSizes(int &mSizex, int &mSizey, int &mSizez,
ntlVec3Gfx &mvGeoStart, ntlVec3Gfx &mvGeoEnd,
int mMaxRefine, bool parallel);
// return the amount of memory required in total (reqret)
// and for the finest grid only (reqretFine, can be NULL)
void calculateMemreqEstimate(int resx,int resy,int resz, int refine,
float farfieldsize, double *reqret, string *reqstr);
float farfieldsize, double *reqret, double *reqretFine, string *reqstr);
//! helper function to convert flag to string (for debuggin)
string convertCellFlagType2String( CellFlagType flag );

@ -7,11 +7,11 @@
*
*****************************************************************************/
#include <stdlib.h> /* rand(3) - also in linux */
#include "solver_class.h"
#include "solver_relax.h"
#include "particletracer.h"
#include "loop_tools.h"
#include <stdlib.h>
/*****************************************************************************/
/*! perform a single LBM step */
@ -375,7 +375,11 @@ LbmFsgrSolver::mainLoop(int lev)
const int gridLoopBound=1;
GRID_REGION_INIT();
#if PARALLEL==1
#include "paraloopstart.h"
#pragma omp parallel default(shared) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
calcNumUsedCells )
GRID_REGION_START();
#else // PARALLEL==1
GRID_REGION_START();
@ -1112,7 +1116,11 @@ LbmFsgrSolver::preinitGrids()
GRID_REGION_INIT();
#if PARALLEL==1
#include "paraloopstart.h"
#pragma omp parallel default(shared) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
calcNumUsedCells )
#endif // PARALLEL==1
GRID_REGION_START();
GRID_LOOP_START();
@ -1145,7 +1153,11 @@ LbmFsgrSolver::standingFluidPreinit()
GRID_REGION_INIT();
#if PARALLEL==1
#include "paraloopstart.h"
#pragma omp parallel default(shared) \
reduction(+: \
calcCurrentMass,calcCurrentVolume, \
calcCellsFilled,calcCellsEmptied, \
calcNumUsedCells )
#endif // PARALLEL==1
GRID_REGION_START();

@ -15,8 +15,7 @@
#include "ntl_world.h"
#include "simulation_object.h"
#include <stdlib.h> /* rand(3) */
#include <stdlib.h>
#include <zlib.h>
#ifndef sqrtf
#define sqrtf sqrt

@ -10,7 +10,6 @@
#include <iostream>
#include <sstream>
#include <stdlib.h> /* getenv(3), strtol(3) */
#ifdef WIN32
// for timing
#include <windows.h>
@ -482,7 +481,7 @@ double elbeemEstimateMemreq(int res,
double memreq = -1.0;
string memreqStr("");
// ignore farfield for now...
calculateMemreqEstimate(resx,resy,resz, refine, 0., &memreq, &memreqStr );
calculateMemreqEstimate(resx,resy,resz, refine, 0., &memreq, NULL, &memreqStr );
if(retstr) {
// copy at max. 32 characters

@ -9,11 +9,6 @@
#ifndef UTILITIES_H
#include "ntl_vector3dim.h"
// Solaris requires ieeefp.h for finite(3C)
#if !defined(linux) && defined(sun)
#include <ieeefp.h>
#endif
/* debugging outputs , debug level 0 (off) to 10 (max) */
#ifdef ELBEEM_PLUGIN