Smoke: The well known Miika Hämäläinen (aka MiikaH) patch (http://blenderartists.org/forum/showthread.php?t=158317&page=42)

* Better (and windows enabled) OpenMP handling (> 2x-5x speed)
* More Volumetric Texture mapping options (heat, etc) <-- Matt if that's not to your liking, just revert that part, it's separate anyway
* Initial velocity taken from particle settings (no more slow starting)
* Option to select compression method (there seem to be a bug in my high compression usage, at least it's been reported to result in exploding smoke - better use low compression for the time being)

It's been tested since a while but as usual please report any (new!) bugs. ;-)
This commit is contained in:
Daniel Genrich 2010-01-25 15:10:14 +00:00
parent 4b71eaa4d1
commit 83dfade37a
19 changed files with 2323 additions and 749 deletions

@ -0,0 +1,885 @@
#include "EIGENVALUE_HELPER.h"
void Eigentred2(sEigenvalue& eval) {
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
int n=eval.n;
for (int j = 0; j < n; j++) {
eval.d[j] = eval.V[n-1][j];
}
// Householder reduction to tridiagonal form.
for (int i = n-1; i > 0; i--) {
// Scale to avoid under/overflow.
float scale = 0.0;
float h = 0.0;
for (int k = 0; k < i; k++) {
scale = scale + fabs(eval.d[k]);
}
if (scale == 0.0) {
eval.e[i] = eval.d[i-1];
for (int j = 0; j < i; j++) {
eval.d[j] = eval.V[i-1][j];
eval.V[i][j] = 0.0;
eval.V[j][i] = 0.0;
}
} else {
// Generate Householder vector.
for (int k = 0; k < i; k++) {
eval.d[k] /= scale;
h += eval.d[k] * eval.d[k];
}
float f = eval.d[i-1];
float g = sqrt(h);
if (f > 0) {
g = -g;
}
eval.e[i] = scale * g;
h = h - f * g;
eval.d[i-1] = f - g;
for (int j = 0; j < i; j++) {
eval.e[j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for (int j = 0; j < i; j++) {
f = eval.d[j];
eval.V[j][i] = f;
g = eval.e[j] + eval.V[j][j] * f;
for (int k = j+1; k <= i-1; k++) {
g += eval.V[k][j] * eval.d[k];
eval.e[k] += eval.V[k][j] * f;
}
eval.e[j] = g;
}
f = 0.0;
for (int j = 0; j < i; j++) {
eval.e[j] /= h;
f += eval.e[j] * eval.d[j];
}
float hh = f / (h + h);
for (int j = 0; j < i; j++) {
eval.e[j] -= hh * eval.d[j];
}
for (int j = 0; j < i; j++) {
f = eval.d[j];
g = eval.e[j];
for (int k = j; k <= i-1; k++) {
eval.V[k][j] -= (f * eval.e[k] + g * eval.d[k]);
}
eval.d[j] = eval.V[i-1][j];
eval.V[i][j] = 0.0;
}
}
eval.d[i] = h;
}
// Accumulate transformations.
for (int i = 0; i < n-1; i++) {
eval.V[n-1][i] = eval.V[i][i];
eval.V[i][i] = 1.0;
float h = eval.d[i+1];
if (h != 0.0) {
for (int k = 0; k <= i; k++) {
eval.d[k] = eval.V[k][i+1] / h;
}
for (int j = 0; j <= i; j++) {
float g = 0.0;
for (int k = 0; k <= i; k++) {
g += eval.V[k][i+1] * eval.V[k][j];
}
for (int k = 0; k <= i; k++) {
eval.V[k][j] -= g * eval.d[k];
}
}
}
for (int k = 0; k <= i; k++) {
eval.V[k][i+1] = 0.0;
}
}
for (int j = 0; j < n; j++) {
eval.d[j] = eval.V[n-1][j];
eval.V[n-1][j] = 0.0;
}
eval.V[n-1][n-1] = 1.0;
eval.e[0] = 0.0;
}
void Eigencdiv(sEigenvalue& eval, float xr, float xi, float yr, float yi) {
float r,d;
if (fabs(yr) > fabs(yi)) {
r = yi/yr;
d = yr + r*yi;
eval.cdivr = (xr + r*xi)/d;
eval.cdivi = (xi - r*xr)/d;
} else {
r = yr/yi;
d = yi + r*yr;
eval.cdivr = (r*xr + xi)/d;
eval.cdivi = (r*xi - xr)/d;
}
}
void Eigentql2 (sEigenvalue& eval) {
// This is derived from the Algol procedures tql2, by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
int n=eval.n;
for (int i = 1; i < n; i++) {
eval.e[i-1] = eval.e[i];
}
eval.e[n-1] = 0.0;
float f = 0.0;
float tst1 = 0.0;
float eps = pow(2.0,-52.0);
for (int l = 0; l < n; l++) {
// Find small subdiagonal element
tst1 = max(tst1,fabs(eval.d[l]) + fabs(eval.e[l]));
int m = l;
// Original while-loop from Java code
while (m < n) {
if (fabs(eval.e[m]) <= eps*tst1) {
break;
}
m++;
}
// If m == l, d[l] is an eigenvalue,
// otherwise, iterate.
if (m > l) {
int iter = 0;
do {
iter = iter + 1; // (Could check iteration count here.)
// Compute implicit shift
float g = eval.d[l];
float p = (eval.d[l+1] - g) / (2.0 * eval.e[l]);
float r = hypot(p,1.0);
if (p < 0) {
r = -r;
}
eval.d[l] = eval.e[l] / (p + r);
eval.d[l+1] = eval.e[l] * (p + r);
float dl1 = eval.d[l+1];
float h = g - eval.d[l];
for (int i = l+2; i < n; i++) {
eval.d[i] -= h;
}
f = f + h;
// Implicit QL transformation.
p = eval.d[m];
float c = 1.0;
float c2 = c;
float c3 = c;
float el1 = eval.e[l+1];
float s = 0.0;
float s2 = 0.0;
for (int i = m-1; i >= l; i--) {
c3 = c2;
c2 = c;
s2 = s;
g = c * eval.e[i];
h = c * p;
r = hypot(p,eval.e[i]);
eval.e[i+1] = s * r;
s = eval.e[i] / r;
c = p / r;
p = c * eval.d[i] - s * g;
eval.d[i+1] = h + s * (c * g + s * eval.d[i]);
// Accumulate transformation.
for (int k = 0; k < n; k++) {
h = eval.V[k][i+1];
eval.V[k][i+1] = s * eval.V[k][i] + c * h;
eval.V[k][i] = c * eval.V[k][i] - s * h;
}
}
p = -s * s2 * c3 * el1 * eval.e[l] / dl1;
eval.e[l] = s * p;
eval.d[l] = c * p;
// Check for convergence.
} while (fabs(eval.e[l]) > eps*tst1);
}
eval.d[l] = eval.d[l] + f;
eval.e[l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for (int i = 0; i < n-1; i++) {
int k = i;
float p = eval.d[i];
for (int j = i+1; j < n; j++) {
if (eval.d[j] < p) {
k = j;
p = eval.d[j];
}
}
if (k != i) {
eval.d[k] = eval.d[i];
eval.d[i] = p;
for (int j = 0; j < n; j++) {
p = eval.V[j][i];
eval.V[j][i] = eval.V[j][k];
eval.V[j][k] = p;
}
}
}
}
void Eigenorthes (sEigenvalue& eval) {
// This is derived from the Algol procedures orthes and ortran,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutines in EISPACK.
int n=eval.n;
int low = 0;
int high = n-1;
for (int m = low+1; m <= high-1; m++) {
// Scale column.
float scale = 0.0;
for (int i = m; i <= high; i++) {
scale = scale + fabs(eval.H[i][m-1]);
}
if (scale != 0.0) {
// Compute Householder transformation.
float h = 0.0;
for (int i = high; i >= m; i--) {
eval.ort[i] = eval.H[i][m-1]/scale;
h += eval.ort[i] * eval.ort[i];
}
float g = sqrt(h);
if (eval.ort[m] > 0) {
g = -g;
}
h = h - eval.ort[m] * g;
eval.ort[m] = eval.ort[m] - g;
// Apply Householder similarity transformation
// H = (I-u*u'/h)*H*(I-u*u')/h)
for (int j = m; j < n; j++) {
float f = 0.0;
for (int i = high; i >= m; i--) {
f += eval.ort[i]*eval.H[i][j];
}
f = f/h;
for (int i = m; i <= high; i++) {
eval.H[i][j] -= f*eval.ort[i];
}
}
for (int i = 0; i <= high; i++) {
float f = 0.0;
for (int j = high; j >= m; j--) {
f += eval.ort[j]*eval.H[i][j];
}
f = f/h;
for (int j = m; j <= high; j++) {
eval.H[i][j] -= f*eval.ort[j];
}
}
eval.ort[m] = scale*eval.ort[m];
eval.H[m][m-1] = scale*g;
}
}
// Accumulate transformations (Algol's ortran).
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
eval.V[i][j] = (i == j ? 1.0 : 0.0);
}
}
for (int m = high-1; m >= low+1; m--) {
if (eval.H[m][m-1] != 0.0) {
for (int i = m+1; i <= high; i++) {
eval.ort[i] = eval.H[i][m-1];
}
for (int j = m; j <= high; j++) {
float g = 0.0;
for (int i = m; i <= high; i++) {
g += eval.ort[i] * eval.V[i][j];
}
// Double division avoids possible underflow
g = (g / eval.ort[m]) / eval.H[m][m-1];
for (int i = m; i <= high; i++) {
eval.V[i][j] += g * eval.ort[i];
}
}
}
}
}
void Eigenhqr2 (sEigenvalue& eval) {
// This is derived from the Algol procedure hqr2,
// by Martin and Wilkinson, Handbook for Auto. Comp.,
// Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
// Initialize
int nn = eval.n;
int n = nn-1;
int low = 0;
int high = nn-1;
float eps = pow(2.0,-52.0);
float exshift = 0.0;
float p=0,q=0,r=0,s=0,z=0,t,w,x,y;
// Store roots isolated by balanc and compute matrix norm
float norm = 0.0;
for (int i = 0; i < nn; i++) {
if ((i < low) || (i > high)) {
eval.d[i] = eval.H[i][i];
eval.e[i] = 0.0;
}
for (int j = max(i-1,0); j < nn; j++) {
norm = norm + fabs(eval.H[i][j]);
}
}
// Outer loop over eigenvalue index
int iter = 0;
int totIter = 0;
while (n >= low) {
// NT limit no. of iterations
totIter++;
if(totIter>100) {
//if(totIter>15) std::cout<<"!!!!iter ABORT !!!!!!! "<<totIter<<"\n";
// NT hack/fix, return large eigenvalues
for (int i = 0; i < nn; i++) {
eval.d[i] = 10000.;
eval.e[i] = 10000.;
}
return;
}
// Look for single small sub-diagonal element
int l = n;
while (l > low) {
s = fabs(eval.H[l-1][l-1]) + fabs(eval.H[l][l]);
if (s == 0.0) {
s = norm;
}
if (fabs(eval.H[l][l-1]) < eps * s) {
break;
}
l--;
}
// Check for convergence
// One root found
if (l == n) {
eval.H[n][n] = eval.H[n][n] + exshift;
eval.d[n] = eval.H[n][n];
eval.e[n] = 0.0;
n--;
iter = 0;
// Two roots found
} else if (l == n-1) {
w = eval.H[n][n-1] * eval.H[n-1][n];
p = (eval.H[n-1][n-1] - eval.H[n][n]) / 2.0;
q = p * p + w;
z = sqrt(fabs(q));
eval.H[n][n] = eval.H[n][n] + exshift;
eval.H[n-1][n-1] = eval.H[n-1][n-1] + exshift;
x = eval.H[n][n];
// float pair
if (q >= 0) {
if (p >= 0) {
z = p + z;
} else {
z = p - z;
}
eval.d[n-1] = x + z;
eval.d[n] = eval.d[n-1];
if (z != 0.0) {
eval.d[n] = x - w / z;
}
eval.e[n-1] = 0.0;
eval.e[n] = 0.0;
x = eval.H[n][n-1];
s = fabs(x) + fabs(z);
p = x / s;
q = z / s;
r = sqrt(p * p+q * q);
p = p / r;
q = q / r;
// Row modification
for (int j = n-1; j < nn; j++) {
z = eval.H[n-1][j];
eval.H[n-1][j] = q * z + p * eval.H[n][j];
eval.H[n][j] = q * eval.H[n][j] - p * z;
}
// Column modification
for (int i = 0; i <= n; i++) {
z = eval.H[i][n-1];
eval.H[i][n-1] = q * z + p * eval.H[i][n];
eval.H[i][n] = q * eval.H[i][n] - p * z;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
z = eval.V[i][n-1];
eval.V[i][n-1] = q * z + p * eval.V[i][n];
eval.V[i][n] = q * eval.V[i][n] - p * z;
}
// Complex pair
} else {
eval.d[n-1] = x + p;
eval.d[n] = x + p;
eval.e[n-1] = z;
eval.e[n] = -z;
}
n = n - 2;
iter = 0;
// No convergence yet
} else {
// Form shift
x = eval.H[n][n];
y = 0.0;
w = 0.0;
if (l < n) {
y = eval.H[n-1][n-1];
w = eval.H[n][n-1] * eval.H[n-1][n];
}
// Wilkinson's original ad hoc shift
if (iter == 10) {
exshift += x;
for (int i = low; i <= n; i++) {
eval.H[i][i] -= x;
}
s = fabs(eval.H[n][n-1]) + fabs(eval.H[n-1][n-2]);
x = y = 0.75 * s;
w = -0.4375 * s * s;
}
// MATLAB's new ad hoc shift
if (iter == 30) {
s = (y - x) / 2.0;
s = s * s + w;
if (s > 0) {
s = sqrt(s);
if (y < x) {
s = -s;
}
s = x - w / ((y - x) / 2.0 + s);
for (int i = low; i <= n; i++) {
eval.H[i][i] -= s;
}
exshift += s;
x = y = w = 0.964;
}
}
iter = iter + 1; // (Could check iteration count here.)
// Look for two consecutive small sub-diagonal elements
int m = n-2;
while (m >= l) {
z = eval.H[m][m];
r = x - z;
s = y - z;
p = (r * s - w) / eval.H[m+1][m] + eval.H[m][m+1];
q = eval.H[m+1][m+1] - z - r - s;
r = eval.H[m+2][m+1];
s = fabs(p) + fabs(q) + fabs(r);
p = p / s;
q = q / s;
r = r / s;
if (m == l) {
break;
}
if (fabs(eval.H[m][m-1]) * (fabs(q) + fabs(r)) <
eps * (fabs(p) * (fabs(eval.H[m-1][m-1]) + fabs(z) +
fabs(eval.H[m+1][m+1])))) {
break;
}
m--;
}
for (int i = m+2; i <= n; i++) {
eval.H[i][i-2] = 0.0;
if (i > m+2) {
eval.H[i][i-3] = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for (int k = m; k <= n-1; k++) {
int notlast = (k != n-1);
if (k != m) {
p = eval.H[k][k-1];
q = eval.H[k+1][k-1];
r = (notlast ? eval.H[k+2][k-1] : 0.0);
x = fabs(p) + fabs(q) + fabs(r);
if (x != 0.0) {
p = p / x;
q = q / x;
r = r / x;
}
}
if (x == 0.0) {
break;
}
s = sqrt(p * p + q * q + r * r);
if (p < 0) {
s = -s;
}
if (s != 0) {
if (k != m) {
eval.H[k][k-1] = -s * x;
} else if (l != m) {
eval.H[k][k-1] = -eval.H[k][k-1];
}
p = p + s;
x = p / s;
y = q / s;
z = r / s;
q = q / p;
r = r / p;
// Row modification
for (int j = k; j < nn; j++) {
p = eval.H[k][j] + q * eval.H[k+1][j];
if (notlast) {
p = p + r * eval.H[k+2][j];
eval.H[k+2][j] = eval.H[k+2][j] - p * z;
}
eval.H[k][j] = eval.H[k][j] - p * x;
eval.H[k+1][j] = eval.H[k+1][j] - p * y;
}
// Column modification
for (int i = 0; i <= min(n,k+3); i++) {
p = x * eval.H[i][k] + y * eval.H[i][k+1];
if (notlast) {
p = p + z * eval.H[i][k+2];
eval.H[i][k+2] = eval.H[i][k+2] - p * r;
}
eval.H[i][k] = eval.H[i][k] - p;
eval.H[i][k+1] = eval.H[i][k+1] - p * q;
}
// Accumulate transformations
for (int i = low; i <= high; i++) {
p = x * eval.V[i][k] + y * eval.V[i][k+1];
if (notlast) {
p = p + z * eval.V[i][k+2];
eval.V[i][k+2] = eval.V[i][k+2] - p * r;
}
eval.V[i][k] = eval.V[i][k] - p;
eval.V[i][k+1] = eval.V[i][k+1] - p * q;
}
} // (s != 0)
} // k loop
} // check convergence
} // while (n >= low)
//if(totIter>15) std::cout<<"!!!!iter "<<totIter<<"\n";
// Backsubstitute to find vectors of upper triangular form
if (norm == 0.0) {
return;
}
for (n = nn-1; n >= 0; n--) {
p = eval.d[n];
q = eval.e[n];
// float vector
if (q == 0) {
int l = n;
eval.H[n][n] = 1.0;
for (int i = n-1; i >= 0; i--) {
w = eval.H[i][i] - p;
r = 0.0;
for (int j = l; j <= n; j++) {
r = r + eval.H[i][j] * eval.H[j][n];
}
if (eval.e[i] < 0.0) {
z = w;
s = r;
} else {
l = i;
if (eval.e[i] == 0.0) {
if (w != 0.0) {
eval.H[i][n] = -r / w;
} else {
eval.H[i][n] = -r / (eps * norm);
}
// Solve real equations
} else {
x = eval.H[i][i+1];
y = eval.H[i+1][i];
q = (eval.d[i] - p) * (eval.d[i] - p) + eval.e[i] * eval.e[i];
t = (x * s - z * r) / q;
eval.H[i][n] = t;
if (fabs(x) > fabs(z)) {
eval.H[i+1][n] = (-r - w * t) / x;
} else {
eval.H[i+1][n] = (-s - y * t) / z;
}
}
// Overflow control
t = fabs(eval.H[i][n]);
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
eval.H[j][n] = eval.H[j][n] / t;
}
}
}
}
// Complex vector
} else if (q < 0) {
int l = n-1;
// Last vector component imaginary so matrix is triangular
if (fabs(eval.H[n][n-1]) > fabs(eval.H[n-1][n])) {
eval.H[n-1][n-1] = q / eval.H[n][n-1];
eval.H[n-1][n] = -(eval.H[n][n] - p) / eval.H[n][n-1];
} else {
Eigencdiv(eval, 0.0,-eval.H[n-1][n],eval.H[n-1][n-1]-p,q);
eval.H[n-1][n-1] = eval.cdivr;
eval.H[n-1][n] = eval.cdivi;
}
eval.H[n][n-1] = 0.0;
eval.H[n][n] = 1.0;
for (int i = n-2; i >= 0; i--) {
float ra,sa,vr,vi;
ra = 0.0;
sa = 0.0;
for (int j = l; j <= n; j++) {
ra = ra + eval.H[i][j] * eval.H[j][n-1];
sa = sa + eval.H[i][j] * eval.H[j][n];
}
w = eval.H[i][i] - p;
if (eval.e[i] < 0.0) {
z = w;
r = ra;
s = sa;
} else {
l = i;
if (eval.e[i] == 0) {
Eigencdiv(eval,-ra,-sa,w,q);
eval.H[i][n-1] = eval.cdivr;
eval.H[i][n] = eval.cdivi;
} else {
// Solve complex equations
x = eval.H[i][i+1];
y = eval.H[i+1][i];
vr = (eval.d[i] - p) * (eval.d[i] - p) + eval.e[i] * eval.e[i] - q * q;
vi = (eval.d[i] - p) * 2.0 * q;
if ((vr == 0.0) && (vi == 0.0)) {
vr = eps * norm * (fabs(w) + fabs(q) +
fabs(x) + fabs(y) + fabs(z));
}
Eigencdiv(eval, x*r-z*ra+q*sa,x*s-z*sa-q*ra,vr,vi);
eval.H[i][n-1] = eval.cdivr;
eval.H[i][n] = eval.cdivi;
if (fabs(x) > (fabs(z) + fabs(q))) {
eval.H[i+1][n-1] = (-ra - w * eval.H[i][n-1] + q * eval.H[i][n]) / x;
eval.H[i+1][n] = (-sa - w * eval.H[i][n] - q * eval.H[i][n-1]) / x;
} else {
Eigencdiv(eval, -r-y*eval.H[i][n-1],-s-y*eval.H[i][n],z,q);
eval.H[i+1][n-1] = eval.cdivr;
eval.H[i+1][n] = eval.cdivi;
}
}
// Overflow control
t = max(fabs(eval.H[i][n-1]),fabs(eval.H[i][n]));
if ((eps * t) * t > 1) {
for (int j = i; j <= n; j++) {
eval.H[j][n-1] = eval.H[j][n-1] / t;
eval.H[j][n] = eval.H[j][n] / t;
}
}
}
}
}
}
// Vectors of isolated roots
for (int i = 0; i < nn; i++) {
if (i < low || i > high) {
for (int j = i; j < nn; j++) {
eval.V[i][j] = eval.H[i][j];
}
}
}
// Back transformation to get eigenvectors of original matrix
for (int j = nn-1; j >= low; j--) {
for (int i = low; i <= high; i++) {
z = 0.0;
for (int k = low; k <= min(j,high); k++) {
z = z + eval.V[i][k] * eval.H[k][j];
}
eval.V[i][j] = z;
}
}
}
int computeEigenvalues3x3(
float dout[3],
float a[3][3])
{
/*TNT::Array2D<float> A = TNT::Array2D<float>(3,3, &a[0][0]);
TNT::Array1D<float> eig = TNT::Array1D<float>(3);
TNT::Array1D<float> eigImag = TNT::Array1D<float>(3);
JAMA::Eigenvalue<float> jeig = JAMA::Eigenvalue<float>(A);*/
sEigenvalue jeig;
// Compute the values
{
jeig.n = 3;
int n=3;
//V = Array2D<float>(n,n);
//d = Array1D<float>(n);
//e = Array1D<float>(n);
for (int y=0; y<3; y++)
{
jeig.d[y]=0.0f;
jeig.e[y]=0.0f;
for (int t=0; t<3; t++) jeig.V[y][t]=0.0f;
}
jeig.issymmetric = 1;
for (int j = 0; (j < 3) && jeig.issymmetric; j++) {
for (int i = 0; (i < 3) && jeig.issymmetric; i++) {
jeig.issymmetric = (a[i][j] == a[j][i]);
}
}
if (jeig.issymmetric) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
jeig.V[i][j] = a[i][j];
}
}
// Tridiagonalize.
Eigentred2(jeig);
// Diagonalize.
Eigentql2(jeig);
} else {
//H = TNT::Array2D<float>(n,n);
for (int y=0; y<3; y++)
{
jeig.ort[y]=0.0f;
for (int t=0; t<3; t++) jeig.H[y][t]=0.0f;
}
//ort = TNT::Array1D<float>(n);
for (int j = 0; j < n; j++) {
for (int i = 0; i < n; i++) {
jeig.H[i][j] = a[i][j];
}
}
// Reduce to Hessenberg form.
Eigenorthes(jeig);
// Reduce Hessenberg to real Schur form.
Eigenhqr2(jeig);
}
}
//jeig.getfloatEigenvalues(eig);
// complex ones
//jeig.getImagEigenvalues(eigImag);
dout[0] = sqrt(jeig.d[0]*jeig.d[0] + jeig.e[0]*jeig.e[0]);
dout[1] = sqrt(jeig.d[1]*jeig.d[1] + jeig.e[1]*jeig.e[1]);
dout[2] = sqrt(jeig.d[2]*jeig.d[2] + jeig.e[2]*jeig.e[2]);
return 0;
}

@ -15,33 +15,60 @@
// along with Wavelet Turbulence. If not, see <http://www.gnu.org/licenses/>.
//
// Copyright 2008 Theodore Kim and Nils Thuerey
//
//
//////////////////////////////////////////////////////////////////////
// Modified to not require TNT matrix library anymore. It was very slow
// when being run in parallel. Required TNT JAMA::Eigenvalue libraries were
// converted into independent functions.
// - MiikaH
//
//////////////////////////////////////////////////////////////////////
// Helper function, compute eigenvalues of 3x3 matrix
//////////////////////////////////////////////////////////////////////
#include "tnt/jama_eig.h"
#ifndef EIGENVAL_HELPER_H
#define EIGENVAL_HELPER_H
//#include "tnt/jama_eig.h"
#include <algorithm>
#include <cmath>
using namespace std;
//////////////////////////////////////////////////////////////////////
// eigenvalues of 3x3 non-symmetric matrix
//////////////////////////////////////////////////////////////////////
int inline computeEigenvalues3x3(
float dout[3],
float a[3][3])
struct sEigenvalue
{
TNT::Array2D<float> A = TNT::Array2D<float>(3,3, &a[0][0]);
TNT::Array1D<float> eig = TNT::Array1D<float>(3);
TNT::Array1D<float> eigImag = TNT::Array1D<float>(3);
JAMA::Eigenvalue<float> jeig = JAMA::Eigenvalue<float>(A);
jeig.getRealEigenvalues(eig);
int n;
int issymmetric;
float d[3]; /* real part */
float e[3]; /* img part */
float V[3][3]; /* Eigenvectors */
// complex ones
jeig.getImagEigenvalues(eigImag);
dout[0] = sqrt(eig[0]*eig[0] + eigImag[0]*eigImag[0]);
dout[1] = sqrt(eig[1]*eig[1] + eigImag[1]*eigImag[1]);
dout[2] = sqrt(eig[2]*eig[2] + eigImag[2]*eigImag[2]);
return 0;
}
float H[3][3];
#undef rfabs
#undef ROT
float ort[3];
float cdivr;
float cdivi;
};
void Eigentred2(sEigenvalue& eval);
void Eigencdiv(sEigenvalue& eval, float xr, float xi, float yr, float yi);
void Eigentql2 (sEigenvalue& eval);
void Eigenorthes (sEigenvalue& eval);
void Eigenhqr2 (sEigenvalue& eval);
int computeEigenvalues3x3(float dout[3], float a[3][3]);
#endif

@ -19,6 +19,11 @@
// FLUID_3D.cpp: implementation of the FLUID_3D class.
//
//////////////////////////////////////////////////////////////////////
// Heavy parallel optimization done. Many of the old functions now
// take begin and end parameters and process only specified part of the data.
// Some functions were divided into multiple ones.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#include "FLUID_3D.h"
#include "IMAGE.h"
@ -26,6 +31,10 @@
#include "SPHERE.h"
#include <zlib.h>
#if PARALLEL==1
#include <omp.h>
#endif // PARALLEL
// boundary conditions of the fluid domain
#define DOMAIN_BC_FRONT 0 // z
#define DOMAIN_BC_TOP 1 // y
@ -90,6 +99,13 @@ FLUID_3D::FLUID_3D(int *res, float *p0, float dt) :
_heatOld = new float[_totalCells];
_obstacles = new unsigned char[_totalCells]; // set 0 at end of step
// For threaded version:
_xVelocityTemp = new float[_totalCells];
_yVelocityTemp = new float[_totalCells];
_zVelocityTemp = new float[_totalCells];
_densityTemp = new float[_totalCells];
_heatTemp = new float[_totalCells];
// DG TODO: check if alloc went fine
for (int x = 0; x < _totalCells; x++)
@ -167,6 +183,12 @@ FLUID_3D::~FLUID_3D()
if (_obstacles) delete[] _obstacles;
// if (_wTurbulence) delete _wTurbulence;
if (_xVelocityTemp) delete[] _xVelocityTemp;
if (_yVelocityTemp) delete[] _yVelocityTemp;
if (_zVelocityTemp) delete[] _zVelocityTemp;
if (_densityTemp) delete[] _densityTemp;
if (_heatTemp) delete[] _heatTemp;
// printf("deleted fluid\n");
}
@ -182,108 +204,306 @@ void FLUID_3D::initBlenderRNA(float *alpha, float *beta)
//////////////////////////////////////////////////////////////////////
void FLUID_3D::step()
{
// addSmokeTestCase(_density, _res);
// addSmokeTestCase(_heat, _res);
wipeBoundaries();
int threadval = 1;
#if PARALLEL==1
threadval = omp_get_max_threads();
#endif
int stepParts = 1;
float partSize = _zRes;
#if PARALLEL==1
stepParts = threadval*2; // Dividing parallelized sections into numOfThreads * 2 sections
partSize = (float)_zRes/stepParts; // Size of one part;
if (partSize < 4) {stepParts = threadval; // If the slice gets too low (might actually slow things down, change it to larger
partSize = (float)_zRes/stepParts;}
if (partSize < 4) {stepParts = (int)(ceil((float)_zRes/4.0f)); // If it's still too low (only possible on future systems with +24 cores), change it to 4
partSize = (float)_zRes/stepParts;}
#else
int zBegin=0;
int zEnd=_zRes;
#endif
#if PARALLEL==1
#pragma omp parallel
{
#pragma omp for schedule(static,1)
for (int i=0; i<stepParts; i++)
{
int zBegin = (int)((float)i*partSize + 0.5f);
int zEnd = (int)((float)(i+1)*partSize + 0.5f);
#endif
wipeBoundariesSL(zBegin, zEnd);
addVorticity(zBegin, zEnd);
addBuoyancy(_heat, _density, zBegin, zEnd);
addForce(zBegin, zEnd);
#if PARALLEL==1
} // end of parallel
#pragma omp barrier
#pragma omp single
{
#endif
SWAP_POINTERS(_xVelocity, _xVelocityTemp);
SWAP_POINTERS(_yVelocity, _yVelocityTemp);
SWAP_POINTERS(_zVelocity, _zVelocityTemp);
#if PARALLEL==1
} // end of single
#pragma omp barrier
#pragma omp for
for (int i=0; i<2; i++)
{
if (i==0)
{
#endif
project();
#if PARALLEL==1
}
else
{
#endif
diffuseHeat();
#if PARALLEL==1
}
}
#pragma omp barrier
#pragma omp single
{
#endif
SWAP_POINTERS(_xVelocity, _xVelocityOld);
SWAP_POINTERS(_yVelocity, _yVelocityOld);
SWAP_POINTERS(_zVelocity, _zVelocityOld);
SWAP_POINTERS(_density, _densityOld);
SWAP_POINTERS(_heat, _heatOld);
advectMacCormackBegin(0, _zRes);
#if PARALLEL==1
} // end of single
#pragma omp barrier
#pragma omp for schedule(static,1)
for (int i=0; i<stepParts; i++)
{
int zBegin = (int)((float)i*partSize + 0.5f);
int zEnd = (int)((float)(i+1)*partSize + 0.5f);
#endif
advectMacCormackEnd1(zBegin, zEnd);
#if PARALLEL==1
} // end of parallel
#pragma omp barrier
#pragma omp for schedule(static,1)
for (int i=0; i<stepParts; i++)
{
int zBegin = (int)((float)i*partSize + 0.5f);
int zEnd = (int)((float)(i+1)*partSize + 0.5f);
#endif
advectMacCormackEnd2(zBegin, zEnd);
artificialDampingSL(zBegin, zEnd);
// Using forces as temp arrays
#if PARALLEL==1
}
}
for (int i=1; i<stepParts; i++)
{
int zPos=(int)((float)i*partSize + 0.5f);
artificialDampingExactSL(zPos);
}
#endif
SWAP_POINTERS(_xVelocity, _xForce);
SWAP_POINTERS(_yVelocity, _yForce);
SWAP_POINTERS(_zVelocity, _zForce);
// run the solvers
addVorticity();
addBuoyancy(_heat, _density);
addForce();
project();
diffuseHeat();
// advect everything
advectMacCormack();
// if(_wTurbulence) {
// _wTurbulence->stepTurbulenceFull(_dt/_dx,
// _xVelocity, _yVelocity, _zVelocity, _obstacles);
// _wTurbulence->stepTurbulenceReadable(_dt/_dx,
// _xVelocity, _yVelocity, _zVelocity, _obstacles);
// }
/*
// no file output
float *src = _density;
string prefix = string("./original.preview/density_fullxy_");
writeImageSliceXY(src,_res, _res[2]/2, prefix, _totalSteps);
*/
// artificial damping -- this is necessary because we use a
// collated grid, and at very coarse grid resolutions, banding
// artifacts can occur
artificialDamping(_xVelocity);
artificialDamping(_yVelocity);
artificialDamping(_zVelocity);
/*
// no file output
string pbrtPrefix = string("./pbrt/density_small_");
IMAGE::dumpPBRT(_totalSteps, pbrtPrefix, _density, _res[0],_res[1],_res[2]);
*/
_totalTime += _dt;
_totalSteps++;
_totalSteps++;
// todo xxx dg: only clear obstacles, not boundaries
// memset(_obstacles, 0, sizeof(unsigned char)*_xRes*_yRes*_zRes);
// wipe forces
// for external forces we can't do it at the beginning of this function but at the end
for (int i = 0; i < _totalCells; i++)
{
_xForce[i] = _yForce[i] = _zForce[i] = 0.0f;
}
}
//////////////////////////////////////////////////////////////////////
// helper function to dampen co-located grid artifacts of given arrays in intervals
// (only needed for velocity, strength (w) depends on testcase...
//////////////////////////////////////////////////////////////////////
void FLUID_3D::artificialDamping(float* field) {
void FLUID_3D::artificialDampingSL(int zBegin, int zEnd) {
const float w = 0.9;
memmove(_xForce+(_slabSize*zBegin), _xVelocityTemp+(_slabSize*zBegin), sizeof(float)*_slabSize*(zEnd-zBegin));
memmove(_yForce+(_slabSize*zBegin), _yVelocityTemp+(_slabSize*zBegin), sizeof(float)*_slabSize*(zEnd-zBegin));
memmove(_zForce+(_slabSize*zBegin), _zVelocityTemp+(_slabSize*zBegin), sizeof(float)*_slabSize*(zEnd-zBegin));
if(_totalSteps % 4 == 1) {
for (int z = 1; z < _res[2]-1; z++)
for (int z = zBegin+1; z < zEnd-1; z++)
for (int y = 1; y < _res[1]-1; y++)
for (int x = 1+(y+z)%2; x < _res[0]-1; x+=2) {
const int index = x + y*_res[0] + z * _slabSize;
field[index] = (1-w)*field[index] + 1./6. * w*(
field[index+1] + field[index-1] +
field[index+_res[0]] + field[index-_res[0]] +
field[index+_slabSize] + field[index-_slabSize] );
_xForce[index] = (1-w)*_xVelocityTemp[index] + 1./6. * w*(
_xVelocityTemp[index+1] + _xVelocityTemp[index-1] +
_xVelocityTemp[index+_res[0]] + _xVelocityTemp[index-_res[0]] +
_xVelocityTemp[index+_slabSize] + _xVelocityTemp[index-_slabSize] );
_yForce[index] = (1-w)*_yVelocityTemp[index] + 1./6. * w*(
_yVelocityTemp[index+1] + _yVelocityTemp[index-1] +
_yVelocityTemp[index+_res[0]] + _yVelocityTemp[index-_res[0]] +
_yVelocityTemp[index+_slabSize] + _yVelocityTemp[index-_slabSize] );
_zForce[index] = (1-w)*_zVelocityTemp[index] + 1./6. * w*(
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
}
}
if(_totalSteps % 4 == 3) {
for (int z = 1; z < _res[2]-1; z++)
for (int z = zBegin+1; z < zEnd-1; z++)
for (int y = 1; y < _res[1]-1; y++)
for (int x = 1+(y+z+1)%2; x < _res[0]-1; x+=2) {
const int index = x + y*_res[0] + z * _slabSize;
field[index] = (1-w)*field[index] + 1./6. * w*(
field[index+1] + field[index-1] +
field[index+_res[0]] + field[index-_res[0]] +
field[index+_slabSize] + field[index-_slabSize] );
_xForce[index] = (1-w)*_xVelocityTemp[index] + 1./6. * w*(
_xVelocityTemp[index+1] + _xVelocityTemp[index-1] +
_xVelocityTemp[index+_res[0]] + _xVelocityTemp[index-_res[0]] +
_xVelocityTemp[index+_slabSize] + _xVelocityTemp[index-_slabSize] );
_yForce[index] = (1-w)*_yVelocityTemp[index] + 1./6. * w*(
_yVelocityTemp[index+1] + _yVelocityTemp[index-1] +
_yVelocityTemp[index+_res[0]] + _yVelocityTemp[index-_res[0]] +
_yVelocityTemp[index+_slabSize] + _yVelocityTemp[index-_slabSize] );
_zForce[index] = (1-w)*_zVelocityTemp[index] + 1./6. * w*(
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
}
}
}
void FLUID_3D::artificialDampingExactSL(int pos) {
const float w = 0.9;
int index, x,y,z;
size_t posslab;
for (z=pos-1; z<=pos; z++)
{
posslab=z * _slabSize;
if(_totalSteps % 4 == 1) {
for (y = 1; y < _res[1]-1; y++)
for (x = 1+(y+z)%2; x < _res[0]-1; x+=2) {
index = x + y*_res[0] + posslab;
_xForce[index] = (1-w)*_xVelocityTemp[index] + 1./6. * w*(
_xVelocityTemp[index+1] + _xVelocityTemp[index-1] +
_xVelocityTemp[index+_res[0]] + _xVelocityTemp[index-_res[0]] +
_xVelocityTemp[index+_slabSize] + _xVelocityTemp[index-_slabSize] );
_yForce[index] = (1-w)*_yVelocityTemp[index] + 1./6. * w*(
_yVelocityTemp[index+1] + _yVelocityTemp[index-1] +
_yVelocityTemp[index+_res[0]] + _yVelocityTemp[index-_res[0]] +
_yVelocityTemp[index+_slabSize] + _yVelocityTemp[index-_slabSize] );
_zForce[index] = (1-w)*_zVelocityTemp[index] + 1./6. * w*(
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
}
}
if(_totalSteps % 4 == 3) {
for (y = 1; y < _res[1]-1; y++)
for (x = 1+(y+z+1)%2; x < _res[0]-1; x+=2) {
index = x + y*_res[0] + posslab;
_xForce[index] = (1-w)*_xVelocityTemp[index] + 1./6. * w*(
_xVelocityTemp[index+1] + _xVelocityTemp[index-1] +
_xVelocityTemp[index+_res[0]] + _xVelocityTemp[index-_res[0]] +
_xVelocityTemp[index+_slabSize] + _xVelocityTemp[index-_slabSize] );
_yForce[index] = (1-w)*_yVelocityTemp[index] + 1./6. * w*(
_yVelocityTemp[index+1] + _yVelocityTemp[index-1] +
_yVelocityTemp[index+_res[0]] + _yVelocityTemp[index-_res[0]] +
_yVelocityTemp[index+_slabSize] + _yVelocityTemp[index-_slabSize] );
_zForce[index] = (1-w)*_zVelocityTemp[index] + 1./6. * w*(
_zVelocityTemp[index+1] + _zVelocityTemp[index-1] +
_zVelocityTemp[index+_res[0]] + _zVelocityTemp[index-_res[0]] +
_zVelocityTemp[index+_slabSize] + _zVelocityTemp[index-_slabSize] );
}
}
}
}
//////////////////////////////////////////////////////////////////////
// copy out the boundary in all directions
//////////////////////////////////////////////////////////////////////
void FLUID_3D::copyBorderAll(float* field)
void FLUID_3D::copyBorderAll(float* field, int zBegin, int zEnd)
{
int index;
int index, x, y, z;
int zSize = zEnd-zBegin;
int _blockTotalCells=_slabSize * zSize;
if ((zBegin==0))
for (int y = 0; y < _yRes; y++)
for (int x = 0; x < _xRes; x++)
{
// front slab
index = x + y * _xRes;
field[index] = field[index + _slabSize];
}
if (zEnd==_zRes)
for (y = 0; y < _yRes; y++)
for (x = 0; x < _xRes; x++)
{
// back slab
index += _totalCells - _slabSize;
index = x + y * _xRes + _blockTotalCells - _slabSize;
field[index] = field[index - _slabSize];
}
for (int z = 0; z < _zRes; z++)
for (int x = 0; x < _xRes; x++)
for (z = 0; z < zSize; z++)
for (x = 0; x < _xRes; x++)
{
// bottom slab
index = x + z * _slabSize;
@ -294,8 +514,8 @@ void FLUID_3D::copyBorderAll(float* field)
field[index] = field[index - _xRes];
}
for (int z = 0; z < _zRes; z++)
for (int y = 0; y < _yRes; y++)
for (z = 0; z < zSize; z++)
for (y = 0; y < _yRes; y++)
{
// left slab
index = y * _xRes + z * _slabSize;
@ -310,27 +530,123 @@ void FLUID_3D::copyBorderAll(float* field)
//////////////////////////////////////////////////////////////////////
// wipe boundaries of velocity and density
//////////////////////////////////////////////////////////////////////
void FLUID_3D::wipeBoundaries()
void FLUID_3D::wipeBoundaries(int zBegin, int zEnd)
{
setZeroBorder(_xVelocity, _res);
setZeroBorder(_yVelocity, _res);
setZeroBorder(_zVelocity, _res);
setZeroBorder(_density, _res);
setZeroBorder(_xVelocity, _res, zBegin, zEnd);
setZeroBorder(_yVelocity, _res, zBegin, zEnd);
setZeroBorder(_zVelocity, _res, zBegin, zEnd);
setZeroBorder(_density, _res, zBegin, zEnd);
}
void FLUID_3D::wipeBoundariesSL(int zBegin, int zEnd)
{
/////////////////////////////////////
// setZeroBorder to all:
/////////////////////////////////////
/////////////////////////////////////
// setZeroX
/////////////////////////////////////
const int slabSize = _xRes * _yRes;
int index, x,y,z;
for (z = zBegin; z < zEnd; z++)
for (y = 0; y < _yRes; y++)
{
// left slab
index = y * _xRes + z * slabSize;
_xVelocity[index] = 0.0f;
_yVelocity[index] = 0.0f;
_zVelocity[index] = 0.0f;
_density[index] = 0.0f;
// right slab
index += _xRes - 1;
_xVelocity[index] = 0.0f;
_yVelocity[index] = 0.0f;
_zVelocity[index] = 0.0f;
_density[index] = 0.0f;
}
/////////////////////////////////////
// setZeroY
/////////////////////////////////////
for (z = zBegin; z < zEnd; z++)
for (x = 0; x < _xRes; x++)
{
// bottom slab
index = x + z * slabSize;
_xVelocity[index] = 0.0f;
_yVelocity[index] = 0.0f;
_zVelocity[index] = 0.0f;
_density[index] = 0.0f;
// top slab
index += slabSize - _xRes;
_xVelocity[index] = 0.0f;
_yVelocity[index] = 0.0f;
_zVelocity[index] = 0.0f;
_density[index] = 0.0f;
}
/////////////////////////////////////
// setZeroZ
/////////////////////////////////////
const int totalCells = _xRes * _yRes * _zRes;
index = 0;
if ((zBegin == 0))
for (y = 0; y < _yRes; y++)
for (x = 0; x < _xRes; x++, index++)
{
// front slab
_xVelocity[index] = 0.0f;
_yVelocity[index] = 0.0f;
_zVelocity[index] = 0.0f;
_density[index] = 0.0f;
}
if (zEnd == _zRes)
{
index=0;
int indexx=0;
const int cellsslab = totalCells - slabSize;
for (y = 0; y < _yRes; y++)
for (x = 0; x < _xRes; x++, index++)
{
// back slab
indexx = index + cellsslab;
_xVelocity[indexx] = 0.0f;
_yVelocity[indexx] = 0.0f;
_zVelocity[indexx] = 0.0f;
_density[indexx] = 0.0f;
}
}
}
//////////////////////////////////////////////////////////////////////
// add forces to velocity field
//////////////////////////////////////////////////////////////////////
void FLUID_3D::addForce()
void FLUID_3D::addForce(int zBegin, int zEnd)
{
for (int i = 0; i < _totalCells; i++)
int begin=zBegin * _slabSize;
int end=begin + (zEnd - zBegin) * _slabSize;
for (int i = begin; i < end; i++)
{
_xVelocity[i] += _dt * _xForce[i];
_yVelocity[i] += _dt * _yForce[i];
_zVelocity[i] += _dt * _zForce[i];
_xVelocityTemp[i] = _xVelocity[i] + _dt * _xForce[i];
_yVelocityTemp[i] = _yVelocity[i] + _dt * _yForce[i];
_zVelocityTemp[i] = _zVelocity[i] + _dt * _zForce[i];
}
}
//////////////////////////////////////////////////////////////////////
// project into divergence free field
//////////////////////////////////////////////////////////////////////
@ -344,18 +660,18 @@ void FLUID_3D::project()
memset(_pressure, 0, sizeof(float)*_totalCells);
memset(_divergence, 0, sizeof(float)*_totalCells);
setObstacleBoundaries(_pressure);
setObstacleBoundaries(_pressure, 0, _zRes);
// copy out the boundaries
if(DOMAIN_BC_LEFT == 0) setNeumannX(_xVelocity, _res);
else setZeroX(_xVelocity, _res);
if(DOMAIN_BC_LEFT == 0) setNeumannX(_xVelocity, _res, 0, _zRes);
else setZeroX(_xVelocity, _res, 0, _zRes);
if(DOMAIN_BC_TOP == 0) setNeumannY(_yVelocity, _res);
else setZeroY(_yVelocity, _res);
if(DOMAIN_BC_TOP == 0) setNeumannY(_yVelocity, _res, 0, _zRes);
else setZeroY(_yVelocity, _res, 0, _zRes);
if(DOMAIN_BC_FRONT == 0) setNeumannZ(_zVelocity, _res);
else setZeroZ(_zVelocity, _res);
if(DOMAIN_BC_FRONT == 0) setNeumannZ(_zVelocity, _res, 0, _zRes);
else setZeroZ(_zVelocity, _res, 0, _zRes);
// calculate divergence
index = _slabSize + _xRes + 1;
@ -385,12 +701,12 @@ void FLUID_3D::project()
// DG: commenting this helps CG to get a better start, 10-20% speed improvement
// _pressure[index] = 0.0f;
}
copyBorderAll(_pressure);
copyBorderAll(_pressure, 0, _zRes);
// solve Poisson equation
solvePressurePre(_pressure, _divergence, _obstacles);
setObstaclePressure(_pressure);
setObstaclePressure(_pressure, 0, _zRes);
// project out solution
float invDx = 1.0f / _dx;
@ -411,6 +727,9 @@ void FLUID_3D::project()
if (_divergence) delete[] _divergence;
}
//////////////////////////////////////////////////////////////////////
// diffuse heat
//////////////////////////////////////////////////////////////////////
@ -418,13 +737,14 @@ void FLUID_3D::diffuseHeat()
{
SWAP_POINTERS(_heat, _heatOld);
copyBorderAll(_heatOld);
copyBorderAll(_heatOld, 0, _zRes);
solveHeat(_heat, _heatOld, _obstacles);
// zero out inside obstacles
for (int x = 0; x < _totalCells; x++)
if (_obstacles[x])
_heat[x] = 0.0f;
}
//////////////////////////////////////////////////////////////////////
@ -444,12 +764,28 @@ void FLUID_3D::addObstacle(OBSTACLE* obstacle)
//////////////////////////////////////////////////////////////////////
// calculate the obstacle directional types
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setObstaclePressure(float *_pressure)
void FLUID_3D::setObstaclePressure(float *_pressure, int zBegin, int zEnd)
{
// compleately TODO
const size_t index_ = _slabSize + _xRes + 1;
//int vIndex=_slabSize + _xRes + 1;
int bb=0;
int bt=0;
if (zBegin == 0) {bb = 1;}
if (zEnd == _zRes) {bt = 1;}
// tag remaining obstacle blocks
for (int z = 1, index = _slabSize + _xRes + 1;
z < _zRes - 1; z++, index += 2 * _xRes)
for (int z = zBegin + bb; z < zEnd - bt; z++)
{
size_t index = index_ +(z-1)*_slabSize;
for (int y = 1; y < _yRes - 1; y++, index += 2)
{
for (int x = 1; x < _xRes - 1; x++, index++)
{
// could do cascade of ifs, but they are a pain
@ -507,15 +843,33 @@ void FLUID_3D::setObstaclePressure(float *_pressure)
// this means it's not a full no-slip boundary condition
// but a "half-slip" - still looks ok right now
}
}
//vIndex++;
} // x loop
//vIndex += 2;
} // y loop
//vIndex += 2 * _xRes;
} // z loop
}
void FLUID_3D::setObstacleBoundaries(float *_pressure)
void FLUID_3D::setObstacleBoundaries(float *_pressure, int zBegin, int zEnd)
{
// cull degenerate obstacles , move to addObstacle?
for (int z = 1, index = _slabSize + _xRes + 1;
z < _zRes - 1; z++, index += 2 * _xRes)
// r = b - Ax
const size_t index_ = _slabSize + _xRes + 1;
int bb=0;
int bt=0;
if (zBegin == 0) {bb = 1;}
if (zEnd == _zRes) {bt = 1;}
for (int z = zBegin + bb; z < zEnd - bt; z++)
{
size_t index = index_ +(z-1)*_slabSize;
for (int y = 1; y < _yRes - 1; y++, index += 2)
{
for (int x = 1; x < _xRes - 1; x++, index++)
{
if (_obstacles[index] != EMPTY)
@ -545,17 +899,22 @@ void FLUID_3D::setObstacleBoundaries(float *_pressure)
_zVelocity[index] = 0.0f;
_pressure[index] = 0.0f;
}
}
//vIndex++;
} // x-loop
//vIndex += 2;
} // y-loop
//vIndex += 2* _xRes;
} // z-loop
}
//////////////////////////////////////////////////////////////////////
// add buoyancy forces
//////////////////////////////////////////////////////////////////////
void FLUID_3D::addBuoyancy(float *heat, float *density)
void FLUID_3D::addBuoyancy(float *heat, float *density, int zBegin, int zEnd)
{
int index = 0;
int index = zBegin*_slabSize;
for (int z = 0; z < _zRes; z++)
for (int z = zBegin; z < zEnd; z++)
for (int y = 0; y < _yRes; y++)
for (int x = 0; x < _xRes; x++, index++)
{
@ -566,30 +925,55 @@ void FLUID_3D::addBuoyancy(float *heat, float *density)
//////////////////////////////////////////////////////////////////////
// add vorticity to the force field
//////////////////////////////////////////////////////////////////////
void FLUID_3D::addVorticity()
void FLUID_3D::addVorticity(int zBegin, int zEnd)
{
int x,y,z,index;
//int x,y,z,index;
if(_vorticityEps<=0.) return;
int _blockSize=zEnd-zBegin;
int _blockTotalCells = _slabSize * (_blockSize+2);
float *_xVorticity, *_yVorticity, *_zVorticity, *_vorticity;
_xVorticity = new float[_totalCells];
_yVorticity = new float[_totalCells];
_zVorticity = new float[_totalCells];
_vorticity = new float[_totalCells];
int _vIndex = _slabSize + _xRes + 1;
int bb=0;
int bt=0;
int bb1=-1;
int bt1=-1;
memset(_xVorticity, 0, sizeof(float)*_totalCells);
memset(_yVorticity, 0, sizeof(float)*_totalCells);
memset(_zVorticity, 0, sizeof(float)*_totalCells);
memset(_vorticity, 0, sizeof(float)*_totalCells);
if (zBegin == 0) {bb1 = 1; bb = 1; _blockTotalCells-=_blockSize;}
if (zEnd == _zRes) {bt1 = 1;bt = 1; _blockTotalCells-=_blockSize;}
_xVorticity = new float[_blockTotalCells];
_yVorticity = new float[_blockTotalCells];
_zVorticity = new float[_blockTotalCells];
_vorticity = new float[_blockTotalCells];
memset(_xVorticity, 0, sizeof(float)*_blockTotalCells);
memset(_yVorticity, 0, sizeof(float)*_blockTotalCells);
memset(_zVorticity, 0, sizeof(float)*_blockTotalCells);
memset(_vorticity, 0, sizeof(float)*_blockTotalCells);
//const size_t indexsetupV=_slabSize;
const size_t index_ = _slabSize + _xRes + 1;
// calculate vorticity
float gridSize = 0.5f / _dx;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
//index = _slabSize + _xRes + 1;
size_t vIndex=_xRes + 1;
for (int z = zBegin + bb1; z < (zEnd - bt1); z++)
{
size_t index = index_ +(z-1)*_slabSize;
vIndex = index-(zBegin-1+bb)*_slabSize;
for (int y = 1; y < _yRes - 1; y++, index += 2)
{
for (int x = 1; x < _xRes - 1; x++, index++)
{
int up = _obstacles[index + _xRes] ? index : index + _xRes;
int down = _obstacles[index - _xRes] ? index : index - _xRes;
float dy = (up == index || down == index) ? 1.0f / _dx : gridSize;
@ -600,34 +984,51 @@ void FLUID_3D::addVorticity()
int left = _obstacles[index - 1] ? index : index - 1;
float dx = (right == index || right == index) ? 1.0f / _dx : gridSize;
_xVorticity[index] = (_zVelocity[up] - _zVelocity[down]) * dy + (-_yVelocity[out] + _yVelocity[in]) * dz;
_yVorticity[index] = (_xVelocity[out] - _xVelocity[in]) * dz + (-_zVelocity[right] + _zVelocity[left]) * dx;
_zVorticity[index] = (_yVelocity[right] - _yVelocity[left]) * dx + (-_xVelocity[up] + _xVelocity[down])* dy;
_xVorticity[vIndex] = (_zVelocity[up] - _zVelocity[down]) * dy + (-_yVelocity[out] + _yVelocity[in]) * dz;
_yVorticity[vIndex] = (_xVelocity[out] - _xVelocity[in]) * dz + (-_zVelocity[right] + _zVelocity[left]) * dx;
_zVorticity[vIndex] = (_yVelocity[right] - _yVelocity[left]) * dx + (-_xVelocity[up] + _xVelocity[down])* dy;
_vorticity[index] = sqrtf(_xVorticity[index] * _xVorticity[index] +
_yVorticity[index] * _yVorticity[index] +
_zVorticity[index] * _zVorticity[index]);
_vorticity[vIndex] = sqrtf(_xVorticity[vIndex] * _xVorticity[vIndex] +
_yVorticity[vIndex] * _yVorticity[vIndex] +
_zVorticity[vIndex] * _zVorticity[vIndex]);
vIndex++;
}
vIndex+=2;
}
//vIndex+=2*_xRes;
}
// calculate normalized vorticity vectors
float eps = _vorticityEps;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
//index = _slabSize + _xRes + 1;
vIndex=_slabSize + _xRes + 1;
for (int z = zBegin + bb; z < (zEnd - bt); z++)
{
size_t index = index_ +(z-1)*_slabSize;
vIndex = index-(zBegin-1+bb)*_slabSize;
for (int y = 1; y < _yRes - 1; y++, index += 2)
{
for (int x = 1; x < _xRes - 1; x++, index++)
{
//
if (!_obstacles[index])
{
float N[3];
int up = _obstacles[index + _xRes] ? index : index + _xRes;
int down = _obstacles[index - _xRes] ? index : index - _xRes;
float dy = (up == index || down == index) ? 1.0f / _dx : gridSize;
int out = _obstacles[index + _slabSize] ? index : index + _slabSize;
int in = _obstacles[index - _slabSize] ? index : index - _slabSize;
float dz = (out == index || in == index) ? 1.0f / _dx : gridSize;
int right = _obstacles[index + 1] ? index : index + 1;
int left = _obstacles[index - 1] ? index : index - 1;
float dx = (right == index || right == index) ? 1.0f / _dx : gridSize;
int up = _obstacles[index + _xRes] ? vIndex : vIndex + _xRes;
int down = _obstacles[index - _xRes] ? vIndex : vIndex - _xRes;
float dy = (up == vIndex || down == vIndex) ? 1.0f / _dx : gridSize;
int out = _obstacles[index + _slabSize] ? vIndex : vIndex + _slabSize;
int in = _obstacles[index - _slabSize] ? vIndex : vIndex - _slabSize;
float dz = (out == vIndex || in == vIndex) ? 1.0f / _dx : gridSize;
int right = _obstacles[index + 1] ? vIndex : vIndex + 1;
int left = _obstacles[index - 1] ? vIndex : vIndex - 1;
float dx = (right == vIndex || right == vIndex) ? 1.0f / _dx : gridSize;
N[0] = (_vorticity[right] - _vorticity[left]) * dx;
N[1] = (_vorticity[up] - _vorticity[down]) * dy;
N[2] = (_vorticity[out] - _vorticity[in]) * dz;
@ -640,11 +1041,17 @@ void FLUID_3D::addVorticity()
N[1] *= magnitude;
N[2] *= magnitude;
_xForce[index] += (N[1] * _zVorticity[index] - N[2] * _yVorticity[index]) * _dx * eps;
_yForce[index] -= (N[0] * _zVorticity[index] - N[2] * _xVorticity[index]) * _dx * eps;
_zForce[index] += (N[0] * _yVorticity[index] - N[1] * _xVorticity[index]) * _dx * eps;
_xForce[index] += (N[1] * _zVorticity[vIndex] - N[2] * _yVorticity[vIndex]) * _dx * eps;
_yForce[index] -= (N[0] * _zVorticity[vIndex] - N[2] * _xVorticity[vIndex]) * _dx * eps;
_zForce[index] += (N[0] * _yVorticity[vIndex] - N[1] * _xVorticity[vIndex]) * _dx * eps;
}
}
} // if
vIndex++;
} // x loop
vIndex+=2;
} // y loop
//vIndex+=2*_xRes;
} // z loop
if (_xVorticity) delete[] _xVorticity;
if (_yVorticity) delete[] _yVorticity;
@ -652,54 +1059,80 @@ void FLUID_3D::addVorticity()
if (_vorticity) delete[] _vorticity;
}
//////////////////////////////////////////////////////////////////////
// Advect using the MacCormack method from the Selle paper
//////////////////////////////////////////////////////////////////////
void FLUID_3D::advectMacCormack()
void FLUID_3D::advectMacCormackBegin(int zBegin, int zEnd)
{
Vec3Int res = Vec3Int(_xRes,_yRes,_zRes);
if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocity, res);
else setZeroX(_xVelocity, res);
if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocityOld, res, zBegin, zEnd);
else setZeroX(_xVelocityOld, res, zBegin, zEnd);
if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocity, res);
else setZeroY(_yVelocity, res);
if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocityOld, res, zBegin, zEnd);
else setZeroY(_yVelocityOld, res, zBegin, zEnd);
if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocity, res);
else setZeroZ(_zVelocity, res);
if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocityOld, res, zBegin, zEnd);
else setZeroZ(_zVelocityOld, res, zBegin, zEnd);
}
SWAP_POINTERS(_xVelocity, _xVelocityOld);
SWAP_POINTERS(_yVelocity, _yVelocityOld);
SWAP_POINTERS(_zVelocity, _zVelocityOld);
SWAP_POINTERS(_density, _densityOld);
SWAP_POINTERS(_heat, _heatOld);
//////////////////////////////////////////////////////////////////////
// Advect using the MacCormack method from the Selle paper
//////////////////////////////////////////////////////////////////////
void FLUID_3D::advectMacCormackEnd1(int zBegin, int zEnd)
{
Vec3Int res = Vec3Int(_xRes,_yRes,_zRes);
const float dt0 = _dt / _dx;
// use force arrays as temp arrays
for (int x = 0; x < _totalCells; x++)
_xForce[x] = _yForce[x] = 0.0;
float* t1 = _xForce;
float* t2 = _yForce;
int begin=zBegin * _slabSize;
int end=begin + (zEnd - zBegin) * _slabSize;
for (int x = begin; x < end; x++)
_xForce[x] = 0.0;
advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _densityOld, _density, t1,t2, res, _obstacles);
advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _heatOld, _heat, t1,t2, res, _obstacles);
advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _xVelocityOld, _xVelocity, t1,t2, res, _obstacles);
advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _yVelocityOld, _yVelocity, t1,t2, res, _obstacles);
advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocity, t1,t2, res, _obstacles);
// advectFieldMacCormack1(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res)
if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocity, res);
else setZeroX(_xVelocity, res);
advectFieldMacCormack1(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _densityOld, _densityTemp, res, zBegin, zEnd);
advectFieldMacCormack1(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _heatOld, _heatTemp, res, zBegin, zEnd);
advectFieldMacCormack1(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _xVelocityOld, _xVelocity, res, zBegin, zEnd);
advectFieldMacCormack1(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _yVelocityOld, _yVelocity, res, zBegin, zEnd);
advectFieldMacCormack1(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocity, res, zBegin, zEnd);
if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocity, res);
else setZeroY(_yVelocity, res);
if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocity, res);
else setZeroZ(_zVelocity, res);
setZeroBorder(_density, res);
setZeroBorder(_heat, res);
for (int x = 0; x < _totalCells; x++)
t1[x] = t2[x] = 0.0;
// Have to wait untill all the threads are done -> so continuing in step 3
}
//////////////////////////////////////////////////////////////////////
// Advect using the MacCormack method from the Selle paper
//////////////////////////////////////////////////////////////////////
void FLUID_3D::advectMacCormackEnd2(int zBegin, int zEnd)
{
const float dt0 = _dt / _dx;
Vec3Int res = Vec3Int(_xRes,_yRes,_zRes);
// use force array as temp arrays
float* t1 = _xForce;
// advectFieldMacCormack2(dt, xVelocity, yVelocity, zVelocity, oldField, newField, tempfield, temp, res, obstacles)
advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _densityOld, _density, _densityTemp, t1, res, _obstacles, zBegin, zEnd);
advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _heatOld, _heat, _heatTemp, t1, res, _obstacles, zBegin, zEnd);
advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _xVelocityOld, _xVelocityTemp, _xVelocity, t1, res, _obstacles, zBegin, zEnd);
advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _yVelocityOld, _yVelocityTemp, _yVelocity, t1, res, _obstacles, zBegin, zEnd);
advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocityTemp, _zVelocity, t1, res, _obstacles, zBegin, zEnd);
if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocityTemp, res, zBegin, zEnd);
else setZeroX(_xVelocityTemp, res, zBegin, zEnd);
if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocityTemp, res, zBegin, zEnd);
else setZeroY(_yVelocityTemp, res, zBegin, zEnd);
if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocityTemp, res, zBegin, zEnd);
else setZeroZ(_zVelocityTemp, res, zBegin, zEnd);
setZeroBorder(_density, res, zBegin, zEnd);
setZeroBorder(_heat, res, zBegin, zEnd);
/*int begin=zBegin * _slabSize;
int end=begin + (zEnd - zBegin) * _slabSize;
for (int x = begin; x < end; x++)
_xForce[x] = _yForce[x] = 0.0f;*/
}

@ -19,6 +19,11 @@
// FLUID_3D.h: interface for the FLUID_3D class.
//
//////////////////////////////////////////////////////////////////////
// Heavy parallel optimization done. Many of the old functions now
// take begin and end parameters and process only specified part of the data.
// Some functions were divided into multiple ones.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#ifndef FLUID_3D_H
#define FLUID_3D_H
@ -74,7 +79,8 @@ class FLUID_3D
int _totalImgDumps;
int _totalVelDumps;
void artificialDamping(float* field);
void artificialDampingSL(int zBegin, int zEnd);
void artificialDampingExactSL(int pos);
// fields
float* _density;
@ -92,6 +98,13 @@ class FLUID_3D
float* _zForce;
unsigned char* _obstacles;
// Required for proper threading:
float* _xVelocityTemp;
float* _yVelocityTemp;
float* _zVelocityTemp;
float* _heatTemp;
float* _densityTemp;
// CG fields
int _iterations;
@ -107,13 +120,14 @@ class FLUID_3D
// WTURBULENCE* _wTurbulence;
// boundary setting functions
void copyBorderAll(float* field);
void copyBorderAll(float* field, int zBegin, int zEnd);
// timestepping functions
void wipeBoundaries();
void addForce();
void addVorticity();
void addBuoyancy(float *heat, float *density);
void wipeBoundaries(int zBegin, int zEnd);
void wipeBoundariesSL(int zBegin, int zEnd);
void addForce(int zBegin, int zEnd);
void addVorticity(int zBegin, int zEnd);
void addBuoyancy(float *heat, float *density, int zBegin, int zEnd);
// solver stuff
void project();
@ -122,41 +136,58 @@ class FLUID_3D
void solvePressurePre(float* field, float* b, unsigned char* skip);
void solveHeat(float* field, float* b, unsigned char* skip);
// handle obstacle boundaries
void setObstacleBoundaries(float *_pressure);
void setObstaclePressure(float *_pressure);
void setObstacleBoundaries(float *_pressure, int zBegin, int zEnd);
void setObstaclePressure(float *_pressure, int zBegin, int zEnd);
public:
// advection, accessed e.g. by WTURBULENCE class
void advectMacCormack();
//void advectMacCormack();
void advectMacCormackBegin(int zBegin, int zEnd);
void advectMacCormackEnd1(int zBegin, int zEnd);
void advectMacCormackEnd2(int zBegin, int zEnd);
// boundary setting functions
static void copyBorderX(float* field, Vec3Int res);
static void copyBorderY(float* field, Vec3Int res);
static void copyBorderZ(float* field, Vec3Int res);
static void setNeumannX(float* field, Vec3Int res);
static void setNeumannY(float* field, Vec3Int res);
static void setNeumannZ(float* field, Vec3Int res);
static void setZeroX(float* field, Vec3Int res);
static void setZeroY(float* field, Vec3Int res);
static void setZeroZ(float* field, Vec3Int res);
static void setZeroBorder(float* field, Vec3Int res) {
setZeroX(field, res);
setZeroY(field, res);
setZeroZ(field, res);
static void copyBorderX(float* field, Vec3Int res, int zBegin, int zEnd);
static void copyBorderY(float* field, Vec3Int res, int zBegin, int zEnd);
static void copyBorderZ(float* field, Vec3Int res, int zBegin, int zEnd);
static void setNeumannX(float* field, Vec3Int res, int zBegin, int zEnd);
static void setNeumannY(float* field, Vec3Int res, int zBegin, int zEnd);
static void setNeumannZ(float* field, Vec3Int res, int zBegin, int zEnd);
static void setZeroX(float* field, Vec3Int res, int zBegin, int zEnd);
static void setZeroY(float* field, Vec3Int res, int zBegin, int zEnd);
static void setZeroZ(float* field, Vec3Int res, int zBegin, int zEnd);
static void setZeroBorder(float* field, Vec3Int res, int zBegin, int zEnd) {
setZeroX(field, res, zBegin, zEnd);
setZeroY(field, res, zBegin, zEnd);
setZeroZ(field, res, zBegin, zEnd);
};
// static advection functions, also used by WTURBULENCE
static void advectFieldSemiLagrange(const float dt, const float* velx, const float* vely, const float* velz,
float* oldField, float* newField, Vec3Int res);
static void advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles);
float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd);
static void advectFieldMacCormack1(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* tempResult, Vec3Int res, int zBegin, int zEnd);
static void advectFieldMacCormack2(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, float* tempResult, float* temp1,Vec3Int res, const unsigned char* obstacles, int zBegin, int zEnd);
// temp ones for testing
/*static void advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles);*/
/*static void advectFieldSemiLagrange2(const float dt, const float* velx, const float* vely, const float* velz,
float* oldField, float* newField, Vec3Int res);*/
// maccormack helper functions
static void clampExtrema(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, Vec3Int res);
float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd);
static void clampOutsideRays(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection);
float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection, int zBegin, int zEnd);
// output helper functions
// static void writeImageSliceXY(const float *field, Vec3Int res, int slice, string prefix, int picCnt, float scale=1.);

@ -19,11 +19,150 @@
// FLUID_3D.cpp: implementation of the FLUID_3D class.
//
//////////////////////////////////////////////////////////////////////
// Both solvers optimized by merging loops and precalculating
// stuff used in iteration loop.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#include "FLUID_3D.h"
#include <cstring>
#define SOLVER_ACCURACY 1e-06
//////////////////////////////////////////////////////////////////////
// solve the heat equation with CG
//////////////////////////////////////////////////////////////////////
void FLUID_3D::solveHeat(float* field, float* b, unsigned char* skip)
{
int x, y, z;
const int twoxr = 2 * _xRes;
size_t index;
const float heatConst = _dt * _heatDiffusion / (_dx * _dx);
float *_q, *_residual, *_direction, *_Acenter;
// i = 0
int i = 0;
_residual = new float[_totalCells]; // set 0
_direction = new float[_totalCells]; // set 0
_q = new float[_totalCells]; // set 0
_Acenter = new float[_totalCells]; // set 0
memset(_residual, 0, sizeof(float)*_totalCells);
memset(_q, 0, sizeof(float)*_totalCells);
memset(_direction, 0, sizeof(float)*_totalCells);
memset(_Acenter, 0, sizeof(float)*_totalCells);
float deltaNew = 0.0f;
// r = b - Ax
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += twoxr)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
_Acenter[index] = 1.0f;
if (!skip[index])
{
// set the matrix to the Poisson stencil in order
if (!skip[index + 1]) _Acenter[index] += heatConst;
if (!skip[index - 1]) _Acenter[index] += heatConst;
if (!skip[index + _xRes]) _Acenter[index] += heatConst;
if (!skip[index - _xRes]) _Acenter[index] += heatConst;
if (!skip[index + _slabSize]) _Acenter[index] += heatConst;
if (!skip[index - _slabSize]) _Acenter[index] += heatConst;
_residual[index] = b[index] - (_Acenter[index] * field[index] +
field[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) +
field[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) +
field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) +
field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) +
field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) +
field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst));
}
else
{
_residual[index] = 0.0f;
}
_direction[index] = _residual[index];
deltaNew += _residual[index] * _residual[index];
}
// While deltaNew > (eps^2) * delta0
const float eps = SOLVER_ACCURACY;
float maxR = 2.0f * eps;
while ((i < _iterations) && (maxR > eps))
{
// q = Ad
float alpha = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += twoxr)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
if (!skip[index])
{
_q[index] = (_Acenter[index] * _direction[index] +
_direction[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) +
_direction[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) +
_direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) +
_direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) +
_direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) +
_direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst));
}
else
{
_q[index] = 0.0f;
}
alpha += _direction[index] * _q[index];
}
if (fabs(alpha) > 0.0f)
alpha = deltaNew / alpha;
float deltaOld = deltaNew;
deltaNew = 0.0f;
maxR = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += twoxr)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
field[index] += alpha * _direction[index];
_residual[index] -= alpha * _q[index];
maxR = (_residual[index] > maxR) ? _residual[index] : maxR;
deltaNew += _residual[index] * _residual[index];
}
float beta = deltaNew / deltaOld;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += twoxr)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
_direction[index] = _residual[index] + beta * _direction[index];
i++;
}
// cout << i << " iterations converged to " << maxR << endl;
if (_residual) delete[] _residual;
if (_direction) delete[] _direction;
if (_q) delete[] _q;
if (_Acenter) delete[] _Acenter;
}
void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
{
int x, y, z;
@ -45,6 +184,8 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
memset(_h, 0, sizeof(float)*_xRes*_yRes*_zRes);
memset(_Precond, 0, sizeof(float)*_xRes*_yRes*_zRes);
float deltaNew = 0.0f;
// r = b - Ax
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
@ -62,16 +203,19 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
if (!skip[index - _xRes]) Acenter += 1.;
if (!skip[index + _slabSize]) Acenter += 1.;
if (!skip[index - _slabSize]) Acenter += 1.;
}
_residual[index] = b[index] - (Acenter * field[index] +
_residual[index] = b[index] - (Acenter * field[index] +
field[index - 1] * (skip[index - 1] ? 0.0 : -1.0f)+
field[index + 1] * (skip[index + 1] ? 0.0 : -1.0f)+
field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f)+
field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+
field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f)+
field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f) );
_residual[index] = (skip[index]) ? 0.0f : _residual[index];
}
else
{
_residual[index] = 0.0f;
}
// P^-1
if(Acenter < 1.0)
@ -81,18 +225,10 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
// p = P^-1 * r
_direction[index] = _residual[index] * _Precond[index];
deltaNew += _residual[index] * _direction[index];
}
// deltaNew = transpose(r) * p
float deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
deltaNew += _residual[index] * _direction[index];
// delta0 = deltaNew
// float delta0 = deltaNew;
// While deltaNew > (eps^2) * delta0
const float eps = SOLVER_ACCURACY;
@ -101,7 +237,9 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
// while (i < _iterations)
while ((i < _iterations) && (maxR > 0.001*eps))
{
// (s) q = Ad (p)
float alpha = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
@ -118,71 +256,52 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
if (!skip[index - _xRes]) Acenter += 1.;
if (!skip[index + _slabSize]) Acenter += 1.;
if (!skip[index - _slabSize]) Acenter += 1.;
}
_q[index] = Acenter * _direction[index] +
_q[index] = Acenter * _direction[index] +
_direction[index - 1] * (skip[index - 1] ? 0.0 : -1.0f) +
_direction[index + 1] * (skip[index + 1] ? 0.0 : -1.0f) +
_direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f) +
_direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+
_direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f) +
_direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f);
_q[index] = (skip[index]) ? 0.0f : _q[index];
}
else
{
_q[index] = 0.0f;
}
alpha += _direction[index] * _q[index];
}
// alpha = deltaNew / (transpose(d) * q)
float alpha = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
alpha += _direction[index] * _q[index];
if (fabs(alpha) > 0.0f)
alpha = deltaNew / alpha;
float deltaOld = deltaNew;
deltaNew = 0.0f;
maxR = 0.0;
float tmp;
// x = x + alpha * d
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
field[index] += alpha * _direction[index];
// r = r - alpha * q
maxR = 0.0;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
_residual[index] -= alpha * _q[index];
// maxR = (_residual[index] > maxR) ? _residual[index] : maxR;
}
_residual[index] -= alpha * _q[index];
// if(maxR <= eps)
// break;
_h[index] = _Precond[index] * _residual[index];
tmp = _residual[index] * _h[index];
deltaNew += tmp;
maxR = (tmp > maxR) ? tmp : maxR;
// h = P^-1 * r
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
_h[index] = _Precond[index] * _residual[index];
}
// deltaOld = deltaNew
float deltaOld = deltaNew;
// deltaNew = transpose(r) * h
deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
deltaNew += _residual[index] * _h[index];
maxR = (_residual[index]* _h[index] > maxR) ? _residual[index]* _h[index] : maxR;
}
// beta = deltaNew / deltaOld
float beta = deltaNew / deltaOld;
@ -205,320 +324,3 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip)
if (_direction) delete[] _direction;
if (_q) delete[] _q;
}
//////////////////////////////////////////////////////////////////////
// solve the poisson equation with CG
//////////////////////////////////////////////////////////////////////
#if 0
void FLUID_3D::solvePressure(float* field, float* b, unsigned char* skip)
{
int x, y, z;
size_t index;
// i = 0
int i = 0;
memset(_residual, 0, sizeof(float)*_xRes*_yRes*_zRes);
memset(_q, 0, sizeof(float)*_xRes*_yRes*_zRes);
memset(_direction, 0, sizeof(float)*_xRes*_yRes*_zRes);
// r = b - Ax
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
float Acenter = 0.0f;
if (!skip[index])
{
// set the matrix to the Poisson stencil in order
if (!skip[index + 1]) Acenter += 1.;
if (!skip[index - 1]) Acenter += 1.;
if (!skip[index + _xRes]) Acenter += 1.;
if (!skip[index - _xRes]) Acenter += 1.;
if (!skip[index + _slabSize]) Acenter += 1.;
if (!skip[index - _slabSize]) Acenter += 1.;
}
_residual[index] = b[index] - (Acenter * field[index] +
field[index - 1] * (skip[index - 1] ? 0.0 : -1.0f)+
field[index + 1] * (skip[index + 1] ? 0.0 : -1.0f)+
field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f)+
field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+
field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f)+
field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f) );
_residual[index] = (skip[index]) ? 0.0f : _residual[index];
}
// d = r
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
_direction[index] = _residual[index];
// deltaNew = transpose(r) * r
float deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
deltaNew += _residual[index] * _residual[index];
// delta0 = deltaNew
// float delta0 = deltaNew;
// While deltaNew > (eps^2) * delta0
const float eps = SOLVER_ACCURACY;
float maxR = 2.0f * eps;
while ((i < _iterations) && (maxR > eps))
{
// q = Ad
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
float Acenter = 0.0f;
if (!skip[index])
{
// set the matrix to the Poisson stencil in order
if (!skip[index + 1]) Acenter += 1.;
if (!skip[index - 1]) Acenter += 1.;
if (!skip[index + _xRes]) Acenter += 1.;
if (!skip[index - _xRes]) Acenter += 1.;
if (!skip[index + _slabSize]) Acenter += 1.;
if (!skip[index - _slabSize]) Acenter += 1.;
}
_q[index] = Acenter * _direction[index] +
_direction[index - 1] * (skip[index - 1] ? 0.0 : -1.0f) +
_direction[index + 1] * (skip[index + 1] ? 0.0 : -1.0f) +
_direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f) +
_direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+
_direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f) +
_direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f);
_q[index] = (skip[index]) ? 0.0f : _q[index];
}
// alpha = deltaNew / (transpose(d) * q)
float alpha = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
alpha += _direction[index] * _q[index];
if (fabs(alpha) > 0.0f)
alpha = deltaNew / alpha;
// x = x + alpha * d
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
field[index] += alpha * _direction[index];
// r = r - alpha * q
maxR = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
_residual[index] -= alpha * _q[index];
maxR = (_residual[index] > maxR) ? _residual[index] : maxR;
}
// deltaOld = deltaNew
float deltaOld = deltaNew;
// deltaNew = transpose(r) * r
deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
deltaNew += _residual[index] * _residual[index];
// beta = deltaNew / deltaOld
float beta = deltaNew / deltaOld;
// d = r + beta * d
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
_direction[index] = _residual[index] + beta * _direction[index];
// i = i + 1
i++;
}
// cout << i << " iterations converged to " << maxR << endl;
}
#endif
//////////////////////////////////////////////////////////////////////
// solve the heat equation with CG
//////////////////////////////////////////////////////////////////////
void FLUID_3D::solveHeat(float* field, float* b, unsigned char* skip)
{
int x, y, z;
size_t index;
const float heatConst = _dt * _heatDiffusion / (_dx * _dx);
float *_q, *_residual, *_direction;
// i = 0
int i = 0;
_residual = new float[_totalCells]; // set 0
_direction = new float[_totalCells]; // set 0
_q = new float[_totalCells]; // set 0
memset(_residual, 0, sizeof(float)*_xRes*_yRes*_zRes);
memset(_q, 0, sizeof(float)*_xRes*_yRes*_zRes);
memset(_direction, 0, sizeof(float)*_xRes*_yRes*_zRes);
// r = b - Ax
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
float Acenter = 1.0f;
if (!skip[index])
{
// set the matrix to the Poisson stencil in order
if (!skip[index + 1]) Acenter += heatConst;
if (!skip[index - 1]) Acenter += heatConst;
if (!skip[index + _xRes]) Acenter += heatConst;
if (!skip[index - _xRes]) Acenter += heatConst;
if (!skip[index + _slabSize]) Acenter += heatConst;
if (!skip[index - _slabSize]) Acenter += heatConst;
}
_residual[index] = b[index] - (Acenter * field[index] +
field[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) +
field[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) +
field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) +
field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) +
field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) +
field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst));
_residual[index] = (skip[index]) ? 0.0f : _residual[index];
}
// d = r
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
_direction[index] = _residual[index];
// deltaNew = transpose(r) * r
float deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
deltaNew += _residual[index] * _residual[index];
// delta0 = deltaNew
// float delta0 = deltaNew;
// While deltaNew > (eps^2) * delta0
const float eps = SOLVER_ACCURACY;
float maxR = 2.0f * eps;
while ((i < _iterations) && (maxR > eps))
{
// q = Ad
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
// if the cell is a variable
float Acenter = 1.0f;
if (!skip[index])
{
// set the matrix to the Poisson stencil in order
if (!skip[index + 1]) Acenter += heatConst;
if (!skip[index - 1]) Acenter += heatConst;
if (!skip[index + _xRes]) Acenter += heatConst;
if (!skip[index - _xRes]) Acenter += heatConst;
if (!skip[index + _slabSize]) Acenter += heatConst;
if (!skip[index - _slabSize]) Acenter += heatConst;
}
_q[index] = (Acenter * _direction[index] +
_direction[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) +
_direction[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) +
_direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) +
_direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) +
_direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) +
_direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst));
_q[index] = (skip[index]) ? 0.0f : _q[index];
}
// alpha = deltaNew / (transpose(d) * q)
float alpha = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
alpha += _direction[index] * _q[index];
if (fabs(alpha) > 0.0f)
alpha = deltaNew / alpha;
// x = x + alpha * d
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
field[index] += alpha * _direction[index];
// r = r - alpha * q
maxR = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
{
_residual[index] -= alpha * _q[index];
maxR = (_residual[index] > maxR) ? _residual[index] : maxR;
}
// deltaOld = deltaNew
float deltaOld = deltaNew;
// deltaNew = transpose(r) * r
deltaNew = 0.0f;
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
deltaNew += _residual[index] * _residual[index];
// beta = deltaNew / deltaOld
float beta = deltaNew / deltaOld;
// d = r + beta * d
index = _slabSize + _xRes + 1;
for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes)
for (y = 1; y < _yRes - 1; y++, index += 2)
for (x = 1; x < _xRes - 1; x++, index++)
_direction[index] = _residual[index] + beta * _direction[index];
// i = i + 1
i++;
}
// cout << i << " iterations converged to " << maxR << endl;
if (_residual) delete[] _residual;
if (_direction) delete[] _direction;
if (_q) delete[] _q;
}

@ -19,6 +19,11 @@
// FLUID_3D.cpp: implementation of the static functions of the FLUID_3D class.
//
//////////////////////////////////////////////////////////////////////
// Heavy parallel optimization done. Many of the old functions now
// take begin and end parameters and process only specified part of the data.
// Some functions were divided into multiple ones.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#include <zlib.h>
#include "FLUID_3D.h"
@ -75,11 +80,11 @@ void FLUID_3D::addSmokeTestCase(float* field, Vec3Int res)
//////////////////////////////////////////////////////////////////////
// set x direction to Neumann boundary conditions
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setNeumannX(float* field, Vec3Int res)
void FLUID_3D::setNeumannX(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int y = 0; y < res[1]; y++)
{
// left slab
@ -93,7 +98,7 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res)
// fix, force top slab to only allow outwards flux
for (int y = 0; y < res[1]; y++)
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
{
// top slab
index = y * res[0] + z * slabSize;
@ -107,11 +112,11 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res)
//////////////////////////////////////////////////////////////////////
// set y direction to Neumann boundary conditions
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setNeumannY(float* field, Vec3Int res)
void FLUID_3D::setNeumannY(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int x = 0; x < res[0]; x++)
{
// bottom slab
@ -124,7 +129,7 @@ void FLUID_3D::setNeumannY(float* field, Vec3Int res)
}
// fix, force top slab to only allow outwards flux
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int x = 0; x < res[0]; x++)
{
// top slab
@ -140,22 +145,36 @@ void FLUID_3D::setNeumannY(float* field, Vec3Int res)
//////////////////////////////////////////////////////////////////////
// set z direction to Neumann boundary conditions
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setNeumannZ(float* field, Vec3Int res)
void FLUID_3D::setNeumannZ(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
const int totalCells = res[0] * res[1] * res[2];
const int cellsslab = totalCells - slabSize;
int index;
index = 0;
if (zBegin == 0)
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++)
for (int x = 0; x < res[0]; x++, index++)
{
// front slab
index = x + y * res[0];
field[index] = field[index + 2 * slabSize];
}
if (zEnd == res[2])
{
index = 0;
int indexx = 0;
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++, index++)
{
// back slab
index += totalCells - slabSize;
field[index] = field[index - 2 * slabSize];
indexx = index + cellsslab;
field[indexx] = field[indexx - 2 * slabSize];
}
// fix, force top slab to only allow outwards flux
for (int y = 0; y < res[1]; y++)
@ -163,22 +182,24 @@ void FLUID_3D::setNeumannZ(float* field, Vec3Int res)
{
// top slab
index = x + y * res[0];
index += totalCells - slabSize;
index += cellsslab;
if(field[index]<0.) field[index] = 0.;
index -= slabSize;
if(field[index]<0.) field[index] = 0.;
}
} // zEnd == res[2]
}
//////////////////////////////////////////////////////////////////////
// set x direction to zero
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setZeroX(float* field, Vec3Int res)
void FLUID_3D::setZeroX(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int y = 0; y < res[1]; y++)
{
// left slab
@ -194,11 +215,11 @@ void FLUID_3D::setZeroX(float* field, Vec3Int res)
//////////////////////////////////////////////////////////////////////
// set y direction to zero
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setZeroY(float* field, Vec3Int res)
void FLUID_3D::setZeroY(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int x = 0; x < res[0]; x++)
{
// bottom slab
@ -214,32 +235,44 @@ void FLUID_3D::setZeroY(float* field, Vec3Int res)
//////////////////////////////////////////////////////////////////////
// set z direction to zero
//////////////////////////////////////////////////////////////////////
void FLUID_3D::setZeroZ(float* field, Vec3Int res)
void FLUID_3D::setZeroZ(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
const int totalCells = res[0] * res[1] * res[2];
int index;
int index = 0;
if ((zBegin == 0))
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++)
for (int x = 0; x < res[0]; x++, index++)
{
// front slab
index = x + y * res[0];
field[index] = 0.0f;
}
// back slab
index += totalCells - slabSize;
field[index] = 0.0f;
}
if (zEnd == res[2])
{
index=0;
int indexx=0;
const int cellsslab = totalCells - slabSize;
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++, index++)
{
// back slab
indexx = index + cellsslab;
field[indexx] = 0.0f;
}
}
}
//////////////////////////////////////////////////////////////////////
// copy grid boundary
//////////////////////////////////////////////////////////////////////
void FLUID_3D::copyBorderX(float* field, Vec3Int res)
void FLUID_3D::copyBorderX(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int y = 0; y < res[1]; y++)
{
// left slab
@ -251,12 +284,12 @@ void FLUID_3D::copyBorderX(float* field, Vec3Int res)
field[index] = field[index - 1];
}
}
void FLUID_3D::copyBorderY(float* field, Vec3Int res)
void FLUID_3D::copyBorderY(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
const int totalCells = res[0] * res[1] * res[2];
//const int totalCells = res[0] * res[1] * res[2];
int index;
for (int z = 0; z < res[2]; z++)
for (int z = zBegin; z < zEnd; z++)
for (int x = 0; x < res[0]; x++)
{
// bottom slab
@ -267,40 +300,49 @@ void FLUID_3D::copyBorderY(float* field, Vec3Int res)
field[index] = field[index - res[0]];
}
}
void FLUID_3D::copyBorderZ(float* field, Vec3Int res)
void FLUID_3D::copyBorderZ(float* field, Vec3Int res, int zBegin, int zEnd)
{
const int slabSize = res[0] * res[1];
const int totalCells = res[0] * res[1] * res[2];
int index;
int index=0;
if ((zBegin == 0))
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++)
for (int x = 0; x < res[0]; x++, index++)
{
// front slab
index = x + y * res[0];
field[index] = field[index + slabSize];
// back slab
index += totalCells - slabSize;
field[index] = field[index - slabSize];
}
if ((zEnd == res[2]))
{
index=0;
int indexx=0;
const int cellsslab = totalCells - slabSize;
for (int y = 0; y < res[1]; y++)
for (int x = 0; x < res[0]; x++, index++)
{
// back slab
indexx = index + cellsslab;
field[indexx] = field[indexx - slabSize];
}
}
}
/////////////////////////////////////////////////////////////////////
// advect field with the semi lagrangian method
//////////////////////////////////////////////////////////////////////
void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const float* vely, const float* velz,
float* oldField, float* newField, Vec3Int res)
float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd)
{
const int xres = res[0];
const int yres = res[1];
const int zres = res[2];
const int slabSize = res[0] * res[1];
// scale dt up to grid resolution
#if PARALLEL==1 && !_WIN32
#pragma omp parallel
#pragma omp for schedule(static)
#endif
for (int z = 0; z < zres; z++)
for (int z = zBegin; z < zEnd; z++)
for (int y = 0; y < yres; y++)
for (int x = 0; x < xres; x++)
{
@ -357,50 +399,69 @@ void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const
}
}
/////////////////////////////////////////////////////////////////////
// advect field with the maccormack method
//
// comments are the pseudocode from selle's paper
//////////////////////////////////////////////////////////////////////
void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles)
void FLUID_3D::advectFieldMacCormack1(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* tempResult, Vec3Int res, int zBegin, int zEnd)
{
float* phiHatN = temp1;
float* phiHatN1 = temp2;
const int sx= res[0];
const int sy= res[1];
const int sz= res[2];
for (int x = 0; x < sx * sy * sz; x++)
phiHatN[x] = phiHatN1[x] = oldField[x];
/*for (int x = 0; x < sx * sy * sz; x++)
phiHatN[x] = phiHatN1[x] = oldField[x];*/ // not needed as all the values are written first
float*& phiN = oldField;
float*& phiN1 = tempResult;
// phiHatN1 = A(phiN)
advectFieldSemiLagrange( dt, xVelocity, yVelocity, zVelocity, phiN, phiN1, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole)
}
void FLUID_3D::advectFieldMacCormack2(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
float* oldField, float* newField, float* tempResult, float* temp1, Vec3Int res, const unsigned char* obstacles, int zBegin, int zEnd)
{
float* phiHatN = tempResult;
float* t1 = temp1;
const int sx= res[0];
const int sy= res[1];
const int sz= res[2];
float*& phiN = oldField;
float*& phiN1 = newField;
// phiHatN1 = A(phiN)
advectFieldSemiLagrange( dt, xVelocity, yVelocity, zVelocity, phiN, phiHatN1, res);
// phiHatN = A^R(phiHatN1)
advectFieldSemiLagrange( -1.0*dt, xVelocity, yVelocity, zVelocity, phiHatN1, phiHatN, res);
advectFieldSemiLagrange( -1.0*dt, xVelocity, yVelocity, zVelocity, phiHatN, t1, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole)
// phiN1 = phiHatN1 + (phiN - phiHatN) / 2
const int border = 0;
for (int z = border; z < sz-border; z++)
for (int z = zBegin+border; z < zEnd-border; z++)
for (int y = border; y < sy-border; y++)
for (int x = border; x < sx-border; x++) {
int index = x + y * sx + z * sx*sy;
phiN1[index] = phiHatN1[index] + (phiN[index] - phiHatN[index]) * 0.50f;
phiN1[index] = phiHatN[index] + (phiN[index] - t1[index]) * 0.50f;
//phiN1[index] = phiHatN1[index]; // debug, correction off
}
copyBorderX(phiN1, res);
copyBorderY(phiN1, res);
copyBorderZ(phiN1, res);
copyBorderX(phiN1, res, zBegin, zEnd);
copyBorderY(phiN1, res, zBegin, zEnd);
copyBorderZ(phiN1, res, zBegin, zEnd);
// clamp any newly created extrema
clampExtrema(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res);
clampExtrema(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole)
// if the error estimate was bad, revert to first order
clampOutsideRays(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, obstacles, phiHatN1);
clampOutsideRays(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, obstacles, phiHatN, zBegin, zEnd); // phiHatN is only used at cells within thread range, so its ok
}
@ -408,13 +469,21 @@ void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, con
// Clamp the extrema generated by the BFECC error correction
//////////////////////////////////////////////////////////////////////
void FLUID_3D::clampExtrema(const float dt, const float* velx, const float* vely, const float* velz,
float* oldField, float* newField, Vec3Int res)
float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd)
{
const int xres= res[0];
const int yres= res[1];
const int zres= res[2];
const int slabSize = res[0] * res[1];
for (int z = 1; z < zres-1; z++)
int bb=0;
int bt=0;
if (zBegin == 0) {bb = 1;}
if (zEnd == res[2]) {bt = 1;}
for (int z = zBegin+bb; z < zEnd-bt; z++)
for (int y = 1; y < yres-1; y++)
for (int x = 1; x < xres-1; x++)
{
@ -484,14 +553,20 @@ void FLUID_3D::clampExtrema(const float dt, const float* velx, const float* vely
// incorrect
//////////////////////////////////////////////////////////////////////
void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float* vely, const float* velz,
float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection)
float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection, int zBegin, int zEnd)
{
const int sx= res[0];
const int sy= res[1];
const int sz= res[2];
const int slabSize = res[0] * res[1];
for (int z = 1; z < sz-1; z++)
int bb=0;
int bt=0;
if (zBegin == 0) {bb = 1;}
if (zEnd == res[2]) {bt = 1;}
for (int z = zBegin+bb; z < zEnd-bt; z++)
for (int y = 1; y < sy-1; y++)
for (int x = 1; x < sx-1; x++)
{
@ -607,4 +682,3 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float*
}
} // xyz
}

@ -0,0 +1,136 @@
#include "LU_HELPER.h"
int isNonsingular (sLU LU_) {
for (int j = 0; j < 3; j++) {
if (LU_.values[j][j] == 0)
return 0;
}
return 1;
}
sLU computeLU( float a[3][3])
{
sLU result;
int m=3;
int n=3;
//float LU_[3][3];
for (int i = 0; i < m; i++) {
result.piv[i] = i;
for (int j = 0; j < n; j++) result.values[i][j]=a[i][j];
}
result.pivsign = 1;
//Real *LUrowi = 0;;
//Array1D<Real> LUcolj(m);
//float *LUrowi = 0;
float LUcolj[3];
// Outer loop.
for (int j = 0; j < n; j++) {
// Make a copy of the j-th column to localize references.
for (int i = 0; i < m; i++) {
LUcolj[i] = result.values[i][j];
}
// Apply previous transformations.
for (int i = 0; i < m; i++) {
//float LUrowi[3];
//LUrowi = result.values[i];
// Most of the time is spent in the following dot product.
int kmax = min(i,j);
double s = 0.0;
for (int k = 0; k < kmax; k++) {
s += result.values[i][k]*LUcolj[k];
}
result.values[i][j] = LUcolj[i] -= s;
}
// Find pivot and exchange if necessary.
int p = j;
for (int i = j+1; i < m; i++) {
if (abs(LUcolj[i]) > abs(LUcolj[p])) {
p = i;
}
}
if (p != j) {
int k=0;
for (k = 0; k < n; k++) {
double t = result.values[p][k];
result.values[p][k] = result.values[j][k];
result.values[j][k] = t;
}
k = result.piv[p];
result.piv[p] = result.piv[j];
result.piv[j] = k;
result.pivsign = -result.pivsign;
}
// Compute multipliers.
if ((j < m) && (result.values[j][j] != 0.0)) {
for (int i = j+1; i < m; i++) {
result.values[i][j] /= result.values[j][j];
}
}
}
return result;
}
void solveLU3x3(sLU& A, float x[3], float b[3])
{
//TNT::Array1D<float> jamaB = TNT::Array1D<float>(3, &b[0]);
//TNT::Array1D<float> jamaX = A.solve(jamaB);
// Solve A, B
{
if (!isNonsingular(A)) {
x[0]=0.0f;
x[1]=0.0f;
x[2]=0.0f;
return;
}
//Array1D<Real> Ax = permute_copy(b, piv);
float Ax[3];
// permute copy: b , A.piv
{
for (int i = 0; i < 3; i++)
Ax[i] = b[A.piv[i]];
}
// Solve L*Y = B(piv)
for (int k = 0; k < 3; k++) {
for (int i = k+1; i < 3; i++) {
Ax[i] -= Ax[k]*A.values[i][k];
}
}
// Solve U*X = Y;
for (int k = 2; k >= 0; k--) {
Ax[k] /= A.values[k][k];
for (int i = 0; i < k; i++)
Ax[i] -= Ax[k]*A.values[i][k];
}
x[0] = Ax[0];
x[1] = Ax[1];
x[2] = Ax[2];
return;
}
}

@ -17,40 +17,35 @@
// Copyright 2008 Theodore Kim and Nils Thuerey
//
//////////////////////////////////////////////////////////////////////
// Modified to not require TNT matrix library anymore. It was very slow
// when being run in parallel. Required TNT JAMA:LU libraries were
// converted into independent functions.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#ifndef LU_HELPER_H
#define LU_HELPER_H
#include <cmath>
#include <algorithm>
using namespace std;
//////////////////////////////////////////////////////////////////////
// Helper function, compute eigenvalues of 3x3 matrix
//////////////////////////////////////////////////////////////////////
#include "tnt/jama_lu.h"
//////////////////////////////////////////////////////////////////////
// LU decomposition of 3x3 non-symmetric matrix
//////////////////////////////////////////////////////////////////////
JAMA::LU<float> inline computeLU3x3(
float a[3][3])
struct sLU
{
TNT::Array2D<float> A = TNT::Array2D<float>(3,3, &a[0][0]);
JAMA::LU<float> jLU= JAMA::LU<float>(A);
return jLU;
}
float values[3][3];
int pivsign;
int piv[3];
};
int isNonsingular (sLU LU_);
sLU computeLU( float a[3][3]);
void solveLU3x3(sLU& A, float x[3], float b[3]);
//////////////////////////////////////////////////////////////////////
// LU decomposition of 3x3 non-symmetric matrix
//////////////////////////////////////////////////////////////////////
void inline solveLU3x3(
JAMA::LU<float>& A,
float x[3],
float b[3])
{
TNT::Array1D<float> jamaB = TNT::Array1D<float>(3, &b[0]);
TNT::Array1D<float> jamaX = A.solve(jamaB);
x[0] = jamaX[0];
x[1] = jamaX[1];
x[2] = jamaX[2];
}
#endif

@ -18,6 +18,10 @@
//
// WTURBULENCE handling
///////////////////////////////////////////////////////////////////////////////////
// Parallelized turbulence even further. TNT matrix library functions
// rewritten to improve performance.
// - MiikaH
//////////////////////////////////////////////////////////////////////
#include "WTURBULENCE.h"
#include "INTERPOLATE.h"
@ -29,6 +33,7 @@
#include "LU_HELPER.h"
#include "SPHERE.h"
#include <zlib.h>
#include <math.h>
// needed to access static advection functions
#include "FLUID_3D.h"
@ -278,27 +283,34 @@ static float minDz(int x, int y, int z, float* input, Vec3Int res)
// Beware -- uses big density maccormack as temporary arrays
//////////////////////////////////////////////////////////////////////
void WTURBULENCE::advectTextureCoordinates (float dtOrg, float* xvel, float* yvel, float* zvel, float *tempBig1, float *tempBig2) {
// advection
SWAP_POINTERS(_tcTemp, _tcU);
FLUID_3D::copyBorderX(_tcTemp, _resSm);
FLUID_3D::copyBorderY(_tcTemp, _resSm);
FLUID_3D::copyBorderZ(_tcTemp, _resSm);
FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcU, tempBig1, tempBig2, _resSm, NULL);
FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel,
_tcTemp, tempBig1, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcU, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]);
SWAP_POINTERS(_tcTemp, _tcV);
FLUID_3D::copyBorderX(_tcTemp, _resSm);
FLUID_3D::copyBorderY(_tcTemp, _resSm);
FLUID_3D::copyBorderZ(_tcTemp, _resSm);
FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcV, tempBig1, tempBig2, _resSm, NULL);
FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel,
_tcTemp, tempBig1, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcV, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]);
SWAP_POINTERS(_tcTemp, _tcW);
FLUID_3D::copyBorderX(_tcTemp, _resSm);
FLUID_3D::copyBorderY(_tcTemp, _resSm);
FLUID_3D::copyBorderZ(_tcTemp, _resSm);
FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcW, tempBig1, tempBig2, _resSm, NULL);
FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel,
_tcTemp, tempBig1, _resSm, 0 , _resSm[2]);
FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel,
_tcTemp, _tcW, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]);
}
//////////////////////////////////////////////////////////////////////
@ -325,9 +337,9 @@ void WTURBULENCE::computeEigenvalues(float *_eigMin, float *_eigMax) {
// ONLY compute the eigenvalues after checking that the matrix
// is nonsingular
JAMA::LU<float> LU = computeLU3x3(jacobian);
sLU LU = computeLU(jacobian);
if (LU.isNonsingular())
if (isNonsingular(LU))
{
// get the analytic eigenvalues, quite slow right now...
Vec3 eigenvalues = Vec3(1.);
@ -422,9 +434,9 @@ void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float*
for (int x = 0; x < _totalCellsSm; x++)
_energy[x] = 0.5f * (xvel[x] * xvel[x] + yvel[x] * yvel[x] + zvel[x] * zvel[x]);
FLUID_3D::copyBorderX(_energy, _resSm);
FLUID_3D::copyBorderY(_energy, _resSm);
FLUID_3D::copyBorderZ(_energy, _resSm);
FLUID_3D::copyBorderX(_energy, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderY(_energy, _resSm, 0 , _resSm[2]);
FLUID_3D::copyBorderZ(_energy, _resSm, 0 , _resSm[2]);
// pseudo-march the values into the obstacles
// the wavelet upsampler only uses a 3x3 support neighborhood, so
@ -563,7 +575,7 @@ Vec3 WTURBULENCE::WVelocityWithJacobian(Vec3 orgPos, float* xUnwarped, float* yU
//////////////////////////////////////////////////////////////////////
// perform an actual noise advection step
//////////////////////////////////////////////////////////////////////
void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles)
/*void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles)
{
// enlarge timestep to match grid
const float dt = dtOrg * _amplify;
@ -689,9 +701,9 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
const float dtSubdiv = dt / (float)totalSubsteps;
// set boundaries of big velocity grid
FLUID_3D::setZeroX(bigUx, _resBig);
FLUID_3D::setZeroY(bigUy, _resBig);
FLUID_3D::setZeroZ(bigUz, _resBig);
FLUID_3D::setZeroX(bigUx, _resBig, 0, _resBig[2]);
FLUID_3D::setZeroY(bigUy, _resBig, 0, _resBig[2]);
FLUID_3D::setZeroZ(bigUz, _resBig, 0, _resBig[2]);
// do the MacCormack advection, with substepping if necessary
for(int substep = 0; substep < totalSubsteps; substep++)
@ -704,7 +716,7 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
} // substep
// wipe the density borders
FLUID_3D::setZeroBorder(_densityBig, _resBig);
FLUID_3D::setZeroBorder(_densityBig, _resBig, 0, _resBig[2]);
// reset texture coordinates now in preparation for next timestep
// Shouldn't do this before generating the noise because then the
@ -724,7 +736,9 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
_totalStepsBig++;
}
}*/
//struct
//////////////////////////////////////////////////////////////////////
// perform the full turbulence algorithm, including OpenMP
@ -747,6 +761,7 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
memset(_tcTemp, 0, sizeof(float)*_totalCellsSm);
// prepare textures
advectTextureCoordinates(dtOrg, xvel,yvel,zvel, tempBig1, tempBig2);
@ -763,25 +778,38 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
if (obstacles[x]) highFreqEnergy[x] = 0.f;
Vec3Int ressm(_xResSm, _yResSm, _zResSm);
FLUID_3D::setNeumannX(highFreqEnergy, ressm);
FLUID_3D::setNeumannY(highFreqEnergy, ressm);
FLUID_3D::setNeumannZ(highFreqEnergy, ressm);
FLUID_3D::setNeumannX(highFreqEnergy, ressm, 0 , ressm[2]);
FLUID_3D::setNeumannY(highFreqEnergy, ressm, 0 , ressm[2]);
FLUID_3D::setNeumannZ(highFreqEnergy, ressm, 0 , ressm[2]);
int threadval = 1;
#if PARALLEL==1
threadval = omp_get_max_threads();
#endif
// parallel region setup
float maxVelMagThreads[8] = { -1., -1., -1., -1., -1., -1., -1., -1. };
#if PARALLEL==1 && !_WIN32
// Uses omp_get_max_trheads to get number of required cells.
float* maxVelMagThreads = new float[threadval];
for (int i=0; i<threadval; i++) maxVelMagThreads[i] = -1.0f;
#if PARALLEL==1
#pragma omp parallel
#endif
{ float maxVelMag1 = 0.;
#if PARALLEL==1 && !_WIN32
#if PARALLEL==1
const int id = omp_get_thread_num(); /*, num = omp_get_num_threads(); */
#endif
// vector noise main loop
#if PARALLEL==1 && !_WIN32
#pragma omp for schedule(static)
#if PARALLEL==1
#pragma omp for schedule(static,1)
#endif
for (int zSmall = 0; zSmall < _zResSm; zSmall++)
for (int zSmall = 0; zSmall < _zResSm; zSmall++)
{
for (int ySmall = 0; ySmall < _yResSm; ySmall++)
for (int xSmall = 0; xSmall < _xResSm; xSmall++)
{
@ -796,14 +824,14 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
// get LU factorization of texture jacobian and apply
// it to unit vectors
JAMA::LU<float> LU = computeLU3x3(jacobian);
sLU LU = computeLU(jacobian);
float xUnwarped[] = {1.0f, 0.0f, 0.0f};
float yUnwarped[] = {0.0f, 1.0f, 0.0f};
float zUnwarped[] = {0.0f, 0.0f, 1.0f};
float xWarped[] = {1.0f, 0.0f, 0.0f};
float yWarped[] = {0.0f, 1.0f, 0.0f};
float zWarped[] = {0.0f, 0.0f, 1.0f};
bool nonSingular = LU.isNonsingular();
bool nonSingular = isNonsingular(LU);
#if 0
// UNUSED
float eigMax = 10.0f;
@ -908,24 +936,27 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
obstacles, posSm[0], posSm[1], posSm[2], _xResSm, _yResSm, _zResSm);
if (obsCheck > 0.95)
bigUx[index] = bigUy[index] = bigUz[index] = 0.;
} // xyz
} // xyz*/
#if PARALLEL==1 && !_WIN32
#if PARALLEL==1
maxVelMagThreads[id] = maxVelMag1;
#else
maxVelMagThreads[0] = maxVelMag1;
#endif
}
}
} // omp
// compute maximum over threads
float maxVelMag = maxVelMagThreads[0];
#if PARALLEL==1 && !_WIN32
for (int i = 1; i < 8; i++)
#if PARALLEL==1
for (int i = 1; i < threadval; i++)
if (maxVelMag < maxVelMagThreads[i])
maxVelMag = maxVelMagThreads[i];
delete [] maxVelMagThreads;
#endif
// prepare density for an advection
SWAP_POINTERS(_densityBig, _densityBigOld);
@ -941,15 +972,56 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
const float dtSubdiv = dt / (float)totalSubsteps;
// set boundaries of big velocity grid
FLUID_3D::setZeroX(bigUx, _resBig);
FLUID_3D::setZeroY(bigUy, _resBig);
FLUID_3D::setZeroZ(bigUz, _resBig);
FLUID_3D::setZeroX(bigUx, _resBig, 0 , _resBig[2]);
FLUID_3D::setZeroY(bigUy, _resBig, 0 , _resBig[2]);
FLUID_3D::setZeroZ(bigUz, _resBig, 0 , _resBig[2]);
#if PARALLEL==1
int stepParts = threadval*2; // Dividing parallelized sections into numOfThreads * 2 sections
float partSize = (float)_zResBig/stepParts; // Size of one part;
if (partSize < 4) {stepParts = threadval; // If the slice gets too low (might actually slow things down, change it to larger
partSize = (float)_zResBig/stepParts;}
if (partSize < 4) {stepParts = (int)(ceil((float)_zResBig/4.0f)); // If it's still too low (only possible on future systems with +24 cores), change it to 4
partSize = (float)_zResBig/stepParts;}
#else
int zBegin=0;
int zEnd=_resBig[2];
#endif
// do the MacCormack advection, with substepping if necessary
for(int substep = 0; substep < totalSubsteps; substep++)
{
FLUID_3D::advectFieldMacCormack(dtSubdiv, bigUx, bigUy, bigUz,
_densityBigOld, _densityBig, tempBig1, tempBig2, _resBig, NULL);
#if PARALLEL==1
#pragma omp parallel
{
#pragma omp for schedule(static,1)
for (int i=0; i<stepParts; i++)
{
int zBegin = (int)((float)i*partSize + 0.5f);
int zEnd = (int)((float)(i+1)*partSize + 0.5f);
#endif
FLUID_3D::advectFieldMacCormack1(dtSubdiv, bigUx, bigUy, bigUz,
_densityBigOld, tempBig1, _resBig, zBegin, zEnd);
#if PARALLEL==1
}
#pragma omp barrier
#pragma omp for schedule(static,1)
for (int i=0; i<stepParts; i++)
{
int zBegin = (int)((float)i*partSize + 0.5f);
int zEnd = (int)((float)(i+1)*partSize + 0.5f);
#endif
FLUID_3D::advectFieldMacCormack2(dtSubdiv, bigUx, bigUy, bigUz,
_densityBigOld, _densityBig, tempBig1, tempBig2, _resBig, NULL, zBegin, zEnd);
#if PARALLEL==1
}
}
#endif
if (substep < totalSubsteps - 1)
SWAP_POINTERS(_densityBig, _densityBigOld);
@ -964,7 +1036,7 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa
free(highFreqEnergy);
// wipe the density borders
FLUID_3D::setZeroBorder(_densityBig, _resBig);
FLUID_3D::setZeroBorder(_densityBig, _resBig, 0 , _resBig[2]);
// reset texture coordinates now in preparation for next timestep
// Shouldn't do this before generating the noise because then the

@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// y in smoke is z in blender
extern "C" FLUID_3D *smoke_init(int *res, float *p0, float dt)
@ -110,8 +111,8 @@ extern "C" void smoke_dissolve(FLUID_3D *fluid, int speed, int log)
heat[i] *= (1.0 - dydx);
if(heat[i] < 0.0f)
heat[i] = 0.0f;
/*if(heat[i] < 0.0f)
heat[i] = 0.0f;*/
}
}
else // linear falloff
@ -127,10 +128,9 @@ extern "C" void smoke_dissolve(FLUID_3D *fluid, int speed, int log)
if(density[i] < 0.0f)
density[i] = 0.0f;
heat[i] -= dydx;
if(heat[i] < 0.0f)
heat[i] = 0.0f;
if(abs(heat[i]) < dydx) heat[i] = 0.0f;
else if (heat[i]>0.0f) heat[i] -= dydx;
else if (heat[i]<0.0f) heat[i] += dydx;
}
}

@ -86,6 +86,7 @@ class PHYSICS_PT_smoke(PhysicButtonsPanel):
col.label(text="Behavior:")
col.prop(domain, "alpha")
col.prop(domain, "beta")
col.prop(domain, "initial_velocity", text="Initial Velocity")
col.prop(domain, "dissolve_smoke", text="Dissolve")
sub = col.column()
sub.active = domain.dissolve_smoke
@ -155,6 +156,12 @@ class PHYSICS_PT_smoke_cache(PhysicButtonsPanel):
return md and (md.smoke_type == 'TYPE_DOMAIN')
def draw(self, context):
domain = context.smoke.domain_settings
self.layout.prop(domain, "smoke_cache_comp")
md = context.smoke.domain_settings
cache = md.point_cache_low
@ -203,6 +210,13 @@ class PHYSICS_PT_smoke_cache_highres(PhysicButtonsPanel):
return md and (md.smoke_type == 'TYPE_DOMAIN') and md.domain_settings.highres
def draw(self, context):
domain = context.smoke.domain_settings
self.layout.prop(domain, "smoke_cache_high_comp")
md = context.smoke.domain_settings
cache = md.point_cache_high

@ -851,6 +851,7 @@ class TEXTURE_PT_voxeldata(TextureButtonsPanel):
layout.prop(vd, "resolution")
elif vd.file_format == 'SMOKE':
layout.prop(vd, "domain_object")
layout.prop(vd, "smoke_data_type")
elif vd.file_format == 'IMAGE_SEQUENCE':
layout.template_image(tex, "image", tex.image_user)

@ -710,7 +710,9 @@ static int ptcache_write_smoke(PTCacheFile *pf, void *smoke_v)
unsigned char *obstacles;
unsigned int in_len = sizeof(float)*(unsigned int)res;
unsigned char *out = (unsigned char *)MEM_callocN(LZO_OUT_LEN(in_len)*4, "pointcache_lzo_buffer");
int mode = res >= 1000000 ? 2 : 1;
//int mode = res >= 1000000 ? 2 : 1;
int mode=1; // light
if (sds->cache_comp == SM_CACHE_HEAVY) mode=2; // heavy
smoke_export(sds->fluid, &dt, &dx, &dens, &densold, &heat, &heatold, &vx, &vy, &vz, &vxold, &vyold, &vzold, &obstacles);
@ -753,7 +755,10 @@ static int ptcache_write_smoke_turbulence(PTCacheFile *pf, void *smoke_v)
smoke_turbulence_get_res(sds->wt, res_big_array);
res_big = res_big_array[0]*res_big_array[1]*res_big_array[2];
mode = res_big >= 1000000 ? 2 : 1;
//mode = res_big >= 1000000 ? 2 : 1;
mode = 1; // light
if (sds->cache_high_comp == SM_CACHE_HEAVY) mode=2; // heavy
in_len_big = sizeof(float) * (unsigned int)res_big;
smoke_turbulence_export(sds->wt, &dens, &densold, &tcu, &tcv, &tcw);

@ -872,11 +872,13 @@ static void smoke_calc_domain(Scene *scene, Object *ob, SmokeModifierData *smd)
heat[index] = sfs->temp;
density[index] = sfs->density;
/*
// Uses particle velocity as initial velocity for smoke
if(smd->domain->flags & MOD_SMOKE_INITVELOCITY) {
velocity_x[index] = pa->state.vel[0];
velocity_y[index] = pa->state.vel[1];
velocity_z[index] = pa->state.vel[2];
*/
}
// obstacle[index] |= 2;
// we need different handling for the high-res feature
@ -1396,8 +1398,9 @@ static void get_cell(float *p0, int res[3], float dx, float *pos, int *cell, int
static void smoke_calc_transparency(float *result, float *input, float *p0, float *p1, int res[3], float dx, float *light, bresenham_callback cb, float correct)
{
int x, y, z;
int z;
float bv[6];
int slabsize=res[0]*res[1];
memset(result, -1, sizeof(float)*res[0]*res[1]*res[2]); // x
bv[0] = p0[0];
@ -1409,19 +1412,20 @@ static void smoke_calc_transparency(float *result, float *input, float *p0, floa
bv[4] = p0[2];
bv[5] = p1[2];
#pragma omp parallel for schedule(static) private(y, z)
for(x = 0; x < res[0]; x++)
#pragma omp parallel for schedule(static,1)
for(z = 0; z < res[2]; z++)
{
size_t index = z*slabsize;
int x,y;
for(y = 0; y < res[1]; y++)
for(z = 0; z < res[2]; z++)
for(x = 0; x < res[0]; x++, index++)
{
float voxelCenter[3];
size_t index;
float pos[3];
int cell[3];
float tRay = 1.0;
index = smoke_get_index(x, res[0], y, res[1], z);
if(result[index] >= 0.0f)
continue;
voxelCenter[0] = p0[0] + dx * x + dx * 0.5;
@ -1446,5 +1450,6 @@ static void smoke_calc_transparency(float *result, float *input, float *p0, floa
// #pragma omp critical
result[index] = tRay;
}
}
}

@ -33,6 +33,8 @@
#define MOD_SMOKE_HIGHRES (1<<1) /* enable high resolution */
#define MOD_SMOKE_DISSOLVE (1<<2) /* let smoke dissolve */
#define MOD_SMOKE_DISSOLVE_LOG (1<<3) /* using 1/x for dissolve */
#define MOD_SMOKE_INITVELOCITY (1<<4) /* passes particles speed to
the smoke*/
/* noise */
#define MOD_SMOKE_NOISEWAVE (1<<0)
@ -41,6 +43,10 @@
/* viewsettings */
#define MOD_SMOKE_VIEW_SHOWBIG (1<<0)
/* cache compression */
#define SM_CACHE_LIGHT 0
#define SM_CACHE_HEAVY 1
typedef struct SmokeDomainSettings {
struct SmokeModifierData *smd; /* for fast RNA access */
struct FLUID_3D *fluid;
@ -73,6 +79,8 @@ typedef struct SmokeDomainSettings {
int res_wt[3];
float dx_wt;
int v3dnum;
int cache_comp;
int cache_high_comp;
struct PointCache *point_cache[2]; /* definition is in DNA_object_force.h */
struct ListBase ptcaches[2];
struct EffectorWeights *effector_weights;

@ -184,7 +184,7 @@ typedef struct VoxelData {
short file_format;
short flag;
short extend;
short pad;
short smoked_type;
struct Object *object; /* for rendering smoke sims */
float int_multiplier;
@ -546,5 +546,10 @@ typedef struct TexMapping {
#define TEX_VD_IMAGE_SEQUENCE 3
#define TEX_VD_SMOKE 4
/* smoke data types */
#define TEX_VD_SMOKEDENSITY 0
#define TEX_VD_SMOKEHEAT 1
#define TEX_VD_SMOKEVEL 2
#endif

@ -117,6 +117,11 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna)
/* {MOD_SMOKE_NOISECURL, "NOISECURL", 0, "Curl", ""}, */
{0, NULL, 0, NULL, NULL}};
static EnumPropertyItem smoke_cache_comp_items[] = {
{SM_CACHE_HEAVY, "CACHEHEAVY", 0, "Heavy (Very slow)", "Effective but slow compression."},
{SM_CACHE_LIGHT, "CACHELIGHT", 0, "Light (Fast)", "Fast but not so effective compression."},
{0, NULL, 0, NULL, NULL}};
srna = RNA_def_struct(brna, "SmokeDomainSettings", NULL);
RNA_def_struct_ui_text(srna, "Domain Settings", "Smoke domain settings.");
RNA_def_struct_sdna(srna, "SmokeDomainSettings");
@ -201,6 +206,11 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "Dissolve Speed", "Dissolve Speed");
RNA_def_property_update(prop, 0, NULL);
prop= RNA_def_property(srna, "initial_velocity", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "flags", MOD_SMOKE_INITVELOCITY);
RNA_def_property_ui_text(prop, "Initial Velocity", "Smoke inherits it's velocity from the emitter particle.");
RNA_def_property_update(prop, 0, NULL);
prop= RNA_def_property(srna, "dissolve_smoke", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "flags", MOD_SMOKE_DISSOLVE);
RNA_def_property_ui_text(prop, "Dissolve Smoke", "Enable smoke to disappear over time.");
@ -221,10 +231,24 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna)
RNA_def_property_pointer_sdna(prop, NULL, "point_cache[1]");
RNA_def_property_ui_text(prop, "Point Cache", "");
prop= RNA_def_property(srna, "smoke_cache_comp", PROP_ENUM, PROP_NONE);
RNA_def_property_enum_sdna(prop, NULL, "cache_comp");
RNA_def_property_enum_items(prop, smoke_cache_comp_items);
RNA_def_property_ui_text(prop, "Cache Compression", "Compression method to be used.");
RNA_def_property_update(prop, 0, NULL);
prop= RNA_def_property(srna, "smoke_cache_high_comp", PROP_ENUM, PROP_NONE);
RNA_def_property_enum_sdna(prop, NULL, "cache_high_comp");
RNA_def_property_enum_items(prop, smoke_cache_comp_items);
RNA_def_property_ui_text(prop, "Cache Compression", "Compression method to be used.");
RNA_def_property_update(prop, 0, NULL);
prop= RNA_def_property(srna, "effector_weights", PROP_POINTER, PROP_NONE);
RNA_def_property_struct_type(prop, "EffectorWeights");
RNA_def_property_clear_flag(prop, PROP_EDITABLE);
RNA_def_property_ui_text(prop, "Effector Weights", "");
}
static void rna_def_smoke_flow_settings(BlenderRNA *brna)

@ -1726,6 +1726,12 @@ static void rna_def_texture_voxeldata(BlenderRNA *brna)
{TEX_REPEAT, "REPEAT", 0, "Repeat", "Causes the image to repeat horizontally and vertically"},
{0, NULL, 0, NULL, NULL}};
static EnumPropertyItem smoked_type_items[] = {
{TEX_VD_SMOKEDENSITY, "SMOKEDENSITY", 0, "Density", "Use smoke density as texture data."},
{TEX_VD_SMOKEHEAT, "SMOKEHEAT", 0, "Heat", "Use smoke heat as texture data. Values from -2.0 to 2.0 are used."},
{TEX_VD_SMOKEVEL, "SMOKEVEL", 0, "Velocity", "Use smoke velocity as texture data."},
{0, NULL, 0, NULL, NULL}};
srna= RNA_def_struct(brna, "VoxelData", NULL);
RNA_def_struct_sdna(srna, "VoxelData");
RNA_def_struct_ui_text(srna, "VoxelData", "Voxel data settings.");
@ -1735,6 +1741,12 @@ static void rna_def_texture_voxeldata(BlenderRNA *brna)
RNA_def_property_enum_items(prop, interpolation_type_items);
RNA_def_property_ui_text(prop, "Interpolation", "Method to interpolate/smooth values between voxel cells");
RNA_def_property_update(prop, 0, "rna_Texture_update");
prop= RNA_def_property(srna, "smoke_data_type", PROP_ENUM, PROP_NONE);
RNA_def_property_enum_sdna(prop, NULL, "smoked_type");
RNA_def_property_enum_items(prop, smoked_type_items);
RNA_def_property_ui_text(prop, "Source", "Simulation value to be used as a texture.");
RNA_def_property_update(prop, 0, "rna_Texture_update");
prop= RNA_def_property(srna, "extension", PROP_ENUM, PROP_NONE);
RNA_def_property_enum_sdna(prop, NULL, "extend");

@ -165,16 +165,61 @@ void init_frame_smoke(Render *re, VoxelData *vd, Tex *tex)
if( (md = (ModifierData *)modifiers_findByType(ob, eModifierType_Smoke)) )
{
SmokeModifierData *smd = (SmokeModifierData *)md;
if(smd->domain && smd->domain->fluid) {
if (smd->domain->flags & MOD_SMOKE_HIGHRES) {
smoke_turbulence_get_res(smd->domain->wt, vd->resol);
vd->dataset = smoke_turbulence_get_density(smd->domain->wt);
} else {
if (vd->smoked_type == TEX_VD_SMOKEHEAT) {
int totRes;
float *heat;
int i;
VECCOPY(vd->resol, smd->domain->res);
vd->dataset = smoke_get_density(smd->domain->fluid);
totRes = (vd->resol[0])*(vd->resol[1])*(vd->resol[2]);
// scaling heat values from -2.0-2.0 to 0.0-1.0
vd->dataset = MEM_mapallocN(sizeof(float)*(totRes), "smoke data");
heat = smoke_get_heat(smd->domain->fluid);
for (i=0; i<totRes; i++)
{
vd->dataset[i] = (heat[i]+2.0f)/4.0f;
}
//vd->dataset = smoke_get_heat(smd->domain->fluid);
}
else if (vd->smoked_type == TEX_VD_SMOKEVEL) {
int totRes;
float *xvel, *yvel, *zvel;
int i;
VECCOPY(vd->resol, smd->domain->res);
totRes = (vd->resol[0])*(vd->resol[1])*(vd->resol[2]);
// scaling heat values from -2.0-2.0 to 0.0-1.0
vd->dataset = MEM_mapallocN(sizeof(float)*(totRes), "smoke data");
xvel = smoke_get_velocity_x(smd->domain->fluid);
yvel = smoke_get_velocity_y(smd->domain->fluid);
zvel = smoke_get_velocity_z(smd->domain->fluid);
for (i=0; i<totRes; i++)
{
vd->dataset[i] = sqrt(xvel[i]*xvel[i] + yvel[i]*yvel[i] + zvel[i]*zvel[i])*3.0f;
}
}
else {
if (smd->domain->flags & MOD_SMOKE_HIGHRES) {
smoke_turbulence_get_res(smd->domain->wt, vd->resol);
vd->dataset = smoke_turbulence_get_density(smd->domain->wt);
} else {
VECCOPY(vd->resol, smd->domain->res);
vd->dataset = smoke_get_density(smd->domain->fluid);
}
} // end of fluid condition
}
}
}