forked from bartvdbraak/blender
speedup for fast gauss blur (approx 10% - 15%)
- get the image width and height once rather than calculating them on every access (was doing a min/max subtract each time). - use unsigned ints - faster for looping.
This commit is contained in:
parent
250e919b7c
commit
2f29f8d186
@ -20,6 +20,8 @@
|
|||||||
* Monique Dewanchand
|
* Monique Dewanchand
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#include "COM_FastGaussianBlurOperation.h"
|
#include "COM_FastGaussianBlurOperation.h"
|
||||||
#include "MEM_guardedalloc.h"
|
#include "MEM_guardedalloc.h"
|
||||||
#include "BLI_utildefines.h"
|
#include "BLI_utildefines.h"
|
||||||
@ -74,7 +76,7 @@ void FastGaussianBlurOperation::deinitExecution()
|
|||||||
delete this->iirgaus;
|
delete this->iirgaus;
|
||||||
this->iirgaus = NULL;
|
this->iirgaus = NULL;
|
||||||
}
|
}
|
||||||
BlurBaseOperation::deinitMutex();
|
BlurBaseOperation::deinitMutex();
|
||||||
}
|
}
|
||||||
|
|
||||||
void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
|
void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
|
||||||
@ -84,7 +86,7 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m
|
|||||||
MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers);
|
MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers);
|
||||||
MemoryBuffer *copy = newBuf->duplicate();
|
MemoryBuffer *copy = newBuf->duplicate();
|
||||||
updateSize(memoryBuffers);
|
updateSize(memoryBuffers);
|
||||||
|
|
||||||
int c;
|
int c;
|
||||||
sx = data->sizex * this->size / 2.0f;
|
sx = data->sizex * this->size / 2.0f;
|
||||||
sy = data->sizey * this->size / 2.0f;
|
sy = data->sizey * this->size / 2.0f;
|
||||||
@ -109,11 +111,14 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m
|
|||||||
return iirgaus;
|
return iirgaus;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy)
|
void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy)
|
||||||
{
|
{
|
||||||
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
||||||
double *X, *Y, *W;
|
double *X, *Y, *W;
|
||||||
int i, x, y, sz;
|
const unsigned int src_width = src->getWidth();
|
||||||
|
const unsigned int src_height = src->getHeight();
|
||||||
|
unsigned int x, y, sz;
|
||||||
|
unsigned int i;
|
||||||
float *buffer = src->getBuffer();
|
float *buffer = src->getBuffer();
|
||||||
|
|
||||||
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
|
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
|
||||||
@ -123,8 +128,8 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch
|
|||||||
|
|
||||||
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
|
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
|
||||||
// so just skiping blur along faulty direction if src's def is below that limit!
|
// so just skiping blur along faulty direction if src's def is below that limit!
|
||||||
if (src->getWidth() < 3) xy &= ~(int) 1;
|
if (src_width < 3) xy &= ~(int) 1;
|
||||||
if (src->getHeight() < 3) xy &= ~(int) 2;
|
if (src_height < 3) xy &= ~(int) 2;
|
||||||
if (xy < 1) return;
|
if (xy < 1) return;
|
||||||
|
|
||||||
// see "Recursive Gabor Filtering" by Young/VanVliet
|
// see "Recursive Gabor Filtering" by Young/VanVliet
|
||||||
@ -178,33 +183,34 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch
|
|||||||
Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \
|
Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \
|
||||||
Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \
|
Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \
|
||||||
Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
|
Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
|
||||||
for (i = L - 4; i >= 0; i--) { \
|
/* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \
|
||||||
|
for (i = L - 4; i != UINT_MAX; i--) { \
|
||||||
Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \
|
Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \
|
||||||
} \
|
} \
|
||||||
} (void)0
|
} (void)0
|
||||||
|
|
||||||
// intermediate buffers
|
// intermediate buffers
|
||||||
sz = MAX2(src->getWidth(), src->getHeight());
|
sz = MAX2(src_width, src_height);
|
||||||
X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
|
X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
|
||||||
Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
|
Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
|
||||||
W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
|
W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
|
||||||
if (xy & 1) { // H
|
if (xy & 1) { // H
|
||||||
for (y = 0; y < src->getHeight(); ++y) {
|
for (y = 0; y < src_height; ++y) {
|
||||||
const int yx = y * src->getWidth();
|
const int yx = y * src_width;
|
||||||
for (x = 0; x < src->getWidth(); ++x)
|
for (x = 0; x < src_width; ++x)
|
||||||
X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan];
|
X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan];
|
||||||
YVV(src->getWidth());
|
YVV(src_width);
|
||||||
for (x = 0; x < src->getWidth(); ++x)
|
for (x = 0; x < src_width; ++x)
|
||||||
buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x];
|
buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (xy & 2) { // V
|
if (xy & 2) { // V
|
||||||
for (x = 0; x < src->getWidth(); ++x) {
|
for (x = 0; x < src_width; ++x) {
|
||||||
for (y = 0; y < src->getHeight(); ++y)
|
for (y = 0; y < src_height; ++y)
|
||||||
X[y] = buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan];
|
X[y] = buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan];
|
||||||
YVV(src->getHeight());
|
YVV(src_height);
|
||||||
for (y = 0; y < src->getHeight(); ++y)
|
for (y = 0; y < src_height; ++y)
|
||||||
buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
|
buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan] = Y[y];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ public:
|
|||||||
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
|
bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
|
||||||
void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data);
|
void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data);
|
||||||
|
|
||||||
static void IIR_gauss(MemoryBuffer *src, float sigma, int channel, int xy);
|
static void IIR_gauss(MemoryBuffer *src, float sigma, unsigned int channel, unsigned int xy);
|
||||||
void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
|
void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
|
||||||
void deinitExecution();
|
void deinitExecution();
|
||||||
void initExecution();
|
void initExecution();
|
||||||
|
@ -32,6 +32,8 @@
|
|||||||
|
|
||||||
#include "node_composite_util.h"
|
#include "node_composite_util.h"
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc)
|
CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc)
|
||||||
{
|
{
|
||||||
CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf");
|
CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf");
|
||||||
@ -1300,33 +1302,35 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy)
|
|||||||
{
|
{
|
||||||
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
||||||
double *X, *Y, *W;
|
double *X, *Y, *W;
|
||||||
int i, x, y, sz;
|
const unsigned int src_width = src->x;
|
||||||
|
const unsigned int src_height = src->y;
|
||||||
|
unsigned int i, x, y, sz;
|
||||||
|
|
||||||
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
|
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
|
||||||
if (sigma < 0.5f) return;
|
if (sigma < 0.5f) return;
|
||||||
|
|
||||||
if ((xy < 1) || (xy > 3)) xy = 3;
|
if ((xy < 1) || (xy > 3)) xy = 3;
|
||||||
|
|
||||||
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
|
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
|
||||||
// so just skiping blur along faulty direction if src's def is below that limit!
|
// so just skiping blur along faulty direction if src's def is below that limit!
|
||||||
if (src->x < 3) xy &= ~(int) 1;
|
if (src_width < 3) xy &= ~(int) 1;
|
||||||
if (src->y < 3) xy &= ~(int) 2;
|
if (src_height < 3) xy &= ~(int) 2;
|
||||||
if (xy < 1) return;
|
if (xy < 1) return;
|
||||||
|
|
||||||
// see "Recursive Gabor Filtering" by Young/VanVliet
|
// see "Recursive Gabor Filtering" by Young/VanVliet
|
||||||
// all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
|
// all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
|
||||||
if (sigma >= 3.556f)
|
if (sigma >= 3.556f)
|
||||||
q = 0.9804f*(sigma - 3.556f) + 2.5091f;
|
q = 0.9804f * (sigma - 3.556f) + 2.5091f;
|
||||||
else // sigma >= 0.5
|
else // sigma >= 0.5
|
||||||
q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
|
q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f;
|
||||||
q2 = q*q;
|
q2 = q * q;
|
||||||
sc = (1.1668 + q)*(3.203729649 + (2.21566 + q)*q);
|
sc = (1.1668 + q) * (3.203729649 + (2.21566 + q) * q);
|
||||||
// no gabor filtering here, so no complex multiplies, just the regular coefs.
|
// no gabor filtering here, so no complex multiplies, just the regular coefs.
|
||||||
// all negated here, so as not to have to recalc Triggs/Sdika matrix
|
// all negated here, so as not to have to recalc Triggs/Sdika matrix
|
||||||
cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
|
cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc;
|
||||||
cf[2] = -q2*(3.38246 + 3.0*q)/sc;
|
cf[2] = -q2 * (3.38246 + 3.0 * q) / sc;
|
||||||
// 0 & 3 unchanged
|
// 0 & 3 unchanged
|
||||||
cf[3] = q2*q/sc;
|
cf[3] = q2 * q / sc;
|
||||||
cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
|
cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
|
||||||
|
|
||||||
// Triggs/Sdika border corrections,
|
// Triggs/Sdika border corrections,
|
||||||
@ -1336,59 +1340,62 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy)
|
|||||||
// but neither seem to be quite the same, result seems to be ok so far anyway.
|
// but neither seem to be quite the same, result seems to be ok so far anyway.
|
||||||
// Extra scale factor here to not have to do it in filter,
|
// Extra scale factor here to not have to do it in filter,
|
||||||
// though maybe this had something to with the precision errors
|
// though maybe this had something to with the precision errors
|
||||||
sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
|
sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
|
||||||
tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
|
tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
|
||||||
tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
|
tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
|
||||||
tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
|
||||||
tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
|
tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
|
||||||
tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
|
tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
|
||||||
tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
|
tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
|
||||||
tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
|
tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
|
||||||
tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
|
tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
|
||||||
tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
|
||||||
|
|
||||||
#define YVV(L) \
|
#define YVV(L) \
|
||||||
{ \
|
{ \
|
||||||
W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0]; \
|
W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0]; \
|
||||||
W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0]; \
|
W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0]; \
|
||||||
W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0]; \
|
W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0]; \
|
||||||
for (i=3; i<L; i++) \
|
for (i = 3; i < L; i++) { \
|
||||||
W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3]; \
|
W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3]; \
|
||||||
tsu[0] = W[L-1] - X[L-1]; \
|
} \
|
||||||
tsu[1] = W[L-2] - X[L-1]; \
|
tsu[0] = W[L - 1] - X[L - 1]; \
|
||||||
tsu[2] = W[L-3] - X[L-1]; \
|
tsu[1] = W[L - 2] - X[L - 1]; \
|
||||||
tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1]; \
|
tsu[2] = W[L - 3] - X[L - 1]; \
|
||||||
tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1]; \
|
tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1]; \
|
||||||
tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1]; \
|
tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1]; \
|
||||||
Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2]; \
|
tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1]; \
|
||||||
Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1]; \
|
Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \
|
||||||
Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0]; \
|
Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \
|
||||||
for (i=L-4; i>=0; i--) \
|
Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
|
||||||
Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3]; \
|
/* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \
|
||||||
|
for (i = L - 4; i != UINT_MAX; i--) { \
|
||||||
|
Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \
|
||||||
|
} \
|
||||||
} (void)0
|
} (void)0
|
||||||
|
|
||||||
// intermediate buffers
|
// intermediate buffers
|
||||||
sz = MAX2(src->x, src->y);
|
sz = MAX2(src_width, src_height);
|
||||||
X = MEM_callocN(sz*sizeof(double), "IIR_gauss X buf");
|
X = MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
|
||||||
Y = MEM_callocN(sz*sizeof(double), "IIR_gauss Y buf");
|
Y = MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
|
||||||
W = MEM_callocN(sz*sizeof(double), "IIR_gauss W buf");
|
W = MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
|
||||||
if (xy & 1) { // H
|
if (xy & 1) { // H
|
||||||
for (y=0; y<src->y; ++y) {
|
for (y = 0; y < src_height; ++y) {
|
||||||
const int yx = y*src->x;
|
const int yx = y * src_width;
|
||||||
for (x=0; x<src->x; ++x)
|
for (x = 0; x < src_width; ++x)
|
||||||
X[x] = src->rect[(x + yx)*src->type + chan];
|
X[x] = src->rect[(x + yx) * src->type + chan];
|
||||||
YVV(src->x);
|
YVV(src_width);
|
||||||
for (x=0; x<src->x; ++x)
|
for (x = 0; x < src_width; ++x)
|
||||||
src->rect[(x + yx)*src->type + chan] = Y[x];
|
src->rect[(x + yx) * src->type + chan] = Y[x];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (xy & 2) { // V
|
if (xy & 2) { // V
|
||||||
for (x=0; x<src->x; ++x) {
|
for (x = 0; x < src_width; ++x) {
|
||||||
for (y=0; y<src->y; ++y)
|
for (y = 0; y < src_height; ++y)
|
||||||
X[y] = src->rect[(x + y*src->x)*src->type + chan];
|
X[y] = src->rect[(x + y * src_width) * src->type + chan];
|
||||||
YVV(src->y);
|
YVV(src_height);
|
||||||
for (y=0; y<src->y; ++y)
|
for (y = 0; y < src_height; ++y)
|
||||||
src->rect[(x + y*src->x)*src->type + chan] = Y[y];
|
src->rect[(x + y * src_width) * src->type + chan] = Y[y];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,9 +29,10 @@
|
|||||||
* \ingroup cmpnodes
|
* \ingroup cmpnodes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "node_composite_util.h"
|
#include "node_composite_util.h"
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
/* ************ qdn: Defocus node ****************** */
|
/* ************ qdn: Defocus node ****************** */
|
||||||
static bNodeSocketTemplate cmp_node_defocus_in[]= {
|
static bNodeSocketTemplate cmp_node_defocus_in[]= {
|
||||||
{ SOCK_RGBA, 1, N_("Image"), 1.0f, 1.0f, 1.0f, 1.0f},
|
{ SOCK_RGBA, 1, N_("Image"), 1.0f, 1.0f, 1.0f, 1.0f},
|
||||||
@ -148,11 +149,13 @@ static float RI_vdC(unsigned int bits, unsigned int r)
|
|||||||
// much faster than anything else, constant time independent of width
|
// much faster than anything else, constant time independent of width
|
||||||
// should extend to multichannel and make this a node, could be useful
|
// should extend to multichannel and make this a node, could be useful
|
||||||
// note: this is an almost exact copy of 'IIR_gauss'
|
// note: this is an almost exact copy of 'IIR_gauss'
|
||||||
static void IIR_gauss_single(CompBuf* buf, float sigma)
|
static void IIR_gauss_single(CompBuf *buf, float sigma)
|
||||||
{
|
{
|
||||||
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
||||||
float *X, *Y, *W;
|
float *X, *Y, *W;
|
||||||
int i, x, y, sz;
|
const unsigned int src_width = buf->x;
|
||||||
|
const unsigned int src_height = buf->y;
|
||||||
|
unsigned int i, x, y, sz;
|
||||||
|
|
||||||
// single channel only for now
|
// single channel only for now
|
||||||
if (buf->type != CB_VAL) return;
|
if (buf->type != CB_VAL) return;
|
||||||
@ -180,58 +183,61 @@ static void IIR_gauss_single(CompBuf* buf, float sigma)
|
|||||||
// it seems to work, not entirely sure if it is actually totally correct,
|
// it seems to work, not entirely sure if it is actually totally correct,
|
||||||
// Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
|
// Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
|
||||||
// found one other implementation by Cristoph Lampert,
|
// found one other implementation by Cristoph Lampert,
|
||||||
// but neither seem to be quite the same, result seems to be ok sofar anyway.
|
// but neither seem to be quite the same, result seems to be ok so far anyway.
|
||||||
// Extra scale factor here to not have to do it in filter,
|
// Extra scale factor here to not have to do it in filter,
|
||||||
// though maybe this had something to with the precision errors
|
// though maybe this had something to with the precision errors
|
||||||
sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
|
sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
|
||||||
tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
|
tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
|
||||||
tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
|
tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
|
||||||
tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
|
||||||
tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
|
tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
|
||||||
tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
|
tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
|
||||||
tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
|
tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
|
||||||
tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
|
tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
|
||||||
tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
|
tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
|
||||||
tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
|
||||||
|
|
||||||
#define YVV(L)\
|
#define YVV(L) \
|
||||||
{\
|
{ \
|
||||||
W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];\
|
W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0]; \
|
||||||
W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];\
|
W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0]; \
|
||||||
W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];\
|
W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0]; \
|
||||||
for (i=3; i<L; i++)\
|
for (i = 3; i < L; i++) { \
|
||||||
W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3];\
|
W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3]; \
|
||||||
tsu[0] = W[L-1] - X[L-1];\
|
} \
|
||||||
tsu[1] = W[L-2] - X[L-1];\
|
tsu[0] = W[L - 1] - X[L - 1]; \
|
||||||
tsu[2] = W[L-3] - X[L-1];\
|
tsu[1] = W[L - 2] - X[L - 1]; \
|
||||||
tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1];\
|
tsu[2] = W[L - 3] - X[L - 1]; \
|
||||||
tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1];\
|
tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1]; \
|
||||||
tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1];\
|
tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1]; \
|
||||||
Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2];\
|
tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1]; \
|
||||||
Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1];\
|
Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \
|
||||||
Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0];\
|
Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \
|
||||||
for (i=L-4; i>=0; i--)\
|
Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \
|
||||||
Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3];\
|
/* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \
|
||||||
}
|
for (i = L - 4; i != UINT_MAX; i--) { \
|
||||||
|
Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \
|
||||||
|
} \
|
||||||
|
} (void)0
|
||||||
|
|
||||||
// intermediate buffers
|
// intermediate buffers
|
||||||
sz = MAX2(buf->x, buf->y);
|
sz = MAX2(src_width, src_height);
|
||||||
Y = MEM_callocN(sz*sizeof(float), "IIR_gauss Y buf");
|
Y = MEM_callocN(sz * sizeof(float), "IIR_gauss Y buf");
|
||||||
W = MEM_callocN(sz*sizeof(float), "IIR_gauss W buf");
|
W = MEM_callocN(sz * sizeof(float), "IIR_gauss W buf");
|
||||||
// H
|
// H
|
||||||
for (y=0; y<buf->y; y++) {
|
for (y = 0; y < src_height; y++) {
|
||||||
X = &buf->rect[y*buf->x];
|
X = &buf->rect[y * src_width];
|
||||||
YVV(buf->x);
|
YVV(src_width);
|
||||||
memcpy(X, Y, sizeof(float)*buf->x);
|
memcpy(X, Y, sizeof(float) * src_width);
|
||||||
}
|
}
|
||||||
// V
|
// V
|
||||||
X = MEM_callocN(buf->y*sizeof(float), "IIR_gauss X buf");
|
X = MEM_callocN(src_height * sizeof(float), "IIR_gauss X buf");
|
||||||
for (x=0; x<buf->x; x++) {
|
for (x = 0; x < src_width; x++) {
|
||||||
for (y=0; y<buf->y; y++)
|
for (y = 0; y < src_height; y++)
|
||||||
X[y] = buf->rect[x + y*buf->x];
|
X[y] = buf->rect[x + y * src_width];
|
||||||
YVV(buf->y);
|
YVV(src_height);
|
||||||
for (y=0; y<buf->y; y++)
|
for (y = 0; y < src_height; y++)
|
||||||
buf->rect[x + y*buf->x] = Y[y];
|
buf->rect[x + y * src_width] = Y[y];
|
||||||
}
|
}
|
||||||
MEM_freeN(X);
|
MEM_freeN(X);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user