From 2f29f8d18656e9c8796b68671a60812d0cffcb70 Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Sat, 16 Jun 2012 09:52:38 +0000 Subject: [PATCH] speedup for fast gauss blur (approx 10% - 15%) - get the image width and height once rather than calculating on every access (was doing min/max subtract). - use unsigned ints - faster for looping. --- .../COM_FastGaussianBlurOperation.cpp | 44 +++--- .../COM_FastGaussianBlurOperation.h | 2 +- .../nodes/composite/node_composite_util.c | 127 +++++++++--------- .../composite/nodes/node_composite_defocus.c | 100 +++++++------- 4 files changed, 146 insertions(+), 127 deletions(-) diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp index 7830eef829c..48cfbeb36f8 100644 --- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp +++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp @@ -20,6 +20,8 @@ * Monique Dewanchand */ +#include + #include "COM_FastGaussianBlurOperation.h" #include "MEM_guardedalloc.h" #include "BLI_utildefines.h" @@ -74,7 +76,7 @@ void FastGaussianBlurOperation::deinitExecution() delete this->iirgaus; this->iirgaus = NULL; } - BlurBaseOperation::deinitMutex(); + BlurBaseOperation::deinitMutex(); } void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers) @@ -84,7 +86,7 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m MemoryBuffer *newBuf = (MemoryBuffer *)this->inputProgram->initializeTileData(rect, memoryBuffers); MemoryBuffer *copy = newBuf->duplicate(); updateSize(memoryBuffers); - + int c; sx = data->sizex * this->size / 2.0f; sy = data->sizey * this->size / 2.0f; @@ -109,11 +111,14 @@ void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **m return iirgaus; } -void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy) +void
FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, unsigned int chan, unsigned int xy) { double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3]; double *X, *Y, *W; - int i, x, y, sz; + const unsigned int src_width = src->getWidth(); + const unsigned int src_height = src->getHeight(); + unsigned int x, y, sz; + unsigned int i; float *buffer = src->getBuffer(); // <0.5 not valid, though can have a possibly useful sort of sharpening effect @@ -123,8 +128,8 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch // XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels, // so just skiping blur along faulty direction if src's def is below that limit! - if (src->getWidth() < 3) xy &= ~(int) 1; - if (src->getHeight() < 3) xy &= ~(int) 2; + if (src_width < 3) xy &= ~(int) 1; + if (src_height < 3) xy &= ~(int) 2; if (xy < 1) return; // see "Recursive Gabor Filtering" by Young/VanVliet @@ -178,33 +183,34 @@ void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int ch Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \ Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \ Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \ - for (i = L - 4; i >= 0; i--) { \ + /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \ + for (i = L - 4; i != UINT_MAX; i--) { \ Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \ } \ } (void)0 // intermediate buffers - sz = MAX2(src->getWidth(), src->getHeight()); + sz = MAX2(src_width, src_height); X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf"); Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf"); W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf"); if (xy & 1) { // H - for (y = 0; y < src->getHeight(); ++y) { - const int yx = y * src->getWidth(); - for (x = 0; x < 
src->getWidth(); ++x) + for (y = 0; y < src_height; ++y) { + const int yx = y * src_width; + for (x = 0; x < src_width; ++x) X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan]; - YVV(src->getWidth()); - for (x = 0; x < src->getWidth(); ++x) + YVV(src_width); + for (x = 0; x < src_width; ++x) buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + chan] = Y[x]; } } if (xy & 2) { // V - for (x = 0; x < src->getWidth(); ++x) { - for (y = 0; y < src->getHeight(); ++y) - X[y] = buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan]; - YVV(src->getHeight()); - for (y = 0; y < src->getHeight(); ++y) - buffer[(x + y * src->getWidth()) * COM_NUMBER_OF_CHANNELS + chan] = Y[y]; + for (x = 0; x < src_width; ++x) { + for (y = 0; y < src_height; ++y) + X[y] = buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan]; + YVV(src_height); + for (y = 0; y < src_height; ++y) + buffer[(x + y * src_width) * COM_NUMBER_OF_CHANNELS + chan] = Y[y]; } } diff --git a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h index f92e3dc68a5..0f3929f052c 100644 --- a/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h +++ b/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h @@ -36,7 +36,7 @@ public: bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output); void executePixel(float *color, int x, int y, MemoryBuffer * inputBuffers[], void *data); - static void IIR_gauss(MemoryBuffer *src, float sigma, int channel, int xy); + static void IIR_gauss(MemoryBuffer *src, float sigma, unsigned int channel, unsigned int xy); void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers); void deinitExecution(); void initExecution(); diff --git a/source/blender/nodes/composite/node_composite_util.c b/source/blender/nodes/composite/node_composite_util.c index afd10d96e99..70788dfe0c8 100644 --- 
a/source/blender/nodes/composite/node_composite_util.c +++ b/source/blender/nodes/composite/node_composite_util.c @@ -32,6 +32,8 @@ #include "node_composite_util.h" +#include + CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc) { CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf"); @@ -1300,33 +1302,35 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy) { double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3]; double *X, *Y, *W; - int i, x, y, sz; + const unsigned int src_width = src->x; + const unsigned int src_height = src->y; + unsigned int i, x, y, sz; // <0.5 not valid, though can have a possibly useful sort of sharpening effect if (sigma < 0.5f) return; - + if ((xy < 1) || (xy > 3)) xy = 3; - + // XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels, // so just skiping blur along faulty direction if src's def is below that limit! - if (src->x < 3) xy &= ~(int) 1; - if (src->y < 3) xy &= ~(int) 2; + if (src_width < 3) xy &= ~(int) 1; + if (src_height < 3) xy &= ~(int) 2; if (xy < 1) return; // see "Recursive Gabor Filtering" by Young/VanVliet // all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200 if (sigma >= 3.556f) - q = 0.9804f*(sigma - 3.556f) + 2.5091f; - else // sigma >= 0.5 - q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f; - q2 = q*q; - sc = (1.1668 + q)*(3.203729649 + (2.21566 + q)*q); + q = 0.9804f * (sigma - 3.556f) + 2.5091f; + else // sigma >= 0.5 + q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f; + q2 = q * q; + sc = (1.1668 + q) * (3.203729649 + (2.21566 + q) * q); // no gabor filtering here, so no complex multiplies, just the regular coefs. 
// all negated here, so as not to have to recalc Triggs/Sdika matrix - cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc; - cf[2] = -q2*(3.38246 + 3.0*q)/sc; + cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc; + cf[2] = -q2 * (3.38246 + 3.0 * q) / sc; // 0 & 3 unchanged - cf[3] = q2*q/sc; + cf[3] = q2 * q / sc; cf[0] = 1.0 - cf[1] - cf[2] - cf[3]; // Triggs/Sdika border corrections, @@ -1336,59 +1340,62 @@ void IIR_gauss(CompBuf* src, float sigma, int chan, int xy) // but neither seem to be quite the same, result seems to be ok so far anyway. // Extra scale factor here to not have to do it in filter, // though maybe this had something to with the precision errors - sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3])); - tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]); - tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1])); - tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2])); - tsM[3] = sc*(cf[1] + cf[3]*cf[2]); - tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1])); - tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]); - tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]); - tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]); - tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2])); + sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3])); + tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]); + tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1])); + tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2])); + tsM[3] = sc * (cf[1] + cf[3] * cf[2]); + tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1])); + tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]); + tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]); + tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + 
cf[3]); + tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2])); -#define YVV(L) \ -{ \ - W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0]; \ - W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0]; \ - W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0]; \ - for (i=3; i=0; i--) \ - Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3]; \ +#define YVV(L) \ +{ \ + W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0]; \ + W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0]; \ + W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0]; \ + for (i = 3; i < L; i++) { \ + W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3]; \ + } \ + tsu[0] = W[L - 1] - X[L - 1]; \ + tsu[1] = W[L - 2] - X[L - 1]; \ + tsu[2] = W[L - 3] - X[L - 1]; \ + tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1]; \ + tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1]; \ + tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1]; \ + Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \ + Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \ + Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \ + /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \ + for (i = L - 4; i != UINT_MAX; i--) { \ + Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \ + } \ } (void)0 // intermediate buffers - sz = MAX2(src->x, src->y); - X = MEM_callocN(sz*sizeof(double), "IIR_gauss X buf"); - Y = MEM_callocN(sz*sizeof(double), "IIR_gauss Y buf"); - W = MEM_callocN(sz*sizeof(double), "IIR_gauss W buf"); - if (xy & 1) { // H - for (y=0; yy; ++y) { - const int yx = y*src->x; - for (x=0; xx; ++x) - X[x] = src->rect[(x + yx)*src->type + chan]; - YVV(src->x); - for (x=0; xx; ++x) - src->rect[(x + yx)*src->type + chan] = Y[x]; + sz = MAX2(src_width, 
src_height); + X = MEM_callocN(sz * sizeof(double), "IIR_gauss X buf"); + Y = MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf"); + W = MEM_callocN(sz * sizeof(double), "IIR_gauss W buf"); + if (xy & 1) { // H + for (y = 0; y < src_height; ++y) { + const int yx = y * src_width; + for (x = 0; x < src_width; ++x) + X[x] = src->rect[(x + yx) * src->type + chan]; + YVV(src_width); + for (x = 0; x < src_width; ++x) + src->rect[(x + yx) * src->type + chan] = Y[x]; } } - if (xy & 2) { // V - for (x=0; xx; ++x) { - for (y=0; yy; ++y) - X[y] = src->rect[(x + y*src->x)*src->type + chan]; - YVV(src->y); - for (y=0; yy; ++y) - src->rect[(x + y*src->x)*src->type + chan] = Y[y]; + if (xy & 2) { // V + for (x = 0; x < src_width; ++x) { + for (y = 0; y < src_height; ++y) + X[y] = src->rect[(x + y * src_width) * src->type + chan]; + YVV(src_height); + for (y = 0; y < src_height; ++y) + src->rect[(x + y * src_width) * src->type + chan] = Y[y]; } } diff --git a/source/blender/nodes/composite/nodes/node_composite_defocus.c b/source/blender/nodes/composite/nodes/node_composite_defocus.c index 1b82da372ec..2ae3cd6ba56 100644 --- a/source/blender/nodes/composite/nodes/node_composite_defocus.c +++ b/source/blender/nodes/composite/nodes/node_composite_defocus.c @@ -29,9 +29,10 @@ * \ingroup cmpnodes */ - #include "node_composite_util.h" +#include + /* ************ qdn: Defocus node ****************** */ static bNodeSocketTemplate cmp_node_defocus_in[]= { { SOCK_RGBA, 1, N_("Image"), 1.0f, 1.0f, 1.0f, 1.0f}, @@ -148,11 +149,13 @@ static float RI_vdC(unsigned int bits, unsigned int r) // much faster than anything else, constant time independent of width // should extend to multichannel and make this a node, could be useful // note: this is an almost exact copy of 'IIR_gauss' -static void IIR_gauss_single(CompBuf* buf, float sigma) +static void IIR_gauss_single(CompBuf *buf, float sigma) { double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3]; float *X, *Y, *W; - int i, x, y, sz; + const unsigned 
int src_width = buf->x; + const unsigned int src_height = buf->y; + unsigned int i, x, y, sz; // single channel only for now if (buf->type != CB_VAL) return; @@ -180,58 +183,61 @@ static void IIR_gauss_single(CompBuf* buf, float sigma) // it seems to work, not entirely sure if it is actually totally correct, // Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark), // found one other implementation by Cristoph Lampert, - // but neither seem to be quite the same, result seems to be ok sofar anyway. + // but neither seem to be quite the same, result seems to be ok so far anyway. // Extra scale factor here to not have to do it in filter, // though maybe this had something to with the precision errors - sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3])); - tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]); - tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1])); - tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2])); - tsM[3] = sc*(cf[1] + cf[3]*cf[2]); - tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1])); - tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]); - tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]); - tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]); - tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2])); + sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3])); + tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]); + tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1])); + tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2])); + tsM[3] = sc * (cf[1] + cf[3] * cf[2]); + tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1])); + tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]); + tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]); + tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * 
cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]); + tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2])); -#define YVV(L)\ -{\ - W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];\ - W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];\ - W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];\ - for (i=3; i=0; i--)\ - Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3];\ -} +#define YVV(L) \ +{ \ + W[0] = cf[0] * X[0] + cf[1] * X[0] + cf[2] * X[0] + cf[3] * X[0]; \ + W[1] = cf[0] * X[1] + cf[1] * W[0] + cf[2] * X[0] + cf[3] * X[0]; \ + W[2] = cf[0] * X[2] + cf[1] * W[1] + cf[2] * W[0] + cf[3] * X[0]; \ + for (i = 3; i < L; i++) { \ + W[i] = cf[0] * X[i] + cf[1] * W[i - 1] + cf[2] * W[i - 2] + cf[3] * W[i - 3]; \ + } \ + tsu[0] = W[L - 1] - X[L - 1]; \ + tsu[1] = W[L - 2] - X[L - 1]; \ + tsu[2] = W[L - 3] - X[L - 1]; \ + tsv[0] = tsM[0] * tsu[0] + tsM[1] * tsu[1] + tsM[2] * tsu[2] + X[L - 1]; \ + tsv[1] = tsM[3] * tsu[0] + tsM[4] * tsu[1] + tsM[5] * tsu[2] + X[L - 1]; \ + tsv[2] = tsM[6] * tsu[0] + tsM[7] * tsu[1] + tsM[8] * tsu[2] + X[L - 1]; \ + Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * tsv[2]; \ + Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] * tsv[1]; \ + Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + cf[3] * tsv[0]; \ + /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int wrapping */ \ + for (i = L - 4; i != UINT_MAX; i--) { \ + Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + cf[3] * Y[i + 3]; \ + } \ +} (void)0 // intermediate buffers - sz = MAX2(buf->x, buf->y); - Y = MEM_callocN(sz*sizeof(float), "IIR_gauss Y buf"); - W = MEM_callocN(sz*sizeof(float), "IIR_gauss W buf"); + sz = MAX2(src_width, src_height); + Y = MEM_callocN(sz * sizeof(float), "IIR_gauss Y buf"); + W = MEM_callocN(sz * sizeof(float), "IIR_gauss W buf"); // H - for (y=0; yy; y++) { - X = &buf->rect[y*buf->x]; - YVV(buf->x); - memcpy(X, Y, sizeof(float)*buf->x); 
+ for (y = 0; y < src_height; y++) { + X = &buf->rect[y * src_width]; + YVV(src_width); + memcpy(X, Y, sizeof(float) * src_width); } // V - X = MEM_callocN(buf->y*sizeof(float), "IIR_gauss X buf"); - for (x=0; xx; x++) { - for (y=0; yy; y++) - X[y] = buf->rect[x + y*buf->x]; - YVV(buf->y); - for (y=0; yy; y++) - buf->rect[x + y*buf->x] = Y[y]; + X = MEM_callocN(src_height * sizeof(float), "IIR_gauss X buf"); + for (x = 0; x < src_width; x++) { + for (y = 0; y < src_height; y++) + X[y] = buf->rect[x + y * src_width]; + YVV(src_height); + for (y = 0; y < src_height; y++) + buf->rect[x + y * src_width] = Y[y]; } MEM_freeN(X);