Optimization for speed regression in mipmap generation

The regression was caused by the alpha premul cleanup commit; the
slowdown came from the uchar <-> float conversion, which is slow.

Replaced it with a uchar <-> int conversion, which seems to be accurate
enough and mostly eliminates the slowdown.

The slowdown was easy to notice when a movie clip is used as the 3D viewport
background and undistortion is enabled. In this case every frame
will re-calculate mipmaps.

It's still a bit slower than mipmap generation before the cleanup
commit, but I couldn't think of any extra boost here atm.
This commit is contained in:
Sergey Sharybin 2013-01-04 17:28:58 +00:00
parent 4190169282
commit 4990883664
3 changed files with 40 additions and 10 deletions

@ -106,6 +106,8 @@ MINLINE void premul_to_straight_v4(float straight[4], const float premul[4]);
MINLINE void straight_to_premul_v4(float straight[4], const float premul[4]);
MINLINE void straight_uchar_to_premul_float(float result[4], const unsigned char color[4]);
MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float color[4]);
/* Integer variant: multiplies each colour channel of the byte pixel by its
 * alpha and divides by 255 (integer division); alpha is copied unchanged. */
MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4]);
/* Integer variant of the reverse conversion: writes an unsigned char pixel
 * from int RGBA; when alpha is 0 or 255 the channels are copied as-is. */
MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4]);
/************************** Other *************************/

@ -326,4 +326,32 @@ MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float c
}
}
/* Premultiply a straight-alpha byte pixel using integer math only.
 *
 * result: receives RGBA as ints; each colour channel is
 *         (channel * alpha) / 255 with truncating integer division,
 *         and result[3] is the alpha value itself.
 * color:  source pixel, 4 unsigned chars (RGB followed by alpha).
 */
MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4])
{
	const int a = color[3];
	int chan;

	/* Only the three colour channels are scaled; alpha is stored as-is below. */
	for (chan = 0; chan < 3; chan++) {
		result[chan] = ((int)color[chan] * a) / 255;
	}

	result[3] = a;
}
/* Convert a premultiplied-alpha colour held as ints back into a
 * straight-alpha unsigned char pixel.
 *
 * result: destination pixel, 4 unsigned chars (RGB followed by alpha).
 * color:  premultiplied RGBA as ints.
 *
 * Values are narrowed to unsigned char without clamping, so this assumes
 * every colour channel is within 0..alpha and alpha is within 0..255
 * (which holds for averages of straight_uchar_to_premul_int() output).
 */
MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4])
{
	const int alpha = color[3];

	if (alpha == 0 || alpha == 255) {
		/* Fully transparent or fully opaque: there is nothing to divide out
		 * (and alpha == 0 must not reach the division below). */
		result[0] = color[0];
		result[1] = color[1];
		result[2] = color[2];
		result[3] = alpha;
	}
	else {
		/* Undo the premultiplication per channel. Each channel has to land
		 * in its own slot; previously all three were stored to result[0],
		 * leaving green and blue unwritten. */
		result[0] = color[0] * 255 / alpha;
		result[1] = color[1] * 255 / alpha;
		result[2] = color[2] * 255 / alpha;
		result[3] = alpha;
	}
}
#endif /* __MATH_COLOR_INLINE_C__ */

@ -311,19 +311,19 @@ void imb_onehalf_no_alloc(struct ImBuf *ibuf2, struct ImBuf *ibuf1)
for (y = ibuf2->y; y > 0; y--) {
cp2 = cp1 + (ibuf1->x << 2);
for (x = ibuf2->x; x > 0; x--) {
float p1f[8], p2f[8], destf[4];
int p1i[8], p2i[8], desti[4];
straight_uchar_to_premul_float(p1f, cp1);
straight_uchar_to_premul_float(p2f, cp2);
straight_uchar_to_premul_float(p1f + 4, cp1 + 4);
straight_uchar_to_premul_float(p2f + 4, cp2 + 4);
straight_uchar_to_premul_int(p1i, cp1);
straight_uchar_to_premul_int(p2i, cp2);
straight_uchar_to_premul_int(p1i + 4, cp1 + 4);
straight_uchar_to_premul_int(p2i + 4, cp2 + 4);
destf[0] = 0.25f * (p1f[0] + p2f[0] + p1f[4] + p2f[4]);
destf[1] = 0.25f * (p1f[1] + p2f[1] + p1f[5] + p2f[5]);
destf[2] = 0.25f * (p1f[2] + p2f[2] + p1f[6] + p2f[6]);
destf[3] = 0.25f * (p1f[3] + p2f[3] + p1f[7] + p2f[7]);
desti[0] = (p1i[0] + p2i[0] + p1i[4] + p2i[4]) >> 2;
desti[1] = (p1i[1] + p2i[1] + p1i[5] + p2i[5]) >> 2;
desti[2] = (p1i[2] + p2i[2] + p1i[6] + p2i[6]) >> 2;
desti[3] = (p1i[3] + p2i[3] + p1i[7] + p2i[7]) >> 2;
premul_float_to_straight_uchar(dest, destf);
premul_int_to_straight_uchar(dest, desti);
cp1 += 8;
cp2 += 8;