forked from bartvdbraak/blender
Optimization for speed regression in mipmap generation
The regression was caused by the alpha premul cleanup commit; the reason for the slowdown was uchar <-> float conversion, which is slow. It is replaced with uchar <-> int conversion, which seems to be accurate enough and mostly eliminates the slowdown. The slowdown was easy to notice when a movie clip is used as the 3D viewport background and undistortion is enabled. In this case every frame will re-calculate mipmaps. It's still a bit slower than mipmap generation before the cleanup commit, but couldn't think of an extra boost here atm.
This commit is contained in:
parent
4190169282
commit
4990883664
@ -106,6 +106,8 @@ MINLINE void premul_to_straight_v4(float straight[4], const float premul[4]);
|
|||||||
MINLINE void straight_to_premul_v4(float straight[4], const float premul[4]);
|
MINLINE void straight_to_premul_v4(float straight[4], const float premul[4]);
|
||||||
MINLINE void straight_uchar_to_premul_float(float result[4], const unsigned char color[4]);
|
MINLINE void straight_uchar_to_premul_float(float result[4], const unsigned char color[4]);
|
||||||
MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float color[4]);
|
MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float color[4]);
|
||||||
|
MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4]);
|
||||||
|
MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4]);
|
||||||
|
|
||||||
/************************** Other *************************/
|
/************************** Other *************************/
|
||||||
|
|
||||||
|
@ -326,4 +326,32 @@ MINLINE void premul_float_to_straight_uchar(unsigned char *result, const float c
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert an 8-bit straight-alpha RGBA color to premultiplied integer components.
 *
 * Each color channel is scaled by alpha using integer arithmetic
 * (channel * alpha / 255, truncating); alpha itself is passed through unchanged.
 *
 * \param result: receives the four premultiplied components (R, G, B, A).
 * \param color: straight-alpha input color, four unsigned bytes (R, G, B, A).
 */
MINLINE void straight_uchar_to_premul_int(int result[4], const unsigned char color[4])
{
	const int a = color[3];
	int c;

	/* Scale the three color channels; integer division truncates. */
	for (c = 0; c < 3; c++) {
		result[c] = ((int)color[c] * a) / 255;
	}

	result[3] = a;
}
|
||||||
|
|
||||||
|
/**
 * Convert premultiplied integer RGBA components back to an 8-bit straight-alpha color.
 *
 * When alpha is 0 or 255 the components are copied as-is (un-premultiplying
 * would either divide by zero or be a no-op). Otherwise each color channel is
 * divided by alpha using integer arithmetic (channel * 255 / alpha, truncating).
 *
 * \param result: receives four unsigned bytes (R, G, B, A); values are narrowed
 * to unsigned char on store.
 * \param color: premultiplied input components (R, G, B, A).
 */
MINLINE void premul_int_to_straight_uchar(unsigned char *result, const int color[4])
{
	if (color[3] == 0 || color[3] == 255) {
		result[0] = color[0];
		result[1] = color[1];
		result[2] = color[2];
		result[3] = color[3];
	}
	else {
		int alpha = color[3];

		/* Each channel goes to its own slot; previously green and blue
		 * both overwrote result[0], leaving result[1] and result[2] unset. */
		result[0] = color[0] * 255 / alpha;
		result[1] = color[1] * 255 / alpha;
		result[2] = color[2] * 255 / alpha;
		result[3] = alpha;
	}
}
|
||||||
|
|
||||||
#endif /* __MATH_COLOR_INLINE_C__ */
|
#endif /* __MATH_COLOR_INLINE_C__ */
|
||||||
|
@ -311,19 +311,19 @@ void imb_onehalf_no_alloc(struct ImBuf *ibuf2, struct ImBuf *ibuf1)
|
|||||||
for (y = ibuf2->y; y > 0; y--) {
|
for (y = ibuf2->y; y > 0; y--) {
|
||||||
cp2 = cp1 + (ibuf1->x << 2);
|
cp2 = cp1 + (ibuf1->x << 2);
|
||||||
for (x = ibuf2->x; x > 0; x--) {
|
for (x = ibuf2->x; x > 0; x--) {
|
||||||
float p1f[8], p2f[8], destf[4];
|
int p1i[8], p2i[8], desti[4];
|
||||||
|
|
||||||
straight_uchar_to_premul_float(p1f, cp1);
|
straight_uchar_to_premul_int(p1i, cp1);
|
||||||
straight_uchar_to_premul_float(p2f, cp2);
|
straight_uchar_to_premul_int(p2i, cp2);
|
||||||
straight_uchar_to_premul_float(p1f + 4, cp1 + 4);
|
straight_uchar_to_premul_int(p1i + 4, cp1 + 4);
|
||||||
straight_uchar_to_premul_float(p2f + 4, cp2 + 4);
|
straight_uchar_to_premul_int(p2i + 4, cp2 + 4);
|
||||||
|
|
||||||
destf[0] = 0.25f * (p1f[0] + p2f[0] + p1f[4] + p2f[4]);
|
desti[0] = (p1i[0] + p2i[0] + p1i[4] + p2i[4]) >> 2;
|
||||||
destf[1] = 0.25f * (p1f[1] + p2f[1] + p1f[5] + p2f[5]);
|
desti[1] = (p1i[1] + p2i[1] + p1i[5] + p2i[5]) >> 2;
|
||||||
destf[2] = 0.25f * (p1f[2] + p2f[2] + p1f[6] + p2f[6]);
|
desti[2] = (p1i[2] + p2i[2] + p1i[6] + p2i[6]) >> 2;
|
||||||
destf[3] = 0.25f * (p1f[3] + p2f[3] + p1f[7] + p2f[7]);
|
desti[3] = (p1i[3] + p2i[3] + p1i[7] + p2i[7]) >> 2;
|
||||||
|
|
||||||
premul_float_to_straight_uchar(dest, destf);
|
premul_int_to_straight_uchar(dest, desti);
|
||||||
|
|
||||||
cp1 += 8;
|
cp1 += 8;
|
||||||
cp2 += 8;
|
cp2 += 8;
|
||||||
|
Loading…
Reference in New Issue
Block a user