== Sequencer / Imbuf ==

Added a faster linear interpolation scaler to IMB_scaleImBuf. Only handles common cases when we either enlarge both, width and height or shrink both, width and height but that is pretty fast: * does only blit once instead of two passes like the old code (fewer cache misses) * uses fixed point integer arithmetic for byte buffers * doesn't branch in tight loops Should be comparable in speed to the ImBuf ..._fast functions at least for byte-buffers. For the rather seldom situations that e.g. width is enlarged and height is shrunken, the old code is still in use. ------- Also small fix in sequencer: free float buffers on read now.
2008-01-30 15:41:02 +00:00 · 2008-01-30 15:41:02 +00:00 · 19e37e4a35
commit 19e37e4a35
parent 6601cef1d0
2 changed files with 472 additions and 2 deletions
--- a/source/blender/imbuf/intern/scaling.c
+++ b/source/blender/imbuf/intern/scaling.c
@ -483,6 +483,468 @@ struct ImBuf *IMB_halflace(struct ImBuf *ibuf1)
 	return (ibuf2);
 }

+/* q_scale_linear_interpolation helper functions */
+
+static void enlarge_picture_byte(
+	unsigned char* src, unsigned char* dst, int src_width, 
+	int src_height, int dst_width, int dst_height)
+{
+	double ratiox = (double) (dst_width - 1.0) 
+		/ (double) (src_width - 1.001);
+	double ratioy = (double) (dst_height - 1.0) 
+		/ (double) (src_height - 1.001);
+	unsigned long x_src, dx_src, x_dst;
+	unsigned long y_src, dy_src, y_dst;
+
+	dx_src = 65536.0 / ratiox;
+	dy_src = 65536.0 / ratioy;
+
+	y_src = 0;
+	for (y_dst = 0; y_dst < dst_height; y_dst++) {
+		unsigned char* line1 = src + (y_src >> 16) * 4 * src_width;
+		unsigned char* line2 = line1 + 4 * src_width;
+		unsigned long weight1y = 65536 - (y_src & 0xffff);
+		unsigned long weight2y = 65536 - weight1y;
+
+		if ((y_src >> 16) == src_height - 1) {
+			line2 = line1;
+		}
+
+		x_src = 0;
+		for (x_dst = 0; x_dst < dst_width; x_dst++) {
+			unsigned long weight1x = 65536 - (x_src & 0xffff);
+			unsigned long weight2x = 65536 - weight1x;
+
+			unsigned long x = (x_src >> 16) * 4;
+
+			*dst++ = ((((line1[x] * weight1y) >> 16) 
+				   * weight1x) >> 16)
+				+ ((((line2[x] * weight2y) >> 16) 
+				    * weight1x) >> 16)
+				+ ((((line1[4 + x] * weight1y) >> 16) 
+				   * weight2x) >> 16)
+				+ ((((line2[4 + x] * weight2y) >> 16) 
+				    * weight2x) >> 16);
+
+			*dst++ = ((((line1[x + 1] * weight1y) >> 16) 
+				   * weight1x) >> 16)
+				+ ((((line2[x + 1] * weight2y) >> 16) 
+				    * weight1x) >> 16)
+				+ ((((line1[4 + x + 1] * weight1y) >> 16) 
+				   * weight2x) >> 16)
+				+ ((((line2[4 + x + 1] * weight2y) >> 16) 
+				    * weight2x) >> 16);
+
+			*dst++ = ((((line1[x + 2] * weight1y) >> 16) 
+				   * weight1x) >> 16)
+				+ ((((line2[x + 2] * weight2y) >> 16) 
+				    * weight1x) >> 16)
+				+ ((((line1[4 + x + 2] * weight1y) >> 16) 
+				   * weight2x) >> 16)
+				+ ((((line2[4 + x + 2] * weight2y) >> 16) 
+				    * weight2x) >> 16);
+
+			*dst++ = ((((line1[x + 3] * weight1y) >> 16) 
+				   * weight1x) >> 16)
+				+ ((((line2[x + 3] * weight2y) >> 16) 
+				    * weight1x) >> 16)
+				+ ((((line1[4 + x + 3] * weight1y) >> 16) 
+				   * weight2x) >> 16)
+				+ ((((line2[4 + x + 3] * weight2y) >> 16) 
+				    * weight2x) >> 16);
+
+			x_src += dx_src;
+		}
+		y_src += dy_src;
+	}
+}
+
+struct scale_outpix_byte {
+	unsigned long r;
+	unsigned long g;
+	unsigned long b;
+	unsigned long a;
+
+	unsigned long weight;
+};
+
+static void shrink_picture_byte(
+	unsigned char* src, unsigned char* dst, int src_width, 
+	int src_height, int dst_width, int dst_height)
+{
+	double ratiox = (double) (dst_width) / (double) (src_width);
+	double ratioy = (double) (dst_height) / (double) (src_height);
+	unsigned long x_src, dx_dst, x_dst;
+	unsigned long y_src, dy_dst, y_dst;
+	long y_counter;
+	unsigned char * dst_begin = dst;
+
+	struct scale_outpix_byte * dst_line1 = NULL;
+	struct scale_outpix_byte * dst_line2 = NULL;
+
+	dst_line1 = (struct scale_outpix_byte*) MEM_callocN(
+		(dst_width + 1) * sizeof(struct scale_outpix_byte), 
+		"shrink_picture_byte 1");
+	dst_line2 = (struct scale_outpix_byte*) MEM_callocN(
+		(dst_width + 1) * sizeof(struct scale_outpix_byte),
+		"shrink_picture_byte 2");
+
+	dx_dst = 65536.0 * ratiox;
+	dy_dst = 65536.0 * ratioy;
+
+	y_dst = 0;
+	y_counter = 65536;
+	for (y_src = 0; y_src < src_height; y_src++) {
+		unsigned char* line = src + y_src * 4 * src_width;
+		unsigned long weight1y = 65536 - (y_dst & 0xffff);
+		unsigned long weight2y = 65536 - weight1y;
+		x_dst = 0;
+		for (x_src = 0; x_src < src_width; x_src++) {
+			unsigned long weight1x = 65536 - (x_dst & 0xffff);
+			unsigned long weight2x = 65536 - weight1x;
+
+			unsigned long x = x_dst >> 16;
+
+			unsigned long w;
+
+			w = (weight1y * weight1x) >> 16;
+
+			dst_line1[x].r += (line[0] * w) >> 16;
+			dst_line1[x].g += (line[1] * w) >> 16;
+			dst_line1[x].b += (line[2] * w) >> 16;
+			dst_line1[x].a += (line[3] * w) >> 16;
+			dst_line1[x].weight += w;
+
+			w = (weight2y * weight1x) >> 16;
+
+			dst_line2[x].r += (line[0] * w) >> 16;
+			dst_line2[x].g += (line[1] * w) >> 16;
+			dst_line2[x].b += (line[2] * w) >> 16;
+			dst_line2[x].a += (line[3] * w) >> 16;
+			dst_line2[x].weight += w;
+
+			w = (weight1y * weight2x) >> 16;
+
+			dst_line1[x+1].r += (line[0] * w) >> 16;
+			dst_line1[x+1].g += (line[1] * w) >> 16;
+			dst_line1[x+1].b += (line[2] * w) >> 16;
+			dst_line1[x+1].a += (line[3] * w) >> 16;
+			dst_line1[x+1].weight += w;
+
+			w = (weight2y * weight2x) >> 16;
+
+			dst_line2[x+1].r += (line[0] * w) >> 16;
+			dst_line2[x+1].g += (line[1] * w) >> 16;
+			dst_line2[x+1].b += (line[2] * w) >> 16;
+			dst_line2[x+1].a += (line[3] * w) >> 16;
+			dst_line2[x+1].weight += w;
+
+			x_dst += dx_dst;
+			line += 4;
+		}
+
+		y_dst += dy_dst;
+		y_counter -= dy_dst;
+		if (y_counter < 0) {
+			unsigned long x;
+			struct scale_outpix_byte * temp;
+
+			y_counter += 65536;
+			
+			for (x=0; x < dst_width; x++) {
+				unsigned long f = 0x80000000UL
+					/ dst_line1[x].weight;
+				*dst++ = (dst_line1[x].r * f) >> 15;
+				*dst++ = (dst_line1[x].g * f) >> 15;
+				*dst++ = (dst_line1[x].b * f) >> 15;
+				*dst++ = (dst_line1[x].a * f) >> 15;
+			}
+			memset(dst_line1, 0, dst_width *
+			       sizeof(struct scale_outpix_byte));
+			temp = dst_line1;
+			dst_line1 = dst_line2;
+			dst_line2 = temp;
+		}
+	}
+	if (dst - dst_begin < dst_width * dst_height * 4) {
+		unsigned long x;
+		for (x = 0; x < dst_width; x++) {
+			unsigned long f = 0x80000000UL / dst_line1[x].weight;
+			*dst++ = (dst_line1[x].r * f) >> 15;
+			*dst++ = (dst_line1[x].g * f) >> 15;
+			*dst++ = (dst_line1[x].b * f) >> 15;
+			*dst++ = (dst_line1[x].a * f) >> 15;
+		}
+	}
+	MEM_freeN(dst_line1);
+	MEM_freeN(dst_line2);
+}
+
+
+static void q_scale_byte(unsigned char* in, unsigned char* out, int in_width, 
+			 int in_height, int dst_width, int dst_height)
+{
+	if (dst_width > in_width && dst_height > in_height) {
+		enlarge_picture_byte(in, out, in_width, in_height,
+				     dst_width, dst_height);
+	} else if (dst_width < in_width && dst_height < in_height) {
+		shrink_picture_byte(in, out, in_width, in_height,
+				    dst_width, dst_height);
+	}
+}
+
+static void enlarge_picture_float(
+	float* src, float* dst, int src_width, 
+	int src_height, int dst_width, int dst_height)
+{
+	double ratiox = (double) (dst_width - 1.0) 
+		/ (double) (src_width - 1.001);
+	double ratioy = (double) (dst_height - 1.0) 
+		/ (double) (src_height - 1.001);
+	unsigned long x_dst;
+	unsigned long y_dst;
+	double x_src, dx_src;
+	double y_src, dy_src;
+
+	dx_src = 1.0 / ratiox;
+	dy_src = 1.0 / ratioy;
+
+	y_src = 0;
+	for (y_dst = 0; y_dst < dst_height; y_dst++) {
+		float* line1 = src + ((int) y_src) * 4 * src_width;
+		float* line2 = line1 + 4 * src_width;
+		float weight1y = 1.0 - (y_src - (int) y_src);
+		float weight2y = 1.0 - weight1y;
+
+		if ((int) y_src == src_height - 1) {
+			line2 = line1;
+		}
+		       
+		x_src = 0;
+		for (x_dst = 0; x_dst < dst_width; x_dst++) {
+			float weight1x = 1.0 - (x_src - (int) x_src);
+			float weight2x = 1.0 - weight1x;
+
+			float w11 = weight1y * weight1x;
+			float w21 = weight2y * weight1x;
+			float w12 = weight1y * weight2x;
+			float w22 = weight2y * weight2x;
+
+			unsigned long x = ((int) x_src) * 4;
+
+			*dst++ =  line1[x]     * w11	
+				+ line2[x]     * w21
+				+ line1[4 + x] * w12 
+				+ line2[4 + x] * w22;
+
+			*dst++ =  line1[x + 1] * w11 
+				+ line2[x + 1] * w21
+				+ line1[4 + x + 1] * w12
+				+ line2[4 + x + 1] * w22;
+
+			*dst++ =  line1[x + 2] * w11 
+				+ line2[x + 2] * w21
+				+ line1[4 + x + 2] * w12  
+				+ line2[4 + x + 2] * w22;
+
+			*dst++ =  line1[x + 3] * w11 
+				+ line2[x + 3] * w21
+				+ line1[4 + x + 3] * w12  
+				+ line2[4 + x + 3] * w22;
+
+			x_src += dx_src;
+		}
+		y_src += dy_src;
+	}
+}
+
+struct scale_outpix_float {
+	float r;
+	float g;
+	float b;
+	float a;
+
+	float weight;
+};
+
+static void shrink_picture_float(
+	float* src, float* dst, int src_width, 
+	int src_height, int dst_width, int dst_height)
+{
+	double ratiox = (double) (dst_width) / (double) (src_width);
+	double ratioy = (double) (dst_height) / (double) (src_height);
+	unsigned long x_src;
+	unsigned long y_src;
+        float dx_dst, x_dst;
+	float dy_dst, y_dst;
+	float y_counter;
+	float * dst_begin = dst;
+
+	struct scale_outpix_float * dst_line1;
+	struct scale_outpix_float * dst_line2;
+
+	dst_line1 = (struct scale_outpix_float*) MEM_callocN(
+		(dst_width + 1) * sizeof(struct scale_outpix_float), 
+		"shrink_picture_float 1");
+	dst_line2 = (struct scale_outpix_float*) MEM_callocN(
+		(dst_width + 1) * sizeof(struct scale_outpix_float),
+		"shrink_picture_float 2");
+
+	dx_dst = ratiox;
+	dy_dst = ratioy;
+
+	y_dst = 0;
+	y_counter = 1.0;
+	for (y_src = 0; y_src < src_height; y_src++) {
+		float* line = src + y_src * 4 * src_width;
+		unsigned long weight1y = 1.0 - (y_dst - (int) y_dst);
+		unsigned long weight2y = 1.0 - weight1y;
+		x_dst = 0;
+		for (x_src = 0; x_src < src_width; x_src++) {
+			unsigned long weight1x = 1.0 - (x_dst - (int) x_dst);
+			unsigned long weight2x = 1.0 - weight1x;
+
+			unsigned long x = (int) x_dst;
+
+			float w;
+
+			w = weight1y * weight1x;
+
+			dst_line1[x].r += line[0] * w;
+			dst_line1[x].g += line[1] * w;
+			dst_line1[x].b += line[2] * w;
+			dst_line1[x].a += line[3] * w;
+			dst_line1[x].weight += w;
+
+			w = weight2y * weight1x;
+
+			dst_line2[x].r += line[0] * w;
+			dst_line2[x].g += line[1] * w;
+			dst_line2[x].b += line[2] * w;
+			dst_line2[x].a += line[3] * w;
+			dst_line2[x].weight += w;
+
+			w = weight1y * weight2x;
+
+			dst_line1[x+1].r += line[0] * w;
+			dst_line1[x+1].g += line[1] * w;
+			dst_line1[x+1].b += line[2] * w;
+			dst_line1[x+1].a += line[3] * w;
+			dst_line1[x+1].weight += w;
+
+			w = weight2y * weight2x;
+
+			dst_line2[x+1].r += line[0] * w;
+			dst_line2[x+1].g += line[1] * w;
+			dst_line2[x+1].b += line[2] * w;
+			dst_line2[x+1].a += line[3] * w;
+			dst_line2[x+1].weight += w;
+
+			x_dst += dx_dst;
+			line += 4;
+		}
+
+		y_dst += dy_dst;
+		y_counter -= dy_dst;
+		if (y_counter < 0) {
+			unsigned long x;
+			struct scale_outpix_float * temp;
+
+			y_counter += 1.0;
+			
+			for (x=0; x < dst_width; x++) {
+				float f = 1.0 / dst_line1[x].weight;
+				*dst++ = dst_line1[x].r * f;
+				*dst++ = dst_line1[x].g * f;
+				*dst++ = dst_line1[x].b * f;
+				*dst++ = dst_line1[x].a * f;
+			}
+			memset(dst_line1, 0, dst_width *
+			       sizeof(struct scale_outpix_float));
+			temp = dst_line1;
+			dst_line1 = dst_line2;
+			dst_line2 = temp;
+		}
+	}
+	if (dst - dst_begin < dst_width * dst_height * 4) {
+		unsigned long x;
+		for (x = 0; x < dst_width; x++) {
+			float f = 1.0 / dst_line1[x].weight;
+			*dst++ = dst_line1[x].r * f;
+			*dst++ = dst_line1[x].g * f;
+			*dst++ = dst_line1[x].b * f;
+			*dst++ = dst_line1[x].a * f;
+		}
+	}
+	MEM_freeN(dst_line1);
+	MEM_freeN(dst_line2);
+}
+
+
+static void q_scale_float(float* in, float* out, int in_width, 
+			 int in_height, int dst_width, int dst_height)
+{
+	if (dst_width > in_width && dst_height > in_height) {
+		enlarge_picture_float(in, out, in_width, in_height,
+				      dst_width, dst_height);
+	} else if (dst_width < in_width && dst_height < in_height) {
+		shrink_picture_float(in, out, in_width, in_height,
+				     dst_width, dst_height);
+	}
+}
+
+/* q_scale_linear_interpolation (derived from ppmqscale, http://libdv.sf.net)
+
+   q stands for quick _and_ quality :)
+
+   only handles common cases when we either
+
+   scale  both, x and y or
+   shrink both, x and y
+
+   but that is pretty fast:
+   * does only blit once instead of two passes like the old code
+     (fewer cache misses)
+   * uses fixed point integer arithmetic for byte buffers
+   * doesn't branch in tight loops
+
+   Should be comparable in speed to the ImBuf ..._fast functions at least 
+   for byte-buffers.
+
+*/
+static int q_scale_linear_interpolation(
+	struct ImBuf *ibuf, int newx, int newy)
+{
+	if ((newx > ibuf->x && newy < ibuf->y) ||
+	    (newx < ibuf->x && newy > ibuf->y)) {
+		return FALSE;
+	}
+
+	if (ibuf->rect) {
+		unsigned char * newrect = 
+			MEM_mallocN(newx * newy * sizeof(int), "q_scale rect");
+		q_scale_byte(ibuf->rect, newrect, ibuf->x, ibuf->y,
+			     newx, newy);
+
+		imb_freerectImBuf(ibuf);
+		ibuf->mall |= IB_rect;
+		ibuf->rect = (unsigned int *) newrect;
+	}
+	if (ibuf->rect_float) {
+		float * newrect = 
+			MEM_mallocN(newx * newy * 4 *sizeof(float), 
+				    "q_scale rectfloat");
+		q_scale_float(ibuf->rect_float, newrect, ibuf->x, ibuf->y,
+			      newx, newy);
+		imb_freerectfloatImBuf(ibuf);
+		ibuf->mall |= IB_rectfloat;
+		ibuf->rect_float = newrect;
+	}
+	ibuf->x = newx;
+	ibuf->y = newy;
+
+	return TRUE;
+}

 static struct ImBuf *scaledownx(struct ImBuf *ibuf, int newx)
 {
@ -1113,11 +1575,18 @@ struct ImBuf *IMB_scaleImBuf(struct ImBuf * ibuf, short newx, short newy)
 {
 	if (ibuf==NULL) return (0);
 	if (ibuf->rect==NULL && ibuf->rect_float==NULL) return (ibuf);
+	
+	if (newx == ibuf->x && newy == ibuf->y) { return ibuf; }

-	// scaleup / scaledown functions below change ibuf->x and ibuf->y
-	// so we first scale the Z-buffer (if any)
+	/* scaleup / scaledown functions below change ibuf->x and ibuf->y
+	   so we first scale the Z-buffer (if any) */
 	scalefast_Z_ImBuf(ibuf, newx, newy);

+	/* try to scale common cases in a fast way */
+	if (q_scale_linear_interpolation(ibuf, newx, newy)) {
+		return ibuf;
+	}
+
 	if (newx < ibuf->x) if (newx) scaledownx(ibuf,newx);
 	if (newy < ibuf->y) if (newy) scaledowny(ibuf,newy);
 	if (newx > ibuf->x) if (newx) scaleupx(ibuf,newx);
--- a/source/blender/src/sequence.c
+++ b/source/blender/src/sequence.c
@ -1095,6 +1095,7 @@ static void input_preprocess(Sequence * seq, TStripElem* se, int cfra)
 	if(seq->flag & SEQ_MAKE_FLOAT) {
 		if (!se->ibuf->rect_float) {
 			IMB_float_from_rect(se->ibuf);
+			imb_freerectImBuf(se->ibuf);
 		}
 	}