Georg Acher
Thu, 05 Apr 2001 09:46:29 -0700
Hi,
I don't know who's currently "responsible" for the whirl&pinch plugin, so I
post my patch to this list.
I have modified whirl&pinch slightly to use "blocking", i.e. doing all
calculations in small squares (32*32). With that technique, which is very
common in numerical computing, the CPU caches and (for GIMP) the tile cache
have a much higher hit rate.
The boost is quite spectacular: the original whirl&pinch on a larger image
(1400*1400) takes 30s to complete on an Athlon-600, with my patch only 6.5s.
That's a speedup by a factor of 4.5 without any change in the algorithm
itself!
The changes are relatively small (effectively about 10 lines) and affect
mostly clipping.
I have found no side effects of the patch...
The blocking can IMHO easily be used for a lot of other filters, and should
give a large speedup for most of GIMP's filters.
Please try out the patch and apply it to the source tree if you like it ;-)
--
Georg Acher, [EMAIL PROTECTED]
http://www.in.tum.de/~acher/
"Oh no, not again !" The bowl of petunias
--- whirlpinch.c.org Thu Apr 5 17:47:17 2001
+++ whirlpinch.c Thu Apr 5 18:49:09 2001
@@ -22,6 +22,14 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+/* Version 2.10:
+ *
+ * Major Speedup by use of "blocking", i.e. doing the calculations
+ * in small squares, thus gaining a performance boost from CPU caches
+ * and the tile cache.
+ *
+ * Georg Acher, [EMAIL PROTECTED]
+ */
/* Version 2.09:
*
@@ -63,7 +71,7 @@
#define PLUG_IN_NAME "plug_in_whirl_pinch"
-#define PLUG_IN_VERSION "May 1997, 2.09"
+#define PLUG_IN_VERSION "April 2001, 2.10"
/***** Magic numbers *****/
@@ -71,6 +79,10 @@
#define SCALE_WIDTH 200
#define ENTRY_WIDTH 60
+/* blocking size, 32*32pixels is a good compromise for all CPUs */
+
+#define BLOCKING 32
+
/***** Types *****/
typedef struct
@@ -366,12 +378,13 @@
guchar *top_row, *bot_row;
guchar *top_p, *bot_p;
gint row, col;
+ gint row1,col1;
guchar pixel[4][4];
guchar values[4];
double whirl;
double cx, cy;
int ix, iy;
- int i;
+ int i,n;
guchar bg_color[4];
pixel_fetcher_t *pft, *pfb;
@@ -406,112 +419,133 @@
whirl = wpvals.whirl * G_PI / 180;
radius2 = radius * radius * wpvals.radius;
- for (row = sel_y1; row <= ((sel_y1 + sel_y2) / 2); row++)
+ /* Whirl&Pinch in small squares to benefit from cache effects
+ (tile cache, CPU cache)
+ 20010405 GA
+ */
+ for (row1 = sel_y1; row1 <= ((sel_y1 + sel_y2) / 2); row1+=BLOCKING)
{
- top_p = top_row;
- bot_p = bot_row + img_bpp * (sel_width - 1);
-
- for (col = sel_x1; col < sel_x2; col++)
- {
- if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy))
- {
- /* We are inside the distortion area */
-
- /* Top */
-
- if (cx >= 0.0)
- ix = (int) cx;
- else
- ix = -((int) -cx + 1);
-
- if (cy >= 0.0)
- iy = (int) cy;
- else
- iy = -((int) -cy + 1);
-
- pixel_fetcher_get_pixel (pft, ix, iy, pixel[0]);
- pixel_fetcher_get_pixel (pft, ix + 1, iy, pixel[1]);
- pixel_fetcher_get_pixel (pft, ix, iy + 1, pixel[2]);
- pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, pixel[3]);
-
- for (i = 0; i < img_bpp; i++)
- {
- values[0] = pixel[0][i];
- values[1] = pixel[1][i];
- values[2] = pixel[2][i];
- values[3] = pixel[3][i];
-
- *top_p++ = bilinear (cx, cy, values);
- }
-
- /* Bottom */
-
- cx = cen_x + (cen_x - cx);
- cy = cen_y + (cen_y - cy);
-
- if (cx >= 0.0)
- ix = (int) cx;
- else
- ix = -((int) -cx + 1);
-
- if (cy >= 0.0)
- iy = (int) cy;
- else
- iy = -((int) -cy + 1);
-
- pixel_fetcher_get_pixel (pfb, ix, iy, pixel[0]);
- pixel_fetcher_get_pixel (pfb, ix + 1, iy, pixel[1]);
- pixel_fetcher_get_pixel (pfb, ix, iy + 1, pixel[2]);
- pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]);
-
- for (i = 0; i < img_bpp; i++)
- {
- values[0] = pixel[0][i];
- values[1] = pixel[1][i];
- values[2] = pixel[2][i];
- values[3] = pixel[3][i];
-
- *bot_p++ = bilinear (cx, cy, values);
- }
-
- bot_p -= 2 * img_bpp; /* We move backwards! */
- }
- else
- {
- /* We are outside the distortion area;
- * just copy the source pixels
- */
-
- /* Top */
-
- pixel_fetcher_get_pixel (pft, col, row, pixel[0]);
-
- for (i = 0; i < img_bpp; i++)
- *top_p++ = pixel[0][i];
-
- /* Bottom */
-
- pixel_fetcher_get_pixel (pfb,
- (sel_x2 - 1) - (col - sel_x1),
- (sel_y2 - 1) - (row - sel_y1),
- pixel[0]);
-
- for (i = 0; i < img_bpp; i++)
- *bot_p++ = pixel[0][i];
-
- bot_p -= 2 * img_bpp; /* We move backwards! */
- }
- }
-
- /* Paint rows to image */
-
- gimp_pixel_rgn_set_row (&dest_rgn, top_row, sel_x1, row, sel_width);
- gimp_pixel_rgn_set_row (&dest_rgn, bot_row,
- sel_x1, (sel_y2 - 1) - (row - sel_y1), sel_width);
-
+ for(col1 = sel_x1; col1 < sel_x2; col1+=BLOCKING)
+ {
+ /* Now whirl the block starting at (col1,row1)
+with size BLOCKING*BLOCKING */
+
+ for (row = row1; (row <= ((sel_y1 + sel_y2) /
+2))&&(row<(row1+BLOCKING)); row++)
+ {
+ top_p = top_row;
+ bot_p = bot_row + img_bpp * (BLOCKING - 1);
+
+ for (col = col1; (col <
+sel_x2)&&(col<(col1+BLOCKING)); col++)
+ {
+ if (calc_undistorted_coords
+(col, row, whirl, wpvals.pinch, &cx, &cy))
+ {
+ /* We are inside the
+distortion area */
+
+ /* Top */
+
+ if (cx >= 0.0)
+
+ ix = (int) cx;
+ else
+
+ ix = -((int) -cx + 1);
+
+ if (cy >= 0.0)
+
+ iy = (int) cy;
+ else
+
+ iy = -((int) -cy + 1);
+
+ pixel_fetcher_get_pixel
+(pft, ix, iy, pixel[0]);
+ pixel_fetcher_get_pixel
+(pft, ix + 1, iy, pixel[1]);
+ pixel_fetcher_get_pixel
+(pft, ix, iy + 1, pixel[2]);
+ pixel_fetcher_get_pixel
+(pft, ix + 1, iy + 1, pixel[3]);
+
+ for (i = 0; i < img_bpp; i++)
+
+ {
+
+ values[0] = pixel[0][i];
+
+ values[1] = pixel[1][i];
+
+ values[2] = pixel[2][i];
+
+ values[3] = pixel[3][i];
+
+
+ *top_p++ = bilinear (cx, cy, values);
+
+ }
+
+ /* Bottom */
+
+ cx = cen_x + (cen_x -
+cx);
+ cy = cen_y + (cen_y - cy);
+
+ if (cx >= 0.0)
+
+ ix = (int) cx;
+ else
+
+ ix = -((int) -cx + 1);
+
+ if (cy >= 0.0)
+
+ iy = (int) cy;
+ else
+
+ iy = -((int) -cy + 1);
+
+
+pixel_fetcher_get_pixel (pfb, ix, iy, pixel[0]);
+ pixel_fetcher_get_pixel (pfb,
+ix + 1, iy, pixel[1]);
+ pixel_fetcher_get_pixel (pfb,
+ix, iy + 1, pixel[2]);
+ pixel_fetcher_get_pixel (pfb,
+ix + 1, iy + 1, pixel[3]);
+
+ for (i = 0; i < img_bpp; i++)
+
+ {
+
+ values[0] = pixel[0][i];
+
+ values[1] = pixel[1][i];
+
+ values[2] = pixel[2][i];
+
+ values[3] = pixel[3][i];
+
+
+ *bot_p++ = bilinear (cx, cy, values);
+
+ }
+
+ bot_p -= 2 * img_bpp; /* We
+move backwards! */
+ }
+ else
+ {
+ /* We are outside the
+distortion area;
+ * just copy the source pixels
+ */
+
+ /* Top */
+
+ pixel_fetcher_get_pixel (pft,
+col, row, pixel[0]);
+
+ for (i = 0; i < img_bpp; i++)
+
+ *top_p++ = pixel[0][i];
+
+ /* Bottom */
+
+
+pixel_fetcher_get_pixel (pfb,
+ (sel_x2 - 1) -
+(col - sel_x1),
+ (sel_y2 - 1) -
+(row - sel_y1),
+ pixel[0]);
+
+ for (i = 0; i <
+img_bpp; i++)
+
+ *bot_p++ = pixel[0][i];
+
+ bot_p -= 2 * img_bpp;
+/* We move backwards! */
+ }/* else */
+ } /* for col=col1 */
+
+ /* Paint rows to image */
+ /* Due to blocking, some
+clipping is needed */
+ if (col1<(sel_x2-BLOCKING))
+
+ gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, BLOCKING);
+ else
+
+ gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, sel_x2-col1);
+
+
+n=sel_x2-(col1-sel_x1)-BLOCKING;
+
+ if (n>=(sel_x1))
+
+ gimp_pixel_rgn_set_row(&dest_rgn, bot_row, n, (sel_y2 - 1) - (row - sel_y1),
+BLOCKING);
+ else
+
+ gimp_pixel_rgn_set_row(&dest_rgn, bot_row+ (sel_x1-n)* img_bpp, sel_x1,
+
+
+ (sel_y2 - 1) - (row - sel_y1),BLOCKING-(sel_x1-n));
+ } /* for row=row1 */
+
+ } /* for col1= */
/* Update progress */
- progress += sel_width * 2;
+ progress += sel_width * 2 *BLOCKING;
gimp_progress_update ((double) progress / max_progress);
}