Hi,
I don't know who is currently "responsible" for the whirlpinch plugin, so I
am posting my patch to this list.
I have modified whirlpinch slightly to use "blocking", i.e. doing all
calculations in small squares (32*32 pixels). With this technique, which is
very common in numerical computing, the CPU caches and (for GIMP) the tile
cache achieve a much higher hit rate.
The boost is quite spectacular: on an Athlon-600, the original whirlpinch
needs 30 s to complete on a larger image (1400*1400); with my patch it needs
only 6.5 s. That's a speedup by a factor of 4.5 without any change in the
algorithm itself!
The changes are relatively small (effectively about 10 lines) and affect
mostly clipping.
I have found no side effects of the patch...
IMHO the blocking technique can easily be used for a lot of other filters, and
should give a large speedup for most of GIMP's filters.
Please try out the patch and apply it to the source tree if you like it ;-)
--
Georg Acher, [EMAIL PROTECTED]
http://www.in.tum.de/~acher/
"Oh no, not again !" The bowl of petunias
--- whirlpinch.c.org Thu Apr 5 17:47:17 2001
+++ whirlpinch.c Thu Apr 5 18:49:09 2001
@@ -22,6 +22,14 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
+/* Version 2.10:
+ *
+ * Major Speedup by use of "blocking", i.e. doing the calculations
+ * in small squares, thus gaining a performance boost from CPU caches
+ * and the tile cache.
+ *
+ * Georg Acher, [EMAIL PROTECTED]
+ */
/* Version 2.09:
*
@@ -63,7 +71,7 @@
#define PLUG_IN_NAME "plug_in_whirl_pinch"
-#define PLUG_IN_VERSION "May 1997, 2.09"
+#define PLUG_IN_VERSION "April 2001, 2.10"
/* Magic numbers */
@@ -71,6 +79,10 @@
#define SCALE_WIDTH 200
#define ENTRY_WIDTH 60
+/* blocking size, 32*32 pixels is a good compromise for all CPUs */
+
+#define BLOCKING 32
+
/* Types */
typedef struct
@@ -366,12 +378,13 @@
guchar *top_row, *bot_row;
guchar *top_p, *bot_p;
gint row, col;
+ gint row1,col1;
guchar pixel[4][4];
guchar values[4];
double whirl;
double cx, cy;
int ix, iy;
- int i;
+ int i,n;
guchar bg_color[4];
pixel_fetcher_t *pft, *pfb;
@@ -406,112 +419,133 @@
whirl = wpvals.whirl * G_PI / 180;
radius2 = radius * radius * wpvals.radius;
- for (row = sel_y1; row <= ((sel_y1 + sel_y2) / 2); row++)
+ /* WhirlPinch in small squares to benefit from cache effects
+ (tile cache, CPU cache)
+ 20010405 GA
+ */
+ for (row1 = sel_y1; row1 <= ((sel_y1 + sel_y2) / 2); row1+=BLOCKING)
{
- top_p = top_row;
- bot_p = bot_row + img_bpp * (sel_width - 1);
-
- for (col = sel_x1; col < sel_x2; col++)
- {
- if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy))
- {
- /* We are inside the distortion area */
-
- /* Top */
-
- if (cx >= 0.0)
- ix = (int) cx;
- else
- ix = -((int) -cx + 1);
-
- if (cy >= 0.0)
- iy = (int) cy;
- else
- iy = -((int) -cy + 1);
-
- pixel_fetcher_get_pixel (pft, ix, iy, pixel[0]);
- pixel_fetcher_get_pixel (pft, ix + 1, iy, pixel[1]);
- pixel_fetcher_get_pixel (pft, ix, iy + 1, pixel[2]);
- pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, pixel[3]);
-
- for (i = 0; i < img_bpp; i++)
- {
- values[0] = pixel[0][i];
- values[1] = pixel[1][i];
- values[2] = pixel[2][i];
- values[3] = pixel[3][i];
-
- *top_p++ = bilinear (cx, cy, values);
- }
-
- /* Bottom */
-
- cx = cen_x + (cen_x - cx);
- cy = cen_y + (cen_y - cy);
-
- if (cx >= 0.0)
- ix = (int) cx;
- else
- ix = -((int) -cx + 1);
-
- if (cy >= 0.0)
- iy = (int) cy;
- else
- iy = -((int) -cy + 1);
-
- pixel_fetcher_get_pixel (pfb, ix, iy, pixel[0]);
- pixel_fetcher_get_pixel (pfb, ix + 1, iy, pixel[1]);
- pixel_fetcher_get_pixel (pfb, ix, iy + 1, pixel[2]);
- pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]);
-
- for (i = 0; i < img_bpp; i++)
- {
- values[0] = pixel[0][i];
- values[1] = pixel[1][i];
- values[2] = pixel[2][i];
- values[3] = pixel[3][i];
-
- *bot_p++ = bilinear (cx, cy, values);
- }
-
- bot_p -= 2 * img_bpp; /* We move backwards! */
- }
- else
- {
-