I'm committing the patches discussed earlier.
The patches I'm committing take the performance number from 1.9 down to 1.7, roughly a
10% speed boost. ;D
Attached, you will find even more patches. Per patch policy, I'm posting them for
review; I'll apply them Friday if no one objects.
Patch 60 speeds things up slightly, as does patch 70. The important thing about
70 is that it lets me pull my "SSE2 neat trick" in the next round
of patches!
for your perusal...
Julia Longtin <[EMAIL PROTECTED]>
--- ../../dev2/gift/ChangeLog 2006-08-13 17:18:40.000000000 +0000
+++ ChangeLog 2006-08-13 17:21:22.000000000 +0000
@@ -1,3 +1,9 @@
+2006-08-13 <[EMAIL PROTECTED]>
+
+ * FeatureExtraction/gabor.c
+ don't use calloc for conv; instead declare it and clear it manually.
+ change to C99 datatypes in gabor_filter().
+
2006-08-02 <[EMAIL PROTECTED]>
* FeatureExtraction/extract_block_features.c
define the kernals here, pass to gabor.c for init, use
--- ../../dev2/gift/FeatureExtraction/gabor.c 2006-08-13 16:47:51.000000000 +0000
+++ FeatureExtraction/gabor.c 2006-08-13 17:07:00.000000000 +0000
@@ -79,15 +79,18 @@
}
}
-void gabor_filter(double *image, int width, int height, int filter_scale, int orientation, double ** kernelsxy, double *output) {
+void gabor_filter(double *image, int width, int height, int filter_scale, int orientation, double **kernelsxy, double *output) {
- double *conv;
- int x, y, t_x, t_y;
- int i;
+ uint32_t x, y;
+ int32_t t_x, t_y;
+ uint32_t i;
double * target_kernal;
+ double conv[65536]; /* take advantage of our fixed image size. 65536 == width*height */
-
- conv = (double *)calloc(width*height, sizeof(double));
+ for (i = 0; i < width*height; i++)
+ {
+ conv[i]= 0; /* needs to be zeroed */
+ }
target_kernal=kernelsxy[filter_scale*num_gabors_per_scale+orientation];
@@ -144,5 +147,4 @@
}
}
- free(conv);
}
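
In case the intent of that first patch isn't obvious from the diff, here's roughly the
shape of the change as a standalone sketch (IMG_SIZE and filter_fixed() are made-up
names for illustration, not the gift source):

    #include <stdint.h>

    #define IMG_SIZE (256 * 256)   /* assumed fixed width*height == 65536 */

    static void filter_fixed(const double *image, double *output)
    {
        /* was: double *conv = calloc(width*height, sizeof(double)); */
        double conv[IMG_SIZE];     /* 512 KiB on the stack; fine if the stack limit allows it */
        uint32_t i;

        /* clear by hand instead of paying for a calloc()/free() pair on every call */
        for (i = 0; i < IMG_SIZE; i++)
            conv[i] = 0.0;

        /* ... the four gabor convolutions would fill conv[] here;
           placeholder so the sketch compiles and does something: */
        for (i = 0; i < IMG_SIZE; i++)
            output[i] = conv[i] + image[i];

        /* no free(conv) at the end any more */
    }

Same idea as the patch: since the image size is fixed, the scratch buffer can live on
the stack and be zeroed in a tight loop.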
--- ../../dev2/gift/ChangeLog 2006-08-14 01:54:51.000000000 +0000
+++ ChangeLog 2006-08-14 02:03:22.000000000 +0000
@@ -1,3 +1,8 @@
+2006-08-14 <[EMAIL PROTECTED]>
+
+ * FeatureExtraction/gabor.c
+ remove int32_t t_x, replace with uint32_t k. uint32_t is slightly faster, and simplifies the inner loops.
+
2006-08-13 <[EMAIL PROTECTED]>
* FeatureExtraction/gabor.c
--- FeatureExtraction/gabor.c 2006-08-14 01:56:25.000000000 +0000
+++ ../../dev2/gift/FeatureExtraction/gabor.c 2006-08-14 01:54:58.000000000 +0000
@@ -82,36 +82,33 @@
void gabor_filter(double *image, int width, int height, int filter_scale, int orientation, double **kernelsxy, double *output) {
uint32_t x, y;
- int32_t t_y;
+ int32_t t_x, t_y;
uint32_t i;
- uint32_t k;
double * target_kernal;
double conv[65536]; /* take advantage of our fixed image size. 65536 == width*height */
- double * target_conv;
- double * target_image;
for (i = 0; i < width*height; i++)
{
conv[i]= 0; /* needs to be zeroed */
- output[i]= 0; /* needs to be zeroed */
}
- /* first convolution */
target_kernal=kernelsxy[filter_scale*num_gabors_per_scale+orientation];
+
+ /* first convolution */
for (x = 0; x < width; x++) {
for (y = 0; y < height; y++) {
- target_image=&image[(width*height)-(y*width+x+kernal_size[filter_scale]/2)];
- for (k=0; k < kernal_size[filter_scale]; k++) {
- if ((x+kernal_size[filter_scale]/2 >= k) && (x+kernal_size[filter_scale]/2 < width+k)) {
+ output[y*width + x] = 0; /* might as well be here */
+ for (t_x = -kernal_size[filter_scale]/2; t_x <= kernal_size[filter_scale]/2; t_x++) {
+ if (((x - t_x) >= 0) && ((x - t_x) < width)) {
conv[y*width + x] +=
- target_kernal[k]*target_image[k];
+ target_kernal[t_x + kernal_size[filter_scale]/2]*image[65536-(y*width+ (x - t_x))];
}
}
}
}
- /* second convolution */
target_kernal=&target_kernal[kernal_size[filter_scale]];
+ /* second convolution */
for (x = 0; x < width; x++) {
for (y = 0; y < height; y++) {
for (t_y = -kernal_size[filter_scale]/2; t_y <= kernal_size[filter_scale]/2; t_y++) {
@@ -125,22 +122,21 @@
for (i = 0; i < width*height; i++)
conv[i] = 0;
- /* third convolution */
target_kernal=&target_kernal[kernal_size[filter_scale]];
+ /* third convolution */
for (x = 0; x < width; x++) {
for (y = 0; y < height; y++) {
- target_image=&image[(width*height)-(y*width+x+kernal_size[filter_scale]/2)];
- for (k=0; k < kernal_size[filter_scale]; k++) {
- if ((x+kernal_size[filter_scale]/2 >= k) && (x+kernal_size[filter_scale]/2 < width+k)) {
+ for (t_x = -kernal_size[filter_scale]/2; t_x <= kernal_size[filter_scale]/2; t_x++) {
+ if (((x - t_x) >= 0) && ((x - t_x) < width)) {
conv[y*width + x] +=
- target_kernal[k]*target_image[k];
+ target_kernal[t_x + kernal_size[filter_scale]/2]*image[65536-(y*width + (x - t_x))];
}
}
}
}
- /* fourth convolution */
target_kernal=&target_kernal[kernal_size[filter_scale]];
+ /* fourth convolution */
for (x = 0; x < width; x++) {
for (y = 0; y < height; y++) {
for (t_y = -kernal_size[filter_scale]/2; t_y <= kernal_size[filter_scale]/2; t_y++) {
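
For the curious, here's the k-index loop shape the 2006-08-14 ChangeLog entry is
talking about: the signed t_x offset folded into an unsigned k so the window and both
bounds checks stay in unsigned arithmetic. This is a simplified sketch with plain
row-major indexing; conv_row_k() and ksize are made-up names, not the gift source:

    #include <stdint.h>

    /* conv[] is assumed pre-zeroed by the caller, as in gabor_filter(). */
    static void conv_row_k(const double *image, const double *kernel, double *conv,
                           uint32_t width, uint32_t height, uint32_t ksize)
    {
        uint32_t x, y, k;

        for (y = 0; y < height; y++) {
            for (x = 0; x < width; x++) {
                /* signed form:   for (t_x = -ksize/2; t_x <= ksize/2; t_x++)
                 *                    if (x - t_x >= 0 && x - t_x < width) ...
                 * unsigned form: k = t_x + ksize/2, so the loop counter and
                 * both bounds checks stay non-negative. */
                for (k = 0; k < ksize; k++) {
                    if ((x + ksize/2 >= k) && (x + ksize/2 < width + k)) {
                        conv[y*width + x] +=
                            kernel[k] * image[y*width + (x + ksize/2 - k)];
                    }
                }
            }
        }
    }

The vertical passes (the t_y loops) are untouched by this patch; only the horizontal
passes change shape.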
_______________________________________________
help-GIFT mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/help-gift