blender: speedup for fast gauss blur (approx 10% - 15%)

Campbell Barton Sat, 16 Jun 2012 02:53:02 -0700

Revision: 47986
          
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=47986
Author:   campbellbarton
Date:     2012-06-16 09:52:38 +0000 (Sat, 16 Jun 2012)
Log Message:
-----------
speedup for fast gauss blur (approx 10% - 15%)
- get the image width and height once rather than calculating on every access 
(was doing min/max subtract).
- use unsigned ints - faster for looping.


Modified Paths:
--------------
    
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
    
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h
    trunk/blender/source/blender/nodes/composite/node_composite_util.c
    trunk/blender/source/blender/nodes/composite/nodes/node_composite_defocus.c

Modified: 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
===================================================================
--- 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
        2012-06-16 09:18:00 UTC (rev 47985)
+++ 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.cpp
        2012-06-16 09:52:38 UTC (rev 47986)
@@ -20,6 +20,8 @@
  *             Monique Dewanchand
  */
 
+#include <limits.h>
+
 #include "COM_FastGaussianBlurOperation.h"
 #include "MEM_guardedalloc.h"
 #include "BLI_utildefines.h"
@@ -77,6 +79,8 @@
        BlurBaseOperation::deinitMutex();       
 }
 
+#include "PIL_time.h"
+
 void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer 
**memoryBuffers)
 {
        lockMutex();
@@ -84,7 +88,9 @@
                MemoryBuffer *newBuf = (MemoryBuffer 
*)this->inputProgram->initializeTileData(rect, memoryBuffers);
                MemoryBuffer *copy = newBuf->duplicate();
                updateSize(memoryBuffers);
-               
+
+               TIMEIT_START(fast);
+
                int c;
                sx = data->sizex * this->size / 2.0f;
                sy = data->sizey * this->size / 2.0f;
@@ -104,16 +110,21 @@
                        }
                }
                this->iirgaus = copy;
+
+               TIMEIT_END(fast);
        }
        unlockMutex();
        return iirgaus;
 }
 
-void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int 
chan, int xy)
+void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, 
unsigned int chan, unsigned int xy)
 {
        double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
        double *X, *Y, *W;
-       int i, x, y, sz;
+       const unsigned int src_width = src->getWidth();
+       const unsigned int src_height = src->getHeight();
+       unsigned int x, y, sz;
+       unsigned int i;
        float *buffer = src->getBuffer();
        
        // <0.5 not valid, though can have a possibly useful sort of sharpening 
effect
@@ -123,8 +134,8 @@
        
        // XXX The YVV macro defined below explicitly expects sources of at 
least 3x3 pixels,
        //     so just skiping blur along faulty direction if src's def is 
below that limit!
-       if (src->getWidth() < 3) xy &= ~(int) 1;
-       if (src->getHeight() < 3) xy &= ~(int) 2;
+       if (src_width < 3) xy &= ~(int) 1;
+       if (src_height < 3) xy &= ~(int) 2;
        if (xy < 1) return;
        
        // see "Recursive Gabor Filtering" by Young/VanVliet
@@ -178,33 +189,34 @@
        Y[L - 1] = cf[0] * W[L - 1] + cf[1] * tsv[0] + cf[2] * tsv[1] + cf[3] * 
tsv[2];     \
        Y[L - 2] = cf[0] * W[L - 2] + cf[1] * Y[L - 1] + cf[2] * tsv[0] + cf[3] 
* tsv[1];   \
        Y[L - 3] = cf[0] * W[L - 3] + cf[1] * Y[L - 2] + cf[2] * Y[L - 1] + 
cf[3] * tsv[0]; \
-       for (i = L - 4; i >= 0; i--) {                                          
            \
+       /* 'i != UINT_MAX' is really 'i >= 0', but necessary for unsigned int 
wrapping */   \
+       for (i = L - 4; i != UINT_MAX; i--) {                                   
            \
                Y[i] = cf[0] * W[i] + cf[1] * Y[i + 1] + cf[2] * Y[i + 2] + 
cf[3] * Y[i + 3];   \
        }                                                                       
            \
 } (void)0
        
        // intermediate buffers
-       sz = MAX2(src->getWidth(), src->getHeight());
+       sz = MAX2(src_width, src_height);
        X = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss X buf");
        Y = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss Y buf");
        W = (double *)MEM_callocN(sz * sizeof(double), "IIR_gauss W buf");
        if (xy & 1) {   // H
-               for (y = 0; y < src->getHeight(); ++y) {
-                       const int yx = y * src->getWidth();
-                       for (x = 0; x < src->getWidth(); ++x)
+               for (y = 0; y < src_height; ++y) {
+                       const int yx = y * src_width;
+                       for (x = 0; x < src_width; ++x)
                                X[x] = buffer[(x + yx) * COM_NUMBER_OF_CHANNELS 
+ chan];
-                       YVV(src->getWidth());
-                       for (x = 0; x < src->getWidth(); ++x)
+                       YVV(src_width);
+                       for (x = 0; x < src_width; ++x)
                                buffer[(x + yx) * COM_NUMBER_OF_CHANNELS + 
chan] = Y[x];
                }
        }
        if (xy & 2) {   // V
-               for (x = 0; x < src->getWidth(); ++x) {
-                       for (y = 0; y < src->getHeight(); ++y)
-                               X[y] = buffer[(x + y * src->getWidth()) * 
COM_NUMBER_OF_CHANNELS + chan];
-                       YVV(src->getHeight());
-                       for (y = 0; y < src->getHeight(); ++y)
-                               buffer[(x + y * src->getWidth()) * 
COM_NUMBER_OF_CHANNELS + chan] = Y[y];
+               for (x = 0; x < src_width; ++x) {
+                       for (y = 0; y < src_height; ++y)
+                               X[y] = buffer[(x + y * src_width) * 
COM_NUMBER_OF_CHANNELS + chan];
+                       YVV(src_height);
+                       for (y = 0; y < src_height; ++y)
+                               buffer[(x + y * src_width) * 
COM_NUMBER_OF_CHANNELS + chan] = Y[y];
                }
        }
        

Modified: 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h
===================================================================
--- 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h
  2012-06-16 09:18:00 UTC (rev 47985)
+++ 
trunk/blender/source/blender/compositor/operations/COM_FastGaussianBlurOperation.h
  2012-06-16 09:52:38 UTC (rev 47986)
@@ -36,7 +36,7 @@
        bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation 
*readOperation, rcti *output);
        void executePixel(float *color, int x, int y, MemoryBuffer * 
inputBuffers[], void *data);
        
-       static void IIR_gauss(MemoryBuffer *src, float sigma, int channel, int 
xy);
+       static void IIR_gauss(MemoryBuffer *src, float sigma, unsigned int 
channel, unsigned int xy);
        void *initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers);
        void deinitExecution();
        void initExecution();

Modified: trunk/blender/source/blender/nodes/composite/node_composite_util.c
===================================================================
--- trunk/blender/source/blender/nodes/composite/node_composite_util.c  
2012-06-16 09:18:00 UTC (rev 47985)
+++ trunk/blender/source/blender/nodes/composite/node_composite_util.c  
2012-06-16 09:52:38 UTC (rev 47986)
@@ -32,6 +32,8 @@
 
 #include "node_composite_util.h"
 
+#include <limits.h>
+
 CompBuf *alloc_compbuf(int sizex, int sizey, int type, int alloc)
 {
        CompBuf *cbuf= MEM_callocN(sizeof(CompBuf), "compbuf");
@@ -1300,33 +1302,35 @@
 {
        double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
        double *X, *Y, *W;
-       int i, x, y, sz;
+       const unsigned int src_width = src->x;
+       const unsigned int src_height = src->y;
+       unsigned int i, x, y, sz;
 
        // <0.5 not valid, though can have a possibly useful sort of sharpening 
effect
        if (sigma < 0.5f) return;
-       
+
        if ((xy < 1) || (xy > 3)) xy = 3;
-       
+
        // XXX The YVV macro defined below explicitly expects sources of at 
least 3x3 pixels,
        //     so just skiping blur along faulty direction if src's def is 
below that limit!
-       if (src->x < 3) xy &= ~(int) 1;
-       if (src->y < 3) xy &= ~(int) 2;
+       if (src_width < 3) xy &= ~(int) 1;
+       if (src_height < 3) xy &= ~(int) 2;
        if (xy < 1) return;
 
        // see "Recursive Gabor Filtering" by Young/VanVliet
        // all factors here in double.prec. Required, because for single.prec 
it seems to blow up if sigma > ~200
        if (sigma >= 3.556f)
-               q = 0.9804f*(sigma - 3.556f) + 2.5091f;
-       else // sigma >= 0.5
-               q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
-       q2 = q*q;
-       sc = (1.1668 + q)*(3.203729649  + (2.21566 + q)*q);
+               q = 0.9804f * (sigma - 3.556f) + 2.5091f;
+       else     // sigma >= 0.5
+               q = (0.0561f * sigma + 0.5784f) * sigma - 0.2568f;
+       q2 = q * q;
+       sc = (1.1668 + q) * (3.203729649  + (2.21566 + q) * q);
        // no gabor filtering here, so no complex multiplies, just the regular 
coefs.
        // all negated here, so as not to have to recalc Triggs/Sdika matrix
-       cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
-       cf[2] = -q2*(3.38246 + 3.0*q)/sc;
+       cf[1] = q * (5.788961737 + (6.76492 + 3.0 * q) * q) / sc;
+       cf[2] = -q2 * (3.38246 + 3.0 * q) / sc;
        // 0 & 3 unchanged
-       cf[3] = q2*q/sc;
+       cf[3] = q2 * q / sc;
        cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
 
        // Triggs/Sdika border corrections,
@@ -1336,59 +1340,62 @@
        // but neither seem to be quite the same, result seems to be ok so far 
anyway.
        // Extra scale factor here to not have to do it in filter,
        // though maybe this had something to with the precision errors
-       sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - 
cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
-       tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
-       tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
-       tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
-       tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
-       tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
-       tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
-       tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
-       tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - 
cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
-       tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
+       sc = cf[0] / ((1.0 + cf[1] - cf[2] + cf[3]) * (1.0 - cf[1] - cf[2] - 
cf[3]) * (1.0 + cf[2] + (cf[1] - cf[3]) * cf[3]));
+       tsM[0] = sc * (-cf[3] * cf[1] + 1.0 - cf[3] * cf[3] - cf[2]);
+       tsM[1] = sc * ((cf[3] + cf[1]) * (cf[2] + cf[3] * cf[1]));
+       tsM[2] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
+       tsM[3] = sc * (cf[1] + cf[3] * cf[2]);
+       tsM[4] = sc * (-(cf[2] - 1.0) * (cf[2] + cf[3] * cf[1]));
+       tsM[5] = sc * (-(cf[3] * cf[1] + cf[3] * cf[3] + cf[2] - 1.0) * cf[3]);
+       tsM[6] = sc * (cf[3] * cf[1] + cf[2] + cf[1] * cf[1] - cf[2] * cf[2]);
+       tsM[7] = sc * (cf[1] * cf[2] + cf[3] * cf[2] * cf[2] - cf[1] * cf[3] * 
cf[3] - cf[3] * cf[3] * cf[3] - cf[3] * cf[2] + cf[3]);
+       tsM[8] = sc * (cf[3] * (cf[1] + cf[3] * cf[2]));
 
-#define YVV(L)                                                                \
-{                                                                             \
-       W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0];               
  \
-       W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0];               
  \
-       W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0];               
  \
-       for (i=3; i<L; i++)                                                     
  \
-               W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3]; 
      \
-       tsu[0] = W[L-1] - X[L-1];                                               
  \
-       tsu[1] = W[L-2] - X[L-1];                                               
  \
-       tsu[2] = W[L-3] - X[L-1];                                               
  \
-       tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1];        
  \
-       tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1];        
  \
-       tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1];        
  \
-       Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2];     
  \

@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [47986] trunk/blender/source/blender: speedup for fast gauss blur (approx 10% - 15%)

Reply via email to