jpeg pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=4443ecfa8be65aef0dedcb0d749c9081769cc140

commit 4443ecfa8be65aef0dedcb0d749c9081769cc140
Author: Jean-Philippe Andre <jp.an...@samsung.com>
Date:   Wed Mar 12 10:20:27 2014 +0900

    Evas filters: Optimize alpha box blur
    
    Use two optimizable functions for BOX blur: vertical and horizontal.
    These functions will run as many times as requested (from 1 to 6 max).
    
    The horizontal case is pretty straightforward as the source is already
    contiguous (nice in terms of cache hits). The only catch is to swap
    src and dst without ever writing to the input buffer.
    
    In case of vertical blur, we apply the same method as above, after
    rotating the column into a horizontal (contiguous) span, and rotating
    it back afterwards.
    
    Now, the same needs to be done for RGBA :)
---
 src/lib/evas/filters/blur/blur_box_alpha_.c | 266 +++++++++++++++++++++++-----
 src/lib/evas/filters/evas_filter.c          |   2 +-
 src/lib/evas/filters/evas_filter_blur.c     |  59 ++++--
 3 files changed, 270 insertions(+), 57 deletions(-)

diff --git a/src/lib/evas/filters/blur/blur_box_alpha_.c b/src/lib/evas/filters/blur/blur_box_alpha_.c
index 2202152..4a9facd 100644
--- a/src/lib/evas/filters/blur/blur_box_alpha_.c
+++ b/src/lib/evas/filters/blur/blur_box_alpha_.c
@@ -1,68 +1,246 @@
 /* @file blur_box_alpha_.c
- * Should define the functions:
- * - _box_blur_horiz_alpha_step
- * - _box_blur_vert_alpha_step
+ * Defines the following function:
+ * _box_blur_alpha_step
  */
 
 #include "evas_common_private.h"
 #include "../evas_filter_private.h"
 
-#if !defined (FUNCTION_NAME) || !defined (STEP)
-# error Must define FUNCTION_NAME and STEP
-#endif
-
 static inline void
-FUNCTION_NAME(const DATA8* restrict src, DATA8* restrict dst,
-              const int radius, const int len,
-              const int loops, const int loopstep)
+_box_blur_alpha_horiz_step(const DATA8* restrict const srcdata,
+                           DATA8* restrict const dstdata,
+                           const int* restrict const radii,
+                           const int len,
+                           const int loops)
 {
-   DEFINE_DIVIDER(2 * radius + 1);
-   const int left = MIN(radius, len);
-   const int right = MIN(radius, (len - radius));
-   int acc = 0, k;
+   const DATA8* restrict src;
+   DATA8* restrict dst;
+   DATA8* restrict span1;
+   DATA8* restrict span2;
 
-   for (int l = loops; l; --l)
+#if DIV_USING_BITSHIFT
+   int pow2_shifts[6] = {0};
+   int numerators[6] = {0};
+   for (int run = 0; radii[run]; run++)
      {
-        const DATA8* restrict sr = src;
-        const DATA8* restrict sl = src;
-        DATA8* restrict d = dst;
+        const int div = radii[run] * 2 + 1;
+        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
+        numerators[run] = (1 << pow2_shifts[run]) / (div);
+     }
+#endif
+
+   span1 = alloca(len);
+   span2 = alloca(len);
 
-        for (k = left; k; k--)
+   // For each line, apply as many blurs as requested
+   for (int l = 0; l < loops; l++)
+     {
+        int run;
+
+        // New line: reset source & destination pointers
+        src = srcdata + len * l;
+        if (!radii[1]) // Only one run
+          dst = dstdata + len * l;
+        else
+          dst = span1;
+
+        // Apply blur with current radius
+        for (run = 0; radii[run]; run++)
           {
-             acc += *sr;
-             sr += STEP;
+             const int radius = radii[run];
+             const int left = MIN(radius, len);
+             const int right = MIN(radius, (len - radius));
+             int acc = 0;
+
+#if DIV_USING_BITSHIFT
+             const int pow2 = pow2_shifts[run];
+             const int numerator = numerators[run];
+#else
+             const int divider = 2 * radius + 1;
+#endif
+
+             const DATA8* restrict sr = src;
+             const DATA8* restrict sl = src;
+             DATA8* restrict d = dst;
+
+             // Read-ahead & accumulate
+             for (int k = left; k; k--)
+               {
+                  acc += *sr;
+                  sr += 1;
+               }
+
+             // Left edge
+             for (int k = 0; k < left; k++)
+               {
+                  acc += *sr;
+                  *d = acc / (k + left + 1);
+                  sr += 1;
+                  d += 1;
+               }
+
+             // Middle part, normal blur
+             for (int k = len - (2 * radius); k; k--)
+               {
+                  acc += *sr;
+                  *d = DIVIDE(acc);
+                  acc -= *sl;
+                  sl += 1;
+                  sr += 1;
+                  d += 1;
+               }
+
+             // Right edge
+             for (int k = right; k; k--)
+               {
+                  *d = acc / (k + right);
+                  acc -= *sl;
+                  d += 1;
+                  sl += 1;
+               }
+
+             // More runs to go: swap spans
+             if (radii[run + 1])
+               {
+                  src = dst;
+                  if (radii[run + 2])
+                    {
+                       // Two more runs: swap
+                       DATA8* swap = span1;
+                       span1 = span2;
+                       span2 = swap;
+                       dst = span1;
+                    }
+                  else
+                    {
+                       // Last run: write directly to dstdata
+                       dst = dstdata + len * l;
+                    }
+               }
           }
+     }
+}
+
+// ATTENTION: Make sure the below code's inner loop is the SAME as above.
 
-        for (k = 0; k < left; k++)
+static inline void
+_box_blur_alpha_vert_step(const DATA8* restrict const srcdata,
+                          DATA8* restrict const dstdata,
+                          const int* restrict const radii,
+                          const int len,
+                          const int loops)
+{
+   /* Note: This function tries to optimize cache hits by working on
+    * contiguous horizontal spans.
+    */
+
+   const int step = loops;
+   DATA8* restrict src;
+   DATA8* restrict dst;
+   DATA8* restrict span1;
+   DATA8* restrict span2;
+
+#if DIV_USING_BITSHIFT
+   int pow2_shifts[6] = {0};
+   int numerators[6] = {0};
+   for (int run = 0; radii[run]; run++)
+     {
+        const int div = radii[run] * 2 + 1;
+        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
+        numerators[run] = (1 << pow2_shifts[run]) / (div);
+     }
+#endif
+
+   span1 = alloca(len);
+   span2 = alloca(len);
+
+   // For each line, apply as many blurs as requested
+   for (int l = 0; l < loops; l++)
+     {
+        int run;
+
+        // Rotate input into work span
+        const DATA8* srcptr = srcdata + l;
+        DATA8* s = span1;
+        for (int k = len; k; --k)
           {
-             acc += *sr;
-             *d = acc / (k + left + 1);
-             sr += STEP;
-             d += STEP;
+             *s++ = *srcptr;
+             srcptr += step;
           }
 
-        for (k = len - (2 * radius); k; k--)
+        src = span1;
+        dst = span2;
+
+        // Apply blur with current radius
+        for (run = 0; radii[run]; run++)
           {
-             acc += *sr;
-             *d = DIVIDE(acc);
-             acc -= *sl;
-             sl += STEP;
-             sr += STEP;
-             d += STEP;
+             const int radius = radii[run];
+             const int left = MIN(radius, len);
+             const int right = MIN(radius, (len - radius));
+             int acc = 0;
+
+#if DIV_USING_BITSHIFT
+             const int pow2 = pow2_shifts[run];
+             const int numerator = numerators[run];
+#else
+             const int divider = 2 * radius + 1;
+#endif
+
+             const DATA8* restrict sr = src;
+             const DATA8* restrict sl = src;
+             DATA8* restrict d = dst;
+
+             // Read-ahead & accumulate
+             for (int k = left; k; k--)
+               {
+                  acc += *sr;
+                  sr += 1;
+               }
+
+             // Left edge
+             for (int k = 0; k < left; k++)
+               {
+                  acc += *sr;
+                  *d = acc / (k + left + 1);
+                  sr += 1;
+                  d += 1;
+               }
+
+             // Middle part, normal blur
+             for (int k = len - (2 * radius); k; k--)
+               {
+                  acc += *sr;
+                  *d = DIVIDE(acc);
+                  acc -= *sl;
+                  sl += 1;
+                  sr += 1;
+                  d += 1;
+               }
+
+             // Right edge
+             for (int k = right; k; k--)
+               {
+                  *d = acc / (k + right);
+                  acc -= *sl;
+                  d += 1;
+                  sl += 1;
+               }
+
+             // More runs to go: swap spans
+             if (radii[run + 1])
+               {
+                  DATA8* swap = src;
+                  src = dst;
+                  dst = swap;
+               }
           }
 
-        for (k = right; k; k--)
+        // Last run: rotate & copy back to destination
+        DATA8* restrict dstptr = dstdata + l;
+        for (int k = len; k; --k)
           {
-             *d = acc / (k + right);
-             acc -= *sl;
-             d += STEP;
-             sl += STEP;
+             *dstptr = *dst++;
+             dstptr += step;
           }
-
-        src += loopstep;
-        dst += loopstep;
      }
 }
-
-#undef FUNCTION_NAME
-#undef STEP
diff --git a/src/lib/evas/filters/evas_filter.c b/src/lib/evas/filters/evas_filter.c
index 6e899a9..67006a2 100644
--- a/src/lib/evas/filters/evas_filter.c
+++ b/src/lib/evas/filters/evas_filter.c
@@ -973,7 +973,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
                 else
                   type = EVAS_FILTER_BLUR_BOX;
 
-                id = evas_filter_command_blur_add(ctx, drawctx, inbuf, tmp_in,
+                id = evas_filter_command_blur_add(ctx, drawctx, tmp_in, outbuf,
                                                   type, 0, dy, ox, oy, 0);
                 if (id < 0) goto fail;
                 cmd = _evas_filter_command_get(ctx, id);
diff --git a/src/lib/evas/filters/evas_filter_blur.c b/src/lib/evas/filters/evas_filter_blur.c
index b2d9733..2220c9d 100644
--- a/src/lib/evas/filters/evas_filter_blur.c
+++ b/src/lib/evas/filters/evas_filter_blur.c
@@ -4,6 +4,35 @@
 #include <math.h>
 #include <time.h>
 
+static int
+_box_blur_auto_radius(int *radii, int r)
+{
+   if (r <= 2)
+     {
+        radii[0] = r;
+        radii[1] = 0;
+        WRN("Radius is too small for auto box blur: %d", r);
+        return 1;
+     }
+   else if (r <= 6)
+     {
+        radii[0] = r / 2;
+        radii[1] = r - radii[0] - 1;
+        radii[2] = 0;
+        DBG("Using auto radius for %d: %d %d", r, radii[0], radii[1]);
+        return 2;
+     }
+   else
+     {
+        radii[0] = (r + 3) / 3;
+        radii[1] = (r + 2) / 3;
+        radii[2] = r - radii[0] - radii[1];
+        radii[3] = 0;
+        DBG("Using auto radius for %d: %d %d %d", r, radii[0], radii[1], radii[2]);
+        return 3;
+     }
+}
+
 #define FUNCTION_NAME _box_blur_horiz_rgba_step
 #define STEP (sizeof(DATA32))
 #include "./blur/blur_box_rgba_.c"
@@ -76,27 +105,21 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
    return EINA_TRUE;
 }
 
-#define FUNCTION_NAME _box_blur_horiz_alpha_step
-#define STEP 1
 #include "./blur/blur_box_alpha_.c"
 
 static void
-_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
+_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_horiz_alpha_step(src, dst, radius, w, h, w);
+   _box_blur_alpha_horiz_step(src, dst, radii, w, h);
    DEBUG_TIME_END();
 }
 
-#define FUNCTION_NAME _box_blur_vert_alpha_step
-#define STEP loops
-#include "./blur/blur_box_alpha_.c"
-
 static void
-_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
+_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_vert_alpha_step(src, dst, radius, h, w, 1);
+   _box_blur_alpha_vert_step(src, dst, radii, h, w);
    DEBUG_TIME_END();
 }
 
@@ -104,6 +127,7 @@ static Eina_Bool
 _box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd)
 {
    RGBA_Image *in, *out;
+   int radii[7] = {0};
    unsigned int r;
 
    EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@@ -114,11 +138,16 @@ _box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd)
    in = cmd->input->backing;
    out = cmd->output->backing;
 
+   if (cmd->blur.auto_count)
+     _box_blur_auto_radius(radii, r);
+   else for (int k = 0; k < cmd->blur.count; k++)
+     radii[k] = r;
+
    EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE);
    EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE);
    EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE);
 
-   _box_blur_horiz_alpha(in->image.data8, out->image.data8, r,
+   _box_blur_horiz_alpha(in->image.data8, out->image.data8, radii,
                          in->cache_entry.w, in->cache_entry.h);
 
    return EINA_TRUE;
@@ -128,6 +157,7 @@ static Eina_Bool
 _box_blur_vert_apply_alpha(Evas_Filter_Command *cmd)
 {
    RGBA_Image *in, *out;
+   int radii[7] = {0};
    unsigned int r;
 
    EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@@ -138,11 +168,16 @@ _box_blur_vert_apply_alpha(Evas_Filter_Command *cmd)
    in = cmd->input->backing;
    out = cmd->output->backing;
 
+   if (cmd->blur.auto_count)
+     _box_blur_auto_radius(radii, r);
+   else for (int k = 0; k < cmd->blur.count; k++)
+     radii[k] = r;
+
    EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE);
    EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE);
    EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE);
 
-   _box_blur_vert_alpha(in->image.data8, out->image.data8, r,
+   _box_blur_vert_alpha(in->image.data8, out->image.data8, radii,
                         in->cache_entry.w, in->cache_entry.h);
 
    return EINA_TRUE;

-- 


Reply via email to