cedric pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=b3dc08bf8bf438b531a28fc231c3ac854fe09fc9

commit b3dc08bf8bf438b531a28fc231c3ac854fe09fc9
Author: Cedric BAIL <ced...@osg.samsung.com>
Date:   Sat Sep 12 07:31:49 2015 +0200

    ector: cleanup default backend drawer.
---
 src/lib/ector/software/ector_drawhelper.c         |  68 ++++----
 src/lib/ector/software/ector_drawhelper_neon.c    | 203 +++++++++++-----------
 src/lib/ector/software/ector_drawhelper_private.h |  21 ++-
 src/lib/ector/software/ector_drawhelper_sse2.c    |  30 +++-
 4 files changed, 175 insertions(+), 147 deletions(-)

diff --git a/src/lib/ector/software/ector_drawhelper.c b/src/lib/ector/software/ector_drawhelper.c
index eee1d7c..43cf038 100644
--- a/src/lib/ector/software/ector_drawhelper.c
+++ b/src/lib/ector/software/ector_drawhelper.c
@@ -15,28 +15,30 @@
 */
 
 /*
-  result = s + d * sia  
+  result = s + d * sia
   dest = (s + d * sia) * ca + d * cia
        = s * ca + d * (sia * ca + cia)
        = s * ca + d * (1 - sa*ca)
 */
-void
-comp_func_solid_source_over(uint *dest, int length, uint color, uint const_alpha)
+
+static void
+_comp_func_solid_source_over(uint *dest, int length, uint color, uint const_alpha)
 {
    int ialpha, i;
+
    if (const_alpha != 255)
      color = BYTE_MUL(color, const_alpha);
-   ialpha = _alpha(~color);
+   ialpha = alpha_inverse(color);
    for (i = 0; i < length; ++i)
      dest[i] = color + BYTE_MUL(dest[i], ialpha);
 }
 
-
 static void
-comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint const_alpha)
+_comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint const_alpha)
 {
    int i;
    uint s, sc, sia;
+
    if (const_alpha != 255)
      color = BYTE_MUL(color, const_alpha);
 
@@ -49,7 +51,7 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
                dest[i] = s;
              else if (s != 0)
                {
-                  sia = _alpha(~s);
+                  sia = alpha_inverse(s);
                   dest[i] = s + BYTE_MUL(dest[i], sia);
                }
           }
@@ -60,7 +62,7 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
           {
              s = src[i];
              sc = ECTOR_MUL4_SYM(color, s);
-             sia = _alpha(~sc);
+             sia = alpha_inverse(sc);
              dest[i] = sc + BYTE_MUL(dest[i], sia);
           }
      }
@@ -71,10 +73,14 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
   dest = s * ca + d * cia
 */
 static void
-comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
+_comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
 {
    int ialpha, i;
-   if (const_alpha == 255) _ector_memfill(dest, length, color);
+
+   if (const_alpha == 255)
+     {
+        _ector_memfill(dest, length, color);
+     }
    else
      {
         ialpha = 255 - const_alpha;
@@ -85,20 +91,23 @@ comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
 }
 
 static void
-comp_func_source(uint *dest, const uint *src, int length, uint color, uint const_alpha)
+_comp_func_source(uint *dest, const uint *src, int length, uint color, uint const_alpha)
 {
    int i, ialpha;
    uint src_color;
+
    if (color == 0xffffffff) // No color multiplier
      {
         if (const_alpha == 255)
-          memcpy(dest, src, length * sizeof(uint));
+          {
+             memcpy(dest, src, length * sizeof(uint));
+          }
         else
-         {
-            ialpha = 255 - const_alpha;
-            for (i = 0; i < length; ++i)
-              dest[i] = INTERPOLATE_PIXEL_256(src[i], const_alpha, dest[i], ialpha);
-         }
+          {
+             ialpha = 255 - const_alpha;
+             for (i = 0; i < length; ++i)
+               dest[i] = INTERPOLATE_PIXEL_256(src[i], const_alpha, dest[i], ialpha);
+          }
      }
    else
      {
@@ -109,24 +118,24 @@ comp_func_source(uint *dest, const uint *src, int length, uint color, uint const
           }
         else
           {
-            ialpha = 255 - const_alpha;
-            for (i = 0; i < length; ++i)
-              {
-                 src_color = ECTOR_MUL4_SYM(src[i], color);
-                 dest[i] = INTERPOLATE_PIXEL_256(src_color, const_alpha, dest[i], ialpha);
-            }
+             ialpha = 255 - const_alpha;
+             for (i = 0; i < length; ++i)
+               {
+                  src_color = ECTOR_MUL4_SYM(src[i], color);
+                  dest[i] = INTERPOLATE_PIXEL_256(src_color, const_alpha, dest[i], ialpha);
+               }
           }
      }
 }
 
 RGBA_Comp_Func_Solid func_for_mode_solid[ECTOR_ROP_LAST] = {
-        comp_func_solid_source_over,
-        comp_func_solid_source
+  _comp_func_solid_source_over,
+  _comp_func_solid_source
 };
 
 RGBA_Comp_Func func_for_mode[ECTOR_ROP_LAST] = {
-        comp_func_source_over,
-        comp_func_source
+  _comp_func_source_over,
+  _comp_func_source
 };
 
 RGBA_Comp_Func_Solid
@@ -146,13 +155,10 @@ RGBA_Comp_Func ector_comp_func_span_get(Ector_Rop op, uint color, Eina_Bool src_
      {
         if (op == ECTOR_ROP_BLEND) op = ECTOR_ROP_COPY;
      }
+
    return func_for_mode[op];
 }
 
-extern void init_drawhelper_gradient();
-extern void init_draw_helper_sse2();
-extern void init_draw_helper_neon();
-
 void init_draw_helper()
 {
    init_drawhelper_gradient();
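
For context, the source-over math in the comment at the top of ector_drawhelper.c (result = s + d * sia, i.e. dest = s * ca + d * (1 - sa*ca)) reduces to one multiply-and-add per pixel once the color multiplier has been folded in. Below is a minimal scalar sketch assuming premultiplied ARGB32 pixels; byte_mul() is only a stand-in for the BYTE_MUL macro the helpers use, not necessarily its exact definition:

   /* Multiply every channel of a premultiplied ARGB32 pixel by an 8-bit
    * factor; stand-in for the BYTE_MUL macro referenced in the diff. */
   static unsigned int
   byte_mul(unsigned int c, unsigned int a)
   {
      unsigned int rb = (((c & 0x00ff00ff) * a) >> 8) & 0x00ff00ff;
      unsigned int ag = ((((c >> 8) & 0x00ff00ff) * a)) & 0xff00ff00;
      return rb | ag;
   }

   /* dest = color + dest * (1 - alpha(color)): the loop body of
    * _comp_func_solid_source_over() with const_alpha already applied. */
   static void
   solid_source_over_sketch(unsigned int *dest, int length, unsigned int color)
   {
      unsigned int ialpha = 255 - (color >> 24); /* the value alpha_inverse() returns */
      int i;

      for (i = 0; i < length; i++)
        dest[i] = color + byte_mul(dest[i], ialpha);
   }
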
diff --git a/src/lib/ector/software/ector_drawhelper_neon.c b/src/lib/ector/software/ector_drawhelper_neon.c
index 3adfdba..59e032f 100644
--- a/src/lib/ector/software/ector_drawhelper_neon.c
+++ b/src/lib/ector/software/ector_drawhelper_neon.c
@@ -7,6 +7,7 @@
 
 #ifdef BUILD_NEON
 #include <arm_neon.h>
+
 static void
 comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color, uint const_alpha)
 {
@@ -39,8 +40,8 @@ comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color,
    // alpha can only be 0 if color is 0x0. In that case we can just return.
    // Otherwise we can assume alpha != 0. This allows more optimization in
    // NEON code.
-   if(!color)
-      return;
+   if (!color)
+     return;
 
    DATA32 *start = dest;
    int size = length;
@@ -53,46 +54,47 @@ comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color,
    c_32x4 = vdupq_n_u32(color);
 
    while (start < end)
-   {
-      d0_32x4 = vld1q_u32(start);
-      d1_32x4 = vld1q_u32(start+4);
-      d0_8x16 = vreinterpretq_u8_u32(d0_32x4);
-      d1_8x16 = vreinterpretq_u8_u32(d1_32x4);
-
-      d00_8x8 = vget_low_u8(d0_8x16);
-      d01_8x8 = vget_high_u8(d0_8x16);
-      d10_8x8 = vget_low_u8(d1_8x16);
-      d11_8x8 = vget_high_u8(d1_8x16);
-
-      temp00_16x8 = vmull_u8(alpha_8x8, d00_8x8);
-      temp01_16x8 = vmull_u8(alpha_8x8, d01_8x8);
-      temp10_16x8 = vmull_u8(alpha_8x8, d10_8x8);
-      temp11_16x8 = vmull_u8(alpha_8x8, d11_8x8);
-
-      temp00_8x8 = vshrn_n_u16(temp00_16x8,8);
-      temp01_8x8 = vshrn_n_u16(temp01_16x8,8);
-      temp10_8x8 = vshrn_n_u16(temp10_16x8,8);
-      temp11_8x8 = vshrn_n_u16(temp11_16x8,8);
-
-      temp0_8x16 = vcombine_u8(temp00_8x8, temp01_8x8);
-      temp1_8x16 = vcombine_u8(temp10_8x8, temp11_8x8);
-
-      temp0_32x4 = vreinterpretq_u32_u8(temp0_8x16);
-      temp1_32x4 = vreinterpretq_u32_u8(temp1_8x16);
-
-      d0_32x4 = vaddq_u32(c_32x4, temp0_32x4);
-      d1_32x4 = vaddq_u32(c_32x4, temp1_32x4);
-
-      vst1q_u32(start, d0_32x4);
-      vst1q_u32(start+4, d1_32x4);
-      start+=8;
-   }
+     {
+        d0_32x4 = vld1q_u32(start);
+        d1_32x4 = vld1q_u32(start+4);
+        d0_8x16 = vreinterpretq_u8_u32(d0_32x4);
+        d1_8x16 = vreinterpretq_u8_u32(d1_32x4);
+
+        d00_8x8 = vget_low_u8(d0_8x16);
+        d01_8x8 = vget_high_u8(d0_8x16);
+        d10_8x8 = vget_low_u8(d1_8x16);
+        d11_8x8 = vget_high_u8(d1_8x16);
+
+        temp00_16x8 = vmull_u8(alpha_8x8, d00_8x8);
+        temp01_16x8 = vmull_u8(alpha_8x8, d01_8x8);
+        temp10_16x8 = vmull_u8(alpha_8x8, d10_8x8);
+        temp11_16x8 = vmull_u8(alpha_8x8, d11_8x8);
+
+        temp00_8x8 = vshrn_n_u16(temp00_16x8,8);
+        temp01_8x8 = vshrn_n_u16(temp01_16x8,8);
+        temp10_8x8 = vshrn_n_u16(temp10_16x8,8);
+        temp11_8x8 = vshrn_n_u16(temp11_16x8,8);
+
+        temp0_8x16 = vcombine_u8(temp00_8x8, temp01_8x8);
+        temp1_8x16 = vcombine_u8(temp10_8x8, temp11_8x8);
+
+        temp0_32x4 = vreinterpretq_u32_u8(temp0_8x16);
+        temp1_32x4 = vreinterpretq_u32_u8(temp1_8x16);
+
+        d0_32x4 = vaddq_u32(c_32x4, temp0_32x4);
+        d1_32x4 = vaddq_u32(c_32x4, temp1_32x4);
+
+        vst1q_u32(start, d0_32x4);
+        vst1q_u32(start+4, d1_32x4);
+        start+=8;
+     }
+
    end += (size & 7);
    while (start <  end)
-   {
-      *start = color + MUL_256(alpha, *start);
-      start++;
-   }
+     {
+        *start = color + MUL_256(alpha, *start);
+        start++;
+     }
 }
 
 /* Note: Optimisation is based on keeping _dest_ aligned: else it's a pair of
@@ -132,6 +134,9 @@ comp_func_source_over_sse2(uint * __restrict dest, const uint * __restrict src,
    uint8x8_t s1_8x8;
    uint8x8_t sc0_8x8;
    uint8x8_t sc1_8x8;
+   int size;
+   DATA32 *start;
+   DATA32 *end;
 
    if (const_alpha != 255)
      color = BYTE_MUL(color, const_alpha);
@@ -143,69 +148,69 @@ comp_func_source_over_sse2(uint * __restrict dest, const uint * __restrict src,
    x0_32x4 = vreinterpretq_u32_u8(x0_8x16);
    x1_8x16 = vdupq_n_u8(0x1);
    x1_32x4 = vreinterpretq_u32_u8(x1_8x16);
-   DATA32 *start = dest;
-   int size = l;
-   DATA32 *end = start + (size & ~3);
+   start = dest;
+   size = l;
+   end = start + (size & ~3);
+
    while (start < end)
-   {
-
-      s_32x4 = vld1q_u32(src);
-      s_8x16 = vreinterpretq_u8_u32(s_32x4);
-
-      d_32x4 = vld1q_u32(start);
-      d_8x16 = vreinterpretq_u8_u32(d_32x4);
-      d0_8x8 = vget_low_u8(d_8x16);
-      d1_8x8 = vget_high_u8(d_8x16);
-
-      s0_8x8 = vget_low_u8(s_8x16);
-      s1_8x8 = vget_high_u8(s_8x16);
-
-      sc0_16x8 = vmull_u8(s0_8x8, c_8x8);
-      sc1_16x8 = vmull_u8(s1_8x8, c_8x8);
-      sc0_16x8 = vaddq_u16(sc0_16x8, x255_16x8);
-      sc1_16x8 = vaddq_u16(sc1_16x8, x255_16x8);
-      sc0_8x8 = vshrn_n_u16(sc0_16x8, 8);
-      sc1_8x8 = vshrn_n_u16(sc1_16x8, 8);
-      sc_8x16 = vcombine_u8(sc0_8x8, sc1_8x8);
-
-      alpha_32x4 = vreinterpretq_u32_u8(sc_8x16);
-      alpha_32x4 = vshrq_n_u32(alpha_32x4, 24);
-      alpha_32x4 = vmulq_u32(x1_32x4, alpha_32x4);
-      alpha_8x16 = vreinterpretq_u8_u32(alpha_32x4);
-      alpha_8x16 = vsubq_u8(x0_8x16, alpha_8x16);
-      alpha0_8x8 = vget_low_u8(alpha_8x16);
-      alpha1_8x8 = vget_high_u8(alpha_8x16);
-
-      ad0_16x8 = vmull_u8(alpha0_8x8, d0_8x8);
-      ad1_16x8 = vmull_u8(alpha1_8x8, d1_8x8);
-      ad0_8x8 = vshrn_n_u16(ad0_16x8,8);
-      ad1_8x8 = vshrn_n_u16(ad1_16x8,8);
-      ad_8x16 = vcombine_u8(ad0_8x8, ad1_8x8);
-      ad_32x4 = vreinterpretq_u32_u8(ad_8x16);
-
-      alpha_32x4 = vreinterpretq_u32_u8(alpha_8x16);
-      cond_32x4 = vceqq_u32(alpha_32x4, x0_32x4);
-      ad_32x4 = vbslq_u32(cond_32x4, d_32x4 , ad_32x4);
-
-      sc_32x4 = vreinterpretq_u32_u8(sc_8x16);
-      d_32x4 = vaddq_u32(sc_32x4, ad_32x4);
-
-      vst1q_u32(start, d_32x4);
-
-      src+=4;
-      start+=4;
-   }
+     {
+        s_32x4 = vld1q_u32(src);
+        s_8x16 = vreinterpretq_u8_u32(s_32x4);
+
+        d_32x4 = vld1q_u32(start);
+        d_8x16 = vreinterpretq_u8_u32(d_32x4);
+        d0_8x8 = vget_low_u8(d_8x16);
+        d1_8x8 = vget_high_u8(d_8x16);
+
+        s0_8x8 = vget_low_u8(s_8x16);
+        s1_8x8 = vget_high_u8(s_8x16);
+
+        sc0_16x8 = vmull_u8(s0_8x8, c_8x8);
+        sc1_16x8 = vmull_u8(s1_8x8, c_8x8);
+        sc0_16x8 = vaddq_u16(sc0_16x8, x255_16x8);
+        sc1_16x8 = vaddq_u16(sc1_16x8, x255_16x8);
+        sc0_8x8 = vshrn_n_u16(sc0_16x8, 8);
+        sc1_8x8 = vshrn_n_u16(sc1_16x8, 8);
+        sc_8x16 = vcombine_u8(sc0_8x8, sc1_8x8);
+
+        alpha_32x4 = vreinterpretq_u32_u8(sc_8x16);
+        alpha_32x4 = vshrq_n_u32(alpha_32x4, 24);
+        alpha_32x4 = vmulq_u32(x1_32x4, alpha_32x4);
+        alpha_8x16 = vreinterpretq_u8_u32(alpha_32x4);
+        alpha_8x16 = vsubq_u8(x0_8x16, alpha_8x16);
+        alpha0_8x8 = vget_low_u8(alpha_8x16);
+        alpha1_8x8 = vget_high_u8(alpha_8x16);
+
+        ad0_16x8 = vmull_u8(alpha0_8x8, d0_8x8);
+        ad1_16x8 = vmull_u8(alpha1_8x8, d1_8x8);
+        ad0_8x8 = vshrn_n_u16(ad0_16x8,8);
+        ad1_8x8 = vshrn_n_u16(ad1_16x8,8);
+        ad_8x16 = vcombine_u8(ad0_8x8, ad1_8x8);
+        ad_32x4 = vreinterpretq_u32_u8(ad_8x16);
+
+        alpha_32x4 = vreinterpretq_u32_u8(alpha_8x16);
+        cond_32x4 = vceqq_u32(alpha_32x4, x0_32x4);
+        ad_32x4 = vbslq_u32(cond_32x4, d_32x4 , ad_32x4);
+
+        sc_32x4 = vreinterpretq_u32_u8(sc_8x16);
+        d_32x4 = vaddq_u32(sc_32x4, ad_32x4);
+
+        vst1q_u32(start, d_32x4);
+
+        src+=4;
+        start+=4;
+     }
+
    end += (size & 3);
    while (start <  end)
-   {
-      DATA32 sc = MUL4_SYM(color, *s);
-      DATA32 alpha = 256 - (sc >> 24);
-      *start = sc + MUL_256(alpha, *start);
-      start++;
-      src++;
-   }
+     {
+        DATA32 sc = MUL4_SYM(color, *s);
+        DATA32 alpha = 256 - (sc >> 24);
+        *start = sc + MUL_256(alpha, *start);
+        start++;
+        src++;
+     }
 }
-
 #endif
 
 void
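
The NEON version keeps the same arithmetic but restructures the loop: a vector body handles eight pixels per iteration (length & ~7) and a scalar tail finishes the remaining length & 7 pixels; the early return when color == 0 lets the vector body assume a non-zero alpha, as the comment in the hunk notes. Here is a plain-C sketch of that main-loop/tail split, assuming the same premultiplied ARGB32 layout; blend_one() is a hypothetical scalar stand-in for the color + MUL_256(alpha, *start) step:

   /* One pixel of the solid source-over blend, MUL_256 style (ialpha in 1..256). */
   static void
   blend_one(unsigned int *d, unsigned int color, unsigned int ialpha)
   {
      *d = color
         + (((((*d >> 8) & 0x00ff00ff) * ialpha) & 0xff00ff00)
         +  ((((*d & 0x00ff00ff) * ialpha) >> 8) & 0x00ff00ff));
   }

   /* Sketch of the 8-pixel main loop followed by the scalar tail. */
   static void
   solid_source_over_span_sketch(unsigned int *dest, int length, unsigned int color)
   {
      unsigned int ialpha = 256 - (color >> 24);
      unsigned int *start = dest;
      unsigned int *end = start + (length & ~7); /* multiple-of-8 main loop */
      int k;

      while (start < end)
        {
           /* the real code does these eight pixels with NEON vmull/vshrn */
           for (k = 0; k < 8; k++)
             blend_one(start + k, color, ialpha);
           start += 8;
        }

      end += (length & 7);                        /* 0..7 leftover pixels */
      while (start < end)
        blend_one(start++, color, ialpha);
   }
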
diff --git a/src/lib/ector/software/ector_drawhelper_private.h b/src/lib/ector/software/ector_drawhelper_private.h
index a753987..007ef53 100644
--- a/src/lib/ector/software/ector_drawhelper_private.h
+++ b/src/lib/ector/software/ector_drawhelper_private.h
@@ -1,9 +1,7 @@
 #ifndef ECTOR_DRAWHELPER_PRIVATE_H
 #define ECTOR_DRAWHELPER_PRIVATE_H
 
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
+#include "ector_private.h"
 
 #ifndef MIN
 #define MIN( a, b )  ( (a) < (b) ? (a) : (b) )
@@ -17,11 +15,6 @@
 typedef unsigned int uint;
 #endif
 
-static inline int _alpha(uint c)
-{
-   return c>>24;
-}
-
 #define ECTOR_ARGB_JOIN(a,r,g,b) \
         (((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
 
@@ -53,6 +46,13 @@ static inline int _alpha(uint c)
       } \
    }
 
+static inline int
+alpha_inverse(int color)
+{
+   color = ~color;
+   return A_VAL(&color);
+}
+
 static inline void
 _ector_memfill(uint *dest, int length, uint value)
 {
@@ -89,9 +89,14 @@ INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b)
 
 typedef void (*RGBA_Comp_Func)(uint *dest, const uint *src, int length, uint mul_col, uint const_alpha);
 typedef void (*RGBA_Comp_Func_Solid)(uint *dest, int length, uint color, uint const_alpha);
+
 extern RGBA_Comp_Func_Solid func_for_mode_solid[ECTOR_ROP_LAST];
 extern RGBA_Comp_Func func_for_mode[ECTOR_ROP_LAST];
 
+void init_drawhelper_gradient();
+void init_draw_helper_sse2();
+void init_draw_helper_neon();
+
 void init_draw_helper();
 
 RGBA_Comp_Func_Solid ector_comp_func_solid_span_get(Ector_Rop op, uint color);
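
The header now also carries the alpha_inverse() helper that replaces the old _alpha(~c) idiom: complementing the pixel first and then reading the alpha byte yields 255 minus the original alpha. A small self-contained check of that equivalence, assuming the usual ARGB32 layout where A_VAL reads the most significant byte (a_val() below is a value-based stand-in for the A_VAL macro):

   #include <assert.h>

   /* a_val() stands in for A_VAL: the alpha byte of an ARGB32 pixel. */
   static int a_val(unsigned int c)               { return (c >> 24) & 0xff; }
   static int alpha_inverse_sketch(unsigned int c) { return a_val(~c); }

   int main(void)
   {
      unsigned int color = 0x80ff8040;               /* alpha = 0x80 (128) */

      /* ~color flips every byte, so its alpha byte is 0x7f = 255 - 128 */
      assert(alpha_inverse_sketch(color) == 255 - a_val(color));
      return 0;
   }
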
diff --git a/src/lib/ector/software/ector_drawhelper_sse2.c b/src/lib/ector/software/ector_drawhelper_sse2.c
index 581cbb7..3af1bc1 100644
--- a/src/lib/ector/software/ector_drawhelper_sse2.c
+++ b/src/lib/ector/software/ector_drawhelper_sse2.c
@@ -89,6 +89,7 @@ v4_mul_color_sse2(__m128i x, __m128i y)
 {
    const __m128i zero = _mm_setzero_si128();
    const __m128i sym4_mask = _mm_set_epi32(0x00FF00FF, 0x000000FF, 0x00FF00FF, 0x000000FF);
+
    __m128i x_l = _mm_unpacklo_epi8(x, zero);
    __m128i x_h = _mm_unpackhi_epi8(x, zero);
 
@@ -111,6 +112,7 @@ static inline __m128i
 v4_ialpha_sse2(__m128i c)
 {
    __m128i a = _mm_srli_epi32(c, 24);
+
    return _mm_sub_epi32(_mm_set1_epi32(0xff), a);
 }
 
@@ -141,10 +143,14 @@ comp_func_helper_sse2 (uint *dest, int length, uint color, uint alpha)
 void
 comp_func_solid_source_sse2(uint *dest, int length, uint color, uint const_alpha)
 {
-   int ialpha;
-   if (const_alpha == 255) _ector_memfill(dest, length, color);
+   if (const_alpha == 255)
+     {
+        _ector_memfill(dest, length, color);
+     }
    else
      {
+        int ialpha;
+
         ialpha = 255 - const_alpha;
         color = BYTE_MUL(color, const_alpha);
         comp_func_helper_sse2(dest, length, color, ialpha);
@@ -155,9 +161,10 @@ void
 comp_func_solid_source_over_sse2(uint *dest, int length, uint color, uint const_alpha)
 {
    int ialpha;
+
    if (const_alpha != 255)
      color = BYTE_MUL(color, const_alpha);
-   ialpha = _alpha(~color);
+   ialpha = alpha_inverse(color);
    comp_func_helper_sse2(dest, length, color, ialpha);
 }
 
@@ -194,21 +201,23 @@ comp_func_solid_source_over_sse2(uint *dest, int length, uint color, uint const_
 #define V4_COMP_OP_SRC \
   v_src = v4_interpolate_color_sse2(v_alpha, v_src, v_dest);
 
-
-
 static void
 comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint const_alpha)
 {
    int ialpha;
    uint src_color;
+
    if (color == 0xffffffff) // No color multiplier
      {
         if (const_alpha == 255)
-          memcpy(dest, src, length * sizeof(uint));
+          {
+             memcpy(dest, src, length * sizeof(uint));
+          }
         else
           {
              ialpha = 255 - const_alpha;
              __m128i v_alpha = _mm_set1_epi32(const_alpha);
+
              LOOP_ALIGNED_U1_A4(dest, length,
                { /* UOP */
                  *dest = INTERPOLATE_PIXEL_256(*src, const_alpha, *dest, ialpha);
@@ -225,6 +234,7 @@ comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint
    else
      {
         __m128i v_color = _mm_set1_epi32(color);
+
         if (const_alpha == 255)
           {
              LOOP_ALIGNED_U1_A4(dest, length,
@@ -243,6 +253,7 @@ comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint
           {
              ialpha = 255 - const_alpha;
              __m128i v_alpha = _mm_set1_epi32(const_alpha);
+
              LOOP_ALIGNED_U1_A4(dest, length,
                { /* UOP */
                   src_color = ECTOR_MUL4_SYM(*src, color);
@@ -264,6 +275,7 @@ static void
 comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color, uint const_alpha)
 {
    uint s, sia;
+
    if (const_alpha != 255)
      color = BYTE_MUL(color, const_alpha);
 
@@ -272,7 +284,7 @@ comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color,
         LOOP_ALIGNED_U1_A4(dest, length,
          { /* UOP */
             s = *src;
-            sia = _alpha(~s);
+            sia = alpha_inverse(s);
             *dest = s + BYTE_MUL(*dest, sia);
             dest++; src++; length--;
          },
@@ -286,10 +298,11 @@ comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color,
    else
      {
         __m128i v_color = _mm_set1_epi32(color);
+
         LOOP_ALIGNED_U1_A4(dest, length,
          { /* UOP */
             s = ECTOR_MUL4_SYM(*src, color);
-            sia = _alpha(~s);
+            sia = alpha_inverse(s);
             *dest = s + BYTE_MUL(*dest, sia);
             dest++; src++; length--;
          },
@@ -321,4 +334,3 @@ init_draw_helper_sse2()
       }
 #endif
 }
-
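
Throughout the SSE2 file the compositing loops are written with the LOOP_ALIGNED_U1_A4 macro, which pairs a one-pixel block with a four-pixel SSE2 block. The macro itself is not part of this diff, so what follows is only a plausible sketch of the pattern its call sites suggest: scalar steps until dest reaches 16-byte alignment, aligned four-pixel steps over the middle, and a scalar tail. fill_span_sse2_sketch() and its layout are illustrative assumptions, not library code:

   #include <emmintrin.h>
   #include <stdint.h>

   /* Hypothetical example of the unaligned-head / aligned-body / tail split,
    * shown with a plain fill so the pixel math does not get in the way. */
   static void
   fill_span_sse2_sketch(uint32_t *dest, int length, uint32_t color)
   {
      __m128i v_color = _mm_set1_epi32((int)color);

      /* scalar head: advance until dest is 16-byte aligned (the "U1" part) */
      while (length > 0 && ((uintptr_t)dest & 15))
        {
           *dest++ = color;
           length--;
        }

      /* aligned body: four pixels per store (the "A4" part) */
      while (length >= 4)
        {
           _mm_store_si128((__m128i *)dest, v_color);
           dest += 4;
           length -= 4;
        }

      /* scalar tail for the last 0..3 pixels */
      while (length-- > 0)
        *dest++ = color;
   }
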
