cedric pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=71eec44ccc9ab43e728ba986fadce6c6cfd2ff7c

commit 71eec44ccc9ab43e728ba986fadce6c6cfd2ff7c
Author: Yury Usishchev <y.usishc...@samsung.com>
Date:   Wed Apr 15 17:21:33 2015 +0200

    evas: enable NEON-optimized code for aarch64.
    
    Summary:
    Add a new define, BUILD_NEON_INTRINSICS, to control whether NEON inline code
    or NEON intrinsics should be built.
    
    GCC NEON intrinsics can be built for both armv7 and armv8. However, NEON
    inline code can be built only for armv7.
    
    @feature
    
    Reviewers: raster, stefan_schmidt, cedric
    
    Subscribers: cedric, stefan_schmidt
    
    Projects: #efl
    
    Differential Revision: https://phab.enlightenment.org/D2309
    
    Signed-off-by: Cedric BAIL <ced...@osg.samsung.com>
---
 configure.ac                                       | 18 +++++++++
 src/lib/evas/common/evas_blit_main.c               |  8 ++++
 src/lib/evas/common/evas_cpu.c                     |  9 +++++
 .../common/evas_op_blend/op_blend_color_neon.c     | 10 ++++-
 .../evas_op_blend/op_blend_mask_color_neon.c       | 47 ++++++++++++++++++++++
 .../evas_op_blend/op_blend_pixel_color_neon.c      | 14 ++++++-
 .../common/evas_op_blend/op_blend_pixel_neon.c     | 33 ++++++++++++++-
 .../evas/common/evas_op_copy/op_copy_color_neon.c  |  9 +++++
 8 files changed, 145 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9eed98c..63cc54d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -576,6 +576,21 @@ case $host_cpu in
        CFLAGS="${CFLAGS_save}"
     fi
     ;;
+  aarch64*)
+    if test "x${want_neon}" = "xyes"; then
+       build_cpu_neon="yes"
+       AC_MSG_CHECKING([whether to use NEON instructions])
+       AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <arm_neon.h>]], 
[[volatile uint32x4_t test = vdupq_n_u32(0x1);]])],[
+           AC_MSG_RESULT([yes])
+           AC_DEFINE([BUILD_NEON], [1], [Build NEON Code])
+           AC_DEFINE([BUILD_NEON_INTRINSICS], [1], [Build NEON Intrinsics])
+           build_cpu_neon="yes"
+                ],[
+          AC_MSG_RESULT([no])
+           build_cpu_neon="no"
+                ])
+    fi
+    ;;
 esac
 
 AC_SUBST([ALTIVEC_CFLAGS])
@@ -4741,6 +4756,9 @@ case $host_cpu in
   arm*)
     EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}])
     ;;
+  aarch64*)
+    EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}])
+    ;;
 esac
 
 if test "${have_linux}" = "yes"; then
diff --git a/src/lib/evas/common/evas_blit_main.c 
b/src/lib/evas/common/evas_blit_main.c
index 7f8faa1..4da4034 100644
--- a/src/lib/evas/common/evas_blit_main.c
+++ b/src/lib/evas/common/evas_blit_main.c
@@ -132,6 +132,9 @@ evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int 
len)
 static void
 evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
 {
+#ifdef BUILD_NEON_INTRINSICS
+evas_common_copy_pixels_rev_c(src, dst, len);
+#else
    uint32_t *tmp = (void *)37;
 #define AP     "evas_common_copy_rev_pixels_neon_"
    asm volatile (
@@ -228,6 +231,7 @@ evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, 
int len)
    );
 #undef AP
 
+#endif
 }
 #endif
 
@@ -324,6 +328,9 @@ evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int 
len)
 #ifdef BUILD_NEON
 static void
 evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
+#ifdef BUILD_NEON_INTRINSICS
+evas_common_copy_pixels_c(src, dst, len);
+#else
    uint32_t *e,*tmp = (void *)37;
    e = dst + len;
 #define AP     "evas_common_copy_pixels_neon_"
@@ -410,6 +417,7 @@ evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int 
len){
    );
 #undef AP
 
+#endif
 }
 #endif /* BUILD_NEON */
 
diff --git a/src/lib/evas/common/evas_cpu.c b/src/lib/evas/common/evas_cpu.c
index 4139098..0f83258 100644
--- a/src/lib/evas/common/evas_cpu.c
+++ b/src/lib/evas/common/evas_cpu.c
@@ -2,6 +2,11 @@
 #ifdef BUILD_MMX
 #include "evas_mmx.h"
 #endif
+#ifdef BUILD_NEON
+#ifdef BUILD_NEON_INTRINSICS
+#include <arm_neon.h>
+#endif
+#endif
 #if defined BUILD_SSE3
 #include <immintrin.h>
 #endif
@@ -92,6 +97,9 @@ evas_common_cpu_neon_test(void)
 {
 //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70)
 #ifdef BUILD_NEON
+#ifdef BUILD_NEON_INTRINSICS
+   volatile uint32x4_t temp = vdupq_n_u32(0x1);
+#else
    asm volatile (
                ".fpu neon           \n\t"
                  "vqadd.u8 d0, d1, d0\n"
@@ -101,6 +109,7 @@ evas_common_cpu_neon_test(void)
                  "d0", "d1"
                  );
 #endif
+#endif
 //#endif
 }
 
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c 
b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
index 9e94298..2bf14c1 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
@@ -3,6 +3,14 @@
 #ifdef BUILD_NEON
 static void
 _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, 
DATA32 *d, int l) {
+#ifdef BUILD_NEON_INTRINSICS
+    DATA32 *e, a = 256 - (c >> 24);
+    UNROLL8_PLD_WHILE(d, l, e,
+                      {
+                         *d = c + MUL_256(a, *d);
+                         d++;
+                      });
+#else
        DATA32 *e, *tmp = 0;
 #define AP     "B_C_DP"
    asm volatile (
@@ -142,7 +150,7 @@ _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m 
EINA_UNUSED, DATA32 c, DATA3
 
        );
 #undef AP
-
+#endif
 }
 
 #define _op_blend_caa_dp_neon _op_blend_c_dp_neon
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c 
b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
index 99f4b38..dbeb063 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
@@ -19,6 +19,30 @@
 #ifdef BUILD_NEON
 static void
 _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, 
int l) {
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   int alpha = 256 - (c >> 24);
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 a = *m;
+                        switch(a)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = c + MUL_256(alpha, *d);
+                             break;
+                          default:
+                               {
+                                  DATA32 mc = MUL_SYM(a, c);
+                                  a = 256 - (mc >> 24);
+                                  *d = mc + MUL_256(a, *d);
+                               }
+                             break;
+                          }
+                        m++;  d++;
+                     });
+#else
    DATA32 *e = d + l;
 
    // everything we can do only once per cycle
@@ -142,12 +166,34 @@ _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, 
DATA32 c, DATA32 *d, in
                                 "q10", "q15", "q14", "memory"
         );
     }
+#endif
 }
 #endif
 
 #ifdef BUILD_NEON
 static void
 _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 
*d, int l) {
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = *m;
+                        switch(alpha)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = c;
+                             break;
+                          default:
+                             alpha++;
+                             *d = INTERP_256(alpha, c, *d);
+                             break;
+                          }
+                        m++;  d++;
+                     });
+#else
    DATA32 *e,*tmp;
    int alpha;
 
@@ -372,6 +418,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, 
DATA32 c, DATA32 *d,
 
      );
 #undef AP
+#endif
 }
 #endif
 
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c 
b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
index d6b3a73..c47ec7c 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
@@ -7,7 +7,18 @@
  * reads, then two writes, a miss on read is 'just' two reads */
 static void
 _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, 
DATA32 * __restrict d, int l) {
-
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 sc = MUL4_SYM(c, *s);
+                        alpha = 256 - (sc >> 24);
+                        *d = sc + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
+#else
 #define AP     "blend_p_c_dp_"
    asm volatile (
       ".fpu neon\n\t"
@@ -92,6 +103,7 @@ _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m 
EINA_UNUSED, DATA32 c, DAT
       : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory"
    );
 #undef AP
+#endif
 }
 
 static void
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c 
b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
index 4b9993b..3c32790 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
@@ -3,6 +3,16 @@
 #ifdef BUILD_NEON
 static void
 _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = 256 - (*s >> 24);
+                        *d = *s++ + MUL_256(alpha, *d);
+                        d++;
+                     });
+#else
 #define AP "blend_p_dp_"
   asm volatile (
        ".fpu neon                                      \n\t"
@@ -238,11 +248,31 @@ _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 
*d, int l) {
           : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // 
clobbered
    );
 #undef AP
-
+#endif
 }
 
 static void
 _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        switch (*s & 0xff000000)
+                          {
+                          case 0:
+                             break;
+                          case 0xff000000:
+                             *d = *s;
+                             break;
+                          default:
+                             alpha = 256 - (*s >> 24);
+                             *d = *s + MUL_256(alpha, *d);
+                             break;
+                          }
+                        s++;  d++;
+                     });
+#else
 #define AP "blend_pas_dp_"
    DATA32 *e = d + l,*tmp  = e + 32,*pl=(void*)912;
       asm volatile (
@@ -447,6 +477,7 @@ _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 
*d, int l) {
                 "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory"
       );
 #undef AP
+#endif
 }
 
 #define _op_blend_pan_dp_neon NULL
diff --git a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c 
b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
index 96310cd..009bd75 100644
--- a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
+++ b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
@@ -3,6 +3,14 @@
 #ifdef BUILD_NEON
 static void
 _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
+#ifdef BUILD_NEON_INTRINSICS
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = c;
+                        d++;
+                     });
+#else
 #define AP "COPY_C_DP_"
    uint32_t *e = d + l,*tmp;
    asm volatile (
@@ -85,6 +93,7 @@ _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, 
int l) {
 
 
    );
+#endif
 }
 
 #define _op_copy_cn_dp_neon _op_copy_c_dp_neon

-- 


Reply via email to