raster pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=48d3253b45b0e155b70c22ac6ea0b769aaecbd3a

commit 48d3253b45b0e155b70c22ac6ea0b769aaecbd3a
Author: Carsten Haitzler (Rasterman) <ras...@rasterman.com>
Date:   Mon Dec 2 16:33:34 2013 +0900

    NEON vectorization: added use of COLSAME define in map routine
    
    Reviewers: raster
    
    Reviewed By: raster
    
    CC: cedric
    
    Differential Revision: https://phab.enlightenment.org/D341
---
 src/lib/evas/common/evas_map_image_core.c |  3 ---
 src/lib/evas/common/evas_map_image_loop.c | 36 +++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/lib/evas/common/evas_map_image_core.c b/src/lib/evas/common/evas_map_image_core.c
index 6e2be0e..7e44c4b 100644
--- a/src/lib/evas/common/evas_map_image_core.c
+++ b/src/lib/evas/common/evas_map_image_core.c
@@ -19,9 +19,6 @@
 #ifdef SCALE_USING_MMX
              pxor_r2r(mm0, mm0);
              MOV_A2R(ALPHA_255, mm5)
-#elif defined SCALE_USING_NEON
-             FPU_NEON;
-             VMOV_I2R_NEON(q2, #255);
 #endif
                
              line = &(spans[y - ystart]);
diff --git a/src/lib/evas/common/evas_map_image_loop.c b/src/lib/evas/common/evas_map_image_loop.c
index a8a49eb..9da2ebf 100644
--- a/src/lib/evas/common/evas_map_image_loop.c
+++ b/src/lib/evas/common/evas_map_image_loop.c
@@ -8,15 +8,21 @@
 #  endif   
 # endif
 # ifdef SCALE_USING_NEON
+   FPU_NEON;
+   VMOV_I2R_NEON(q2, #255);
 #  ifdef COLMUL
 #   ifndef COLBLACK
    // this part can be done here as c1 and c2 are constants in the cycle
    FPU_NEON;
    VMOV_M2R_NEON(d18, c1);
    VEOR_NEON(q8);
+#    ifndef COLSAME
    VMOV_M2R_NEON(d19, c2);
+#    endif
    VZIP_NEON(q9, q8);
+#    ifndef COLSAME
    VMOV_R2R_NEON(d19, d16);
+#    endif
    // here we have c1 and c2 spread through q9 register
 #   endif
 #  endif
@@ -117,10 +123,22 @@
             VMOV_R2R_NEON(d11, d2);
             // by this point we have all required data in right registers
            INTERP_256_NEON(q3, q5, q4, q2); // interpolate val1,val2 and val3,val4
+#   ifdef COLMUL
+#    ifdef COLSAME
+            INTERP_256_NEON(d14, d9, d8, d4);
+#    else
             VSWP_NEON(d9, d12); // move result of val3,val4 interpolation (and c1 if COLMUL is defined) for next step
             INTERP_256_NEON(q7, q6, q4, q2); // second stage of interpolation, also here c1 and c2 are interpolated
+#    endif
+#   else
+            INTERP_256_NEON(d14, d9, d8, d4);
+#   endif
 #   ifdef COLMUL
+#    ifdef COLSAME
+            MUL4_SYM_NEON(d8, d12, d4);
+#    else
             MUL4_SYM_NEON(d8, d9, d4); // do required multiplication
+#    endif
 #   endif
             VMOV_R2M_NEON(q4, d8, d); // save result to d
           }
@@ -154,13 +172,22 @@
 # ifdef SCALE_USING_NEON
 #  ifdef COLMUL
 #   ifndef COLBLACK
+#    ifdef COLSAME
+   FPU_NEON;
+   VMOV_I2R_NEON(q2, #255);
+   VMOV_M2R_NEON(d10, c1);
+   VEOR_NEON(d0);
+   VZIP_NEON(d10, d0);
+#    else
    // c1 and c2 are constants inside the cycle
    FPU_NEON;
+   VMOV_I2R_NEON(q2, #255);
    VMOV_M2R_NEON(d10, c1);
    VEOR_NEON(q0);
    VMOV_M2R_NEON(d11, c2);
    VZIP_NEON(q5, q0);
    VMOV_R2R_NEON(d11, d0);
+#    endif
 #   endif
 #  endif
 # endif
@@ -184,9 +211,14 @@
         val1 = *s; // col
 #   ifdef COLSAME
 #    ifdef SCALE_USING_NEON
-        *d = MUL4_SYM(c1, val1);
+        VMOV_M2R_NEON(d1, val1);
+        VEOR_NEON(d0);
+        VZIP_NEON(d1, d0);
+        VMOV_R2R_NEON(d0, d10);
+        MUL4_SYM_NEON(d0, d1, d4)
+        VMOV_R2M_NEON(q0, d0, d);
 #    else
-        *d = MUL4_SYM(c1, val1); // XXX: do this in neon
+        *d = MUL4_SYM(c1, val1);
 #    endif
 #   else
 #    ifdef SCALE_USING_NEON

-- 


Reply via email to