raster pushed a commit to branch master. http://git.enlightenment.org/core/efl.git/commit/?id=48d3253b45b0e155b70c22ac6ea0b769aaecbd3a
commit 48d3253b45b0e155b70c22ac6ea0b769aaecbd3a
Author: Carsten Haitzler (Rasterman) <ras...@rasterman.com>
Date: Mon Dec 2 16:33:34 2013 +0900

NEON vectorization: added use of COLSAME define in map routine

Reviewers: raster
Reviewed By: raster
CC: cedric
Differential Revision: https://phab.enlightenment.org/D341
---
 src/lib/evas/common/evas_map_image_core.c | 3 ---
 src/lib/evas/common/evas_map_image_loop.c | 36 +++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/lib/evas/common/evas_map_image_core.c b/src/lib/evas/common/evas_map_image_core.c
index 6e2be0e..7e44c4b 100644
--- a/src/lib/evas/common/evas_map_image_core.c
+++ b/src/lib/evas/common/evas_map_image_core.c
@@ -19,9 +19,6 @@
 #ifdef SCALE_USING_MMX
 pxor_r2r(mm0, mm0);
 MOV_A2R(ALPHA_255, mm5)
-#elif defined SCALE_USING_NEON
- FPU_NEON;
- VMOV_I2R_NEON(q2, #255);
 #endif
 line = &(spans[y - ystart]);
diff --git a/src/lib/evas/common/evas_map_image_loop.c b/src/lib/evas/common/evas_map_image_loop.c
index a8a49eb..9da2ebf 100644
--- a/src/lib/evas/common/evas_map_image_loop.c
+++ b/src/lib/evas/common/evas_map_image_loop.c
@@ -8,15 +8,21 @@
 # endif
 # endif
 # ifdef SCALE_USING_NEON
+ FPU_NEON;
+ VMOV_I2R_NEON(q2, #255);
 # ifdef COLMUL
 # ifndef COLBLACK
 // this part can be done here as c1 and c2 are constants in the cycle
 FPU_NEON;
 VMOV_M2R_NEON(d18, c1);
 VEOR_NEON(q8);
+# ifndef COLSAME
 VMOV_M2R_NEON(d19, c2);
+# endif
 VZIP_NEON(q9, q8);
+# ifndef COLSAME
 VMOV_R2R_NEON(d19, d16);
+# endif
 // here we have c1 and c2 spread through q9 register
 # endif
 # endif
@@ -117,10 +123,22 @@
 VMOV_R2R_NEON(d11, d2);
 // by this point we have all required data in right registers
 INTERP_256_NEON(q3, q5, q4, q2); // interpolate val1,val2 and val3,val4
+# ifdef COLMUL
+# ifdef COLSAME
+ INTERP_256_NEON(d14, d9, d8, d4);
+# else
 VSWP_NEON(d9, d12); // move result of val3,val4 interpolation (and c1 if COLMUL is defined) for next step
 INTERP_256_NEON(q7, q6, q4, q2); // second stage of interpolation, also here c1 and c2 are interpolated
+# endif
+# else
+ INTERP_256_NEON(d14, d9, d8, d4);
+# endif
 # ifdef COLMUL
+# ifdef COLSAME
+ MUL4_SYM_NEON(d8, d12, d4);
+# else
 MUL4_SYM_NEON(d8, d9, d4); // do required multiplication
+# endif
 # endif
 VMOV_R2M_NEON(q4, d8, d); // save result to d
 }
@@ -154,13 +172,22 @@
 # ifdef SCALE_USING_NEON
 # ifdef COLMUL
 # ifndef COLBLACK
+# ifdef COLSAME
+ FPU_NEON;
+ VMOV_I2R_NEON(q2, #255);
+ VMOV_M2R_NEON(d10, c1);
+ VEOR_NEON(d0);
+ VZIP_NEON(d10, d0);
+# else
 // c1 and c2 are constants inside the cycle
 FPU_NEON;
+ VMOV_I2R_NEON(q2, #255);
 VMOV_M2R_NEON(d10, c1);
 VEOR_NEON(q0);
 VMOV_M2R_NEON(d11, c2);
 VZIP_NEON(q5, q0);
 VMOV_R2R_NEON(d11, d0);
+# endif
 # endif
 # endif
 # endif
@@ -184,9 +211,14 @@
 val1 = *s; // col
 # ifdef COLSAME
 # ifdef SCALE_USING_NEON
- *d = MUL4_SYM(c1, val1);
+ VMOV_M2R_NEON(d1, val1);
+ VEOR_NEON(d0);
+ VZIP_NEON(d1, d0);
+ VMOV_R2R_NEON(d0, d10);
+ MUL4_SYM_NEON(d0, d1, d4)
+ VMOV_R2M_NEON(q0, d0, d);
 # else
- *d = MUL4_SYM(c1, val1); // XXX: do this in neon
+ *d = MUL4_SYM(c1, val1);
 # endif
 # else
 # ifdef SCALE_USING_NEON
--