[libav-devel] [PATCH 7/7] arm: dcadsp: implement decode_hf as external NEON asm

Janne Grunau Sat, 22 Feb 2014 11:00:11 -0800

---
 libavcodec/arm/dcadsp_init_arm.c | 46 ++++++----------------------------------
 libavcodec/arm/dcadsp_neon.S     | 29 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 40 deletions(-)


diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index b96d588..5400484 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,43 +49,11 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float out[32], const float in[32],
                                 float scale);
 
-#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
-
-static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
-                                     float *dst, const int8_t *src, int scale)
-{
-    __asm__ ("vcvt.f32.s32 %2,  %2,  #4         \n"
-             "vld1.8       {d0},     [%1,:64]   \n"
-             "vmovl.s8     q0,  d0              \n"
-             "vmovl.s16    q1,  d1              \n"
-             "vmovl.s16    q0,  d0              \n"
-             "vcvt.f32.s32 q0,  q0              \n"
-             "vcvt.f32.s32 q1,  q1              \n"
-             "vmul.f32     q0,  q0,  %y2        \n"
-             "vmul.f32     q1,  q1,  %y2        \n"
-             "vst1.32      {q0-q1},  [%m0,:128] \n"
-             : "=Um"(*(float (*)[8])dst)
-             : "r"(src), "x"(scale)
-             : "d0", "d1", "d2", "d3");
-}
-
-static void decode_hf_neon(float dst[DCA_SUBBANDS][8],
-                           const int32_t vq_num[DCA_SUBBANDS],
-                           const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                           int32_t scale[DCA_SUBBANDS][2],
-                           intptr_t start, intptr_t end)
-{
-    int l;
-    for (l = start; l < end; l++) {
-        /* 1 vector -> 32 samples but we only need the 8 samples
-         * for this subsubframe. */
-        int hfvq = vq_num[l];
-
-        int8x8_fmul_int32(NULL, dst[l], hf_vq[hfvq] + vq_offset, scale[l][0]);
-    }
-}
-
-#endif /* HAVE_NEON_INLINE && HAVE_ASM_MOD_Y */
+void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
+                       const int32_t vq_num[DCA_SUBBANDS],
+                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
+                       int32_t scale[DCA_SUBBANDS][2],
+                       intptr_t start, intptr_t end); /* defined in dcadsp_neon.S */
 
 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
@@ -99,9 +67,7 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
     if (have_neon(cpu_flags)) {
         s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
         s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
-#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
-        s->decode_hf  = decode_hf_neon;
-#endif
+        s->decode_hf  = ff_decode_hf_neon;
     }
 }
 
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index 735c4c2..70580cd 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,6 +20,35 @@
 
 #include "libavutil/arm/asm.S"
 
+function ff_decode_hf_neon, export=1                   @ dst[l][0..7] = hf_vq[vq_num[l]][vq_offset..+7] * scale[l][0] / 16
+        push            {r4-r5,lr}                      @ 12 bytes pushed: stack arguments now start at sp+12
+        add             r2,  r2,  r3                    @ r2 = hf_vq + vq_offset (byte offset inside each row)
+        ldr             r3,       [sp, #12]             @ r3 = scale
+        ldrd            r4,  r5,  [sp, #16]             @ r4 = start (running index l), r5 = end
+        add             r3,  r3,  r4, lsl #3            @ r3 = &scale[start][0]  (8 bytes per row)
+        add             r1,  r1,  r4, lsl #2            @ r1 = &vq_num[start]    (4 bytes per entry)
+        add             r0,  r0,  r4, lsl #5            @ r0 = dst[start]        (32 bytes per row)
+
+1:      ldr_post        lr,  r1,  #4                    @ lr = vq_num[l]; presumably post-increments r1 by 4 (macro from asm.S)
+        add             r4,  r4,  #1                    @ l++
+        add             lr,  r2,  lr, lsl #5            @ lr = hf_vq[vq_num[l]] + vq_offset (32 bytes per row)
+        cmp             r4,  r5                         @ set flags early for the bne at the loop end
+        vld1.32         {d7},     [r3]!                 @ d7 = scale[l][0..1], r3 += 8
+        vld1.8          {d0},     [lr,:64]              @ d0 = 8 int8 samples; :64 requires an 8-byte aligned address
+        vcvt.f32.s32    d7,  d7,  #4                    @ scale: fixed point with 4 fraction bits -> float
+        vmovl.s8        q1,  d0                         @ sign-extend 8 x int8 -> int16
+        vmovl.s16       q0,  d2                         @ samples 0-3: int16 -> int32
+        vmovl.s16       q1,  d3                         @ samples 4-7: int16 -> int32
+        vcvt.f32.s32    q0,  q0                         @ samples 0-3: int32 -> float
+        vcvt.f32.s32    q1,  q1                         @ samples 4-7: int32 -> float
+        vmul.f32        q0,  q0,  d7[0]                 @ multiply by scale[l][0]; d7[1] (scale[l][1]) is not used
+        vmul.f32        q1,  q1,  d7[0]
+        vst1.32         {q0-q1},  [r0,:128]!            @ store dst[l][0..7], r0 += 32; :128 requires 16-byte alignment
+        bne             1b                              @ until l == end; tested after the body, NOTE(review): assumes start < end
+
+        pop             {r4-r5,pc}                      @ restore r4-r5 and return
+endfunc
+
 function ff_dca_lfe_fir0_neon, export=1
         push            {r4-r6,lr}
         mov             r3,  #32                @ decifactor
-- 
1.9.0

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 7/7] arm: dcadsp: implement decode_hf as external NEON asm

Reply via email to