---
libavcodec/arm/dcadsp_init_arm.c | 46 ++++++----------------------------------
libavcodec/arm/dcadsp_neon.S | 29 +++++++++++++++++++++++++
2 files changed, 35 insertions(+), 40 deletions(-)
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index b96d588..5400484 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,43 +49,11 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
-#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
-
-static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
- float *dst, const int8_t *src, int scale)
-{
- __asm__ ("vcvt.f32.s32 %2, %2, #4 \n"
- "vld1.8 {d0}, [%1,:64] \n"
- "vmovl.s8 q0, d0 \n"
- "vmovl.s16 q1, d1 \n"
- "vmovl.s16 q0, d0 \n"
- "vcvt.f32.s32 q0, q0 \n"
- "vcvt.f32.s32 q1, q1 \n"
- "vmul.f32 q0, q0, %y2 \n"
- "vmul.f32 q1, q1, %y2 \n"
- "vst1.32 {q0-q1}, [%m0,:128] \n"
- : "=Um"(*(float (*)[8])dst)
- : "r"(src), "x"(scale)
- : "d0", "d1", "d2", "d3");
-}
-
-static void decode_hf_neon(float dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end)
-{
- int l;
- for (l = start; l < end; l++) {
- /* 1 vector -> 32 samples but we only need the 8 samples
- * for this subsubframe. */
- int hfvq = vq_num[l];
-
- int8x8_fmul_int32(NULL, dst[l], hf_vq[hfvq] + vq_offset, scale[l][0]);
- }
-}
-
-#endif /* HAVE_NEON_INLINE && HAVE_ASM_MOD_Y */
+void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], /* out: dst[l][0..7] for l = start .. end-1 */
+ const int32_t vq_num[DCA_SUBBANDS], /* vq_num[l] selects the hf_vq row used for subband l */
+ const int8_t hf_vq[1024][32], intptr_t vq_offset, /* 8 bytes are read at hf_vq[vq_num[l]] + vq_offset */
+ int32_t scale[DCA_SUBBANDS][2], /* only scale[l][0] is used: multiplier with 4 fraction bits */
+ intptr_t start, intptr_t end); /* subband range; defined in dcadsp_neon.S */
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
{
@@ -99,9 +67,7 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
if (have_neon(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
-#if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
- s->decode_hf = decode_hf_neon;
-#endif
+ s->decode_hf = ff_decode_hf_neon;
}
}
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index 735c4c2..70580cd 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,6 +20,35 @@
#include "libavutil/arm/asm.S"
+function ff_decode_hf_neon, export=1
+ push {r4-r5,lr} @ in: r0=dst r1=vq_num r2=hf_vq r3=vq_offset; stack args scale,start,end now at sp+12
+ add r2, r2, r3 @ r2 = hf_vq + vq_offset (byte offset inside each 32-byte row)
+ ldr r3, [sp, #12] @ r3 = scale
+ ldrd r4, r5, [sp, #16] @ r4 = start (used as loop counter l), r5 = end
+ add r3, r3, r4, lsl #3 @ r3 = &scale[start][0] (2 x int32 = 8 bytes per row)
+ add r1, r1, r4, lsl #2 @ r1 = &vq_num[start] (int32 = 4 bytes each)
+ add r0, r0, r4, lsl #5 @ r0 = dst[start] (8 floats = 32 bytes per row)
+ cmp r4, r5 @ empty range (start >= end): touch nothing, like the C loop "l < end"
+ bge 2f
+1: ldr_post lr, r1, #4 @ lr = vq_num[l], r1 += 4 (ldr_post: macro from asm.S, presumably a post-indexed load)
+ add r4, r4, #1 @ l++
+ add lr, r2, lr, lsl #5 @ lr = &hf_vq[vq_num[l]][vq_offset]
+ cmp r4, r5 @ flags for the bne below; the NEON ops in between do not change them
+ vld1.32 {d7}, [r3]! @ d7 = scale[l][0], scale[l][1]; r3 += 8
+ vld1.8 {d0}, [lr,:64] @ d0 = 8 signed bytes (address must be 8-byte aligned)
+ vcvt.f32.s32 d7, d7, #4 @ fixed point with 4 fraction bits -> float, i.e. scale / 16
+ vmovl.s8 q1, d0 @ widen s8 -> s16
+ vmovl.s16 q0, d2 @ widen s16 -> s32, samples 0-3
+ vmovl.s16 q1, d3 @ widen s16 -> s32, samples 4-7
+ vcvt.f32.s32 q0, q0 @ s32 -> float
+ vcvt.f32.s32 q1, q1
+ vmul.f32 q0, q0, d7[0] @ multiply by scale[l][0] / 16; d7[1] (scale[l][1]) is not used
+ vmul.f32 q1, q1, d7[0]
+ vst1.32 {q0-q1}, [r0,:128]! @ dst[l][0..7] = result (16-byte aligned); r0 += 32
+ bne 1b @ repeat until l == end
+2: pop {r4-r5,pc}
+endfunc
+
function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr}
mov r3, #32 @ decifactor
--
1.9.0
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel