[libav-devel] [PATCH 3/3] ac3enc: sse2 version of compute_mantissa_size()

Justin Ruggles Fri, 13 May 2011 17:11:40 -0700

---
 libavcodec/x86/ac3dsp.asm   |   48 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/ac3dsp_mmx.c |    3 ++
 2 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 1b34751..fc05f25 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,9 @@ SECTION_RODATA
 ; 16777216.0f - used in ff_float_to_fixed24()
 pf_1_24: times 4 dd 0x4B800000
 
+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -293,3 +296,48 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
 %endif
     ja .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,5,2, mant_cnt, blk, sum, tmp1, tmp2
+    xor       sumd, sumd                ; scalar bit total (32-bit write zero-extends on x86-64)
+    mov       blkd, 6                   ; one pass per 32-byte row of mant_cnt[6][16]
+    pxor        m0, m0                  ; m0 = running word sums of counts 0-7
+    pxor        m1, m1                  ; m1 = running word sums of counts 8-15
+    ALIGN 16
+.loop:
+    paddw       m0, [mant_cntq   ]      ; SSE memory operand: each row must be 16-byte aligned
+    paddw       m1, [mant_cntq+16]
+    ; scalar part: sum += 7 * (cnt[4]/2 + cnt[2]/3) + 5 * (cnt[1]/3)
+    movzx    tmp1d, word [mant_cntq+8]  ; cnt[4]
+    shr      tmp1d, 1                   ; cnt[4] / 2
+    movzx    tmp2d, word [mant_cntq+4]  ; cnt[2]
+    imul     tmp2d, 43691               ; 43691 = ceil(2^17 / 3) ...
+    shr      tmp2d, 17                  ; ... so mul+shift is an exact /3 for any 16-bit value
+    add      tmp2d, tmp1d
+    lea      tmp1d, [tmp2q*8]
+    sub      tmp1d, tmp2d               ; tmp1 = 7 * (cnt[4]/2 + cnt[2]/3)
+    movzx    tmp2d, word [mant_cntq+2]  ; cnt[1]
+    imul     tmp2d, 43691
+    shr      tmp2d, 17                  ; cnt[1] / 3
+    lea      tmp2d, [tmp2q+tmp2q*4]     ; 5 * (cnt[1]/3)
+    add      tmp2d, tmp1d
+    add       sumd, tmp2d
+    ; advance to the next row of 16 counts
+    add  mant_cntq, 32
+    dec       blkd
+    jnz .loop                           ; dec updates ZF but not CF, so branch on ZF only
+    ; multiply the summed counts by the per-entry table words and add everything up
+    pmaddwd     m0, [ac3_bap_bits   ]   ; use the cextern-declared name so its mangling applies
+    pmaddwd     m1, [ac3_bap_bits+16]
+    paddd       m0, m1
+    movhlps     m1, m0                  ; fold the upper two dwords onto the lower two
+    paddd       m0, m1
+    pshufd      m1, m0, 0x1             ; bring dword 1 down to dword 0
+    paddd       m0, m1
+    movd       eax, m0
+    add        eax, sumd                ; return value = table-weighted sum + scalar sum
+    RET
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 4750423..2664736 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
 extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
 
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 3/3] ac3enc: sse2 version of compute_mantissa_size()

Reply via email to