---
Athlon64
C - 12412
SSE2 - 3706
SandyBridge
C - 16440
SSE2 - 3116
SSE4 - 1002
libavresample/x86/audio_convert.asm | 43 ++++++++++++++++++++++++++++++++
libavresample/x86/audio_convert_init.c | 9 ++++++
2 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index cd99f94..4d4c798 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -23,6 +23,10 @@
%include "x86inc.asm"
%include "x86util.asm"
+SECTION_RODATA 32
+
+pf_s16_inv_scale: times 4 dd 0x38000000 ; 4 x IEEE-754 single 2^-15 (1/32768): sign 0, exponent 0x70, mantissa 0; used as the mulps scale factor in conv_s16_to_flt
+
SECTION_TEXT
;------------------------------------------------------------------------------
@@ -47,6 +51,45 @@ cglobal conv_s16_to_s32, 3,3,3, dst, src, len
jl .loop
REP_RET
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_flt(float *dst, const int16_t *src, int len);
+;
+; Converts len signed 16-bit samples from src to single-precision floats in
+; dst, multiplying each converted sample by pf_s16_inv_scale (2^-15).
+;
+; Requirements implied by the code below:
+;   - each iteration consumes mmsize bytes of src (mmsize/2 samples) and
+;     produces 2*mmsize bytes of dst, so len is expected to be a multiple of
+;     mmsize/2 samples
+;   - dst is written with mova and, in the non-SSE4 path, src is read with
+;     mova, so those accesses need mmsize alignment
+;   - the loop body runs before the counter is tested, so len must be > 0
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_FLT 0
+cglobal conv_s16_to_flt, 3,3,3, dst, src, len
+    lea       lenq, [2*lend]            ; lenq = 2*len = size of src in bytes
+    add       srcq, lenq                ; srcq -> end of src
+    lea       dstq, [dstq + 2*lenq]     ; dstq -> end of dst (4 bytes per sample)
+    neg       lenq                      ; negative byte offset, counts up towards 0
+    mova      m2, [pf_s16_inv_scale]    ; m2 = scale factor, loop invariant
+    ALIGN 16
+.loop:
+%if cpuflag(sse4)
+    pmovsxwd  m0, [srcq+lenq         ]  ; sign-extend samples 0-3 to dwords
+    pmovsxwd  m1, [srcq+lenq+mmsize/2]  ; sign-extend samples 4-7 to dwords
+%else
+    ; Unpack each register with itself so that neither result depends on the
+    ; value left in m0/m1 by the previous iteration (no loop-carried false
+    ; dependency); psrad then sign-extends the sample held in the high word.
+    mova      m0, [srcq+lenq]
+    mova      m1, m0
+    punpcklwd m0, m0                    ; dwords = (s0,s0) (s1,s1) (s2,s2) (s3,s3)
+    punpckhwd m1, m1                    ; dwords = (s4,s4) (s5,s5) (s6,s6) (s7,s7)
+    psrad     m0, 16
+    psrad     m1, 16
+%endif
+    cvtdq2ps  m0, m0                    ; int32 -> float
+    cvtdq2ps  m1, m1
+    mulps     m0, m0, m2                ; scale by 2^-15
+    mulps     m1, m1, m2
+    mova  [dstq+2*lenq       ], m0      ; dst offset is twice the src offset
+    mova  [dstq+2*lenq+mmsize], m1
+    add       lenq, mmsize              ; advance by one register of source bytes
+    jl .loop                            ; continue while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2 ; 128-bit registers, SSE2 feature set: cpuflag(sse4) is false, so the punpck/psrad path is assembled
+CONV_S16_TO_FLT ; SSE2 instance of conv_s16_to_flt
+INIT_XMM sse4 ; same register width with SSE4 enabled: the pmovsxwd path is assembled
+CONV_S16_TO_FLT ; SSE4 instance of conv_s16_to_flt
+
;-----------------------------------------------------------------------------
; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
; int channels);
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index caaae78..fad26a7 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -24,6 +24,9 @@
extern void ff_conv_s16_to_s32_sse2(int16_t *dst, const int32_t *src, int len);
+extern void ff_conv_s16_to_flt_sse2(float *dst, const int16_t *src, int len); /* int16 -> float conversion, SSE2 build of CONV_S16_TO_FLT in x86/audio_convert.asm */
+extern void ff_conv_s16_to_flt_sse4(float *dst, const int16_t *src, int len); /* same conversion, SSE4 build (uses pmovsxwd) */
+
extern void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int
len);
extern void ff_conv_fltp_to_flt_6ch_sse(float *dst, float *const *src, int
len);
@@ -43,6 +46,12 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16,
0, 16, 8, "SSE2", ff_conv_s16_to_s32_sse2);
+ ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
+ 0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
+ }
+ if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
+ ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
+ 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
}
#endif
}
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel