--- libavresample/x86/audio_convert.asm | 60 ++++++++++++++++++++++++++++++++ libavresample/x86/audio_convert_init.c | 9 +++++ 2 files changed, 69 insertions(+), 0 deletions(-)
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 70519e1..4690419 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -772,3 +772,63 @@ CONV_S16_TO_S16P_2CH
 INIT_XMM avx
 CONV_S16_TO_S16P_2CH
 %endif
+
+;------------------------------------------------------------------------------
+; void ff_conv_s16_to_s16p_6ch(int16_t *const *dst, int16_t *src, int len,
+;                              int channels);
+;------------------------------------------------------------------------------
+
+%macro CONV_S16_TO_S16P_6CH 0
+cglobal conv_s16_to_s16p_6ch, 2,8,5, dst, src, dst1, dst2, dst3, dst4, dst5, len
+%if ARCH_X86_64
+    mov      lend, r2d                  ; keep len (arg 2): dst1, the 3rd named reg, reuses r2
+%else ; not x86-64: lend aliases the r2m form of arg 2 and is decremented in place
+    %define lend dword r2m
+%endif
+    mov     dst1q, [dstq+  gprsize]     ; dst1..dst5 = dst[1]..dst[5] (plane pointers)
+    mov     dst2q, [dstq+2*gprsize]
+    mov     dst3q, [dstq+3*gprsize]
+    mov     dst4q, [dstq+4*gprsize]
+    mov     dst5q, [dstq+5*gprsize]
+    mov      dstq, [dstq          ]     ; dstq = dst[0]; done last, it overwrites the array pointer
+    sub     dst1q, dstq                 ; dst1..dst5 become byte offsets relative to dst[0]
+    sub     dst2q, dstq
+    sub     dst3q, dstq
+    sub     dst4q, dstq
+    sub     dst5q, dstq
+    ALIGN 16                            ; align the loop entry to 16 bytes
+.loop:                                  ; per pass: 3 vectors of interleaved input, 8 bytes out per plane
+    mova       m0, [srcq                ] ; m0 = 0, 1, 2, 3, 4, 5, 6, 7
+    mova       m2, [srcq+2*mmsize       ] ; m2 = 16, 17, 18, 19, 20, 21, 22, 23
+    movq       m3, [srcq+ mmsize+mmsize/2] ; m3 low half = 12, 13, 14, 15
+    movlhps    m3, m2                   ; m3 = 12, 13, 14, 15, 16, 17, 18, 19
+    movhpd     m1, [srcq+ mmsize]       ; m1 high half = 8, 9, 10, 11 (low half filled next)
+    movhlps    m1, m0                   ; m1 = 4, 5, 6, 7, 8, 9, 10, 11
+    psrldq     m1, 4                    ; m1 = 6, 7, 8, 9, 10, 11, x, x
+    psrldq     m2, 4                    ; m2 = 18, 19, 20, 21, 22, 23, x, x
+    punpcklwd  m4, m0, m1               ; m4 = 0, 6, 1, 7, 2, 8, 3, 9
+    punpckhwd  m0, m1                   ; m0 = 4, 10, 5, 11, x, x, x, x
+    punpcklwd  m1, m3, m2               ; m1 = 12, 18, 13, 19, 14, 20, 15, 21
+    punpckhwd  m3, m2                   ; m3 = 16, 22, 17, 23, x, x, x, x
+    punpckldq  m2, m4, m1               ; m2 = 0, 6, 12, 18, 1, 7, 13, 19
+    punpckhdq  m4, m1                   ; m4 = 2, 8, 14, 20, 3, 9, 15, 21
+    punpckldq  m0, m3                   ; m0 = 4, 10, 16, 22, 5, 11, 17, 23
+    movq    [dstq      ], m2            ; plane 0 <- 0, 6, 12, 18
+    movhps  [dstq+dst1q], m2            ; plane 1 <- 1, 7, 13, 19
+    movq    [dstq+dst2q], m4            ; plane 2 <- 2, 8, 14, 20
+    movhps  [dstq+dst3q], m4            ; plane 3 <- 3, 9, 15, 21
+    movq    [dstq+dst4q], m0            ; plane 4 <- 4, 10, 16, 22
+    movhps  [dstq+dst5q], m0            ; plane 5 <- 5, 11, 17, 23
+    add      srcq, mmsize*3             ; three full input vectors consumed
+    add      dstq, mmsize/2             ; half a vector written per plane; dst1..dst5 offsets follow dstq
+    sub      lend, mmsize/4             ; mmsize/4 samples per channel handled this pass
+    jg .loop                            ; repeat while the remaining count is > 0
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+CONV_S16_TO_S16P_6CH
+%if HAVE_AVX
+INIT_XMM avx
+CONV_S16_TO_S16P_6CH
+%endif
diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c
index 65c6e10..e907aa4 100644
--- a/libavresample/x86/audio_convert_init.c
+++ b/libavresample/x86/audio_convert_init.c
@@ -93,6 +93,11 @@ extern void ff_conv_s16_to_s16p_2ch_sse2(int16_t *const *dst, int16_t *src,
 extern void ff_conv_s16_to_s16p_2ch_avx (int16_t *const *dst, int16_t *src,
                                          int len, int channels);
 
+extern void ff_conv_s16_to_s16p_6ch_sse2(int16_t *const *dst, int16_t *src,
+                                         int len, int channels);
+extern void ff_conv_s16_to_s16p_6ch_avx (int16_t *const *dst, int16_t *src,
+                                         int len, int channels);
+
 av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
 {
 #if HAVE_YASM
@@ -139,6 +144,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
                                   6, 16, 4, "SSE2", ff_conv_fltp_to_s16_6ch_sse2);
         ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                   2, 16, 8, "SSE2", ff_conv_s16_to_s16p_2ch_sse2);
+        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+                                  6, 16, 4, "SSE2", ff_conv_s16_to_s16p_6ch_sse2);
     }
     if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
         ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
@@ -169,6 +176,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
                                   6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx);
         ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
                                   2, 16, 8, "AVX", ff_conv_s16_to_s16p_2ch_avx);
+        ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16,
+                                  6, 16, 4, "AVX", ff_conv_s16_to_s16p_6ch_avx);
     }
 #endif
 }
-- 
1.7.1
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel