On 2014-02-07 21:57:08 +0100, Christophe Gisquet wrote: > From 87983deb56aa52c2cdcfbf248dd76bccb97d694a Mon Sep 17 00:00:00 2001 > From: Christophe Gisquet <christophe.gisq...@gmail.com> > Date: Fri, 11 May 2012 11:25:30 +0200 > Subject: [PATCH 02/10] x86: dcadsp: implement int8x8_fmul_int32 > > For the callable function (as opposed to the inline one): > C SSE SSE2 SSE4 > Win32: 47 42 29 26 > Win64: 30 33 25 23 > The SSE version is neither compiled nor set for ARCH_X86_64, as the > inlinable function takes over. > --- > libavcodec/dcadec.c | 3 ++ > libavcodec/dcadsp.c | 1 + > libavcodec/dcadsp.h | 1 + > libavcodec/x86/Makefile | 2 + > libavcodec/x86/dca.h | 52 +++++++++++++++++++++++++ > libavcodec/x86/dcadsp.asm | 90 > ++++++++++++++++++++++++++++++++++++++++++++ > libavcodec/x86/dcadsp_init.c | 47 +++++++++++++++++++++++ > 7 files changed, 196 insertions(+) > create mode 100644 libavcodec/x86/dca.h > create mode 100644 libavcodec/x86/dcadsp.asm > create mode 100644 libavcodec/x86/dcadsp_init.c > > diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c > index b6df3b9..6ffb040 100644 > --- a/libavcodec/dcadec.c > +++ b/libavcodec/dcadec.c > @@ -50,6 +50,9 @@ > #if ARCH_ARM > # include "arm/dca.h" > #endif > +#if ARCH_X86 > +# include "x86/dca.h" > +#endif > > //#define TRACE > > diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c > index b984864..148f6dd 100644 > --- a/libavcodec/dcadsp.c > +++ b/libavcodec/dcadsp.c > @@ -88,4 +88,5 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) > s->qmf_32_subbands = dca_qmf_32_subbands; > s->int8x8_fmul_int32 = int8x8_fmul_int32_c; > if (ARCH_ARM) ff_dcadsp_init_arm(s); > + if (ARCH_X86) ff_dcadsp_init_x86(s); > } > diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h > index 0f79dd6..e2ad09a 100644 > --- a/libavcodec/dcadsp.h > +++ b/libavcodec/dcadsp.h > @@ -36,5 +36,6 @@ typedef struct DCADSPContext { > > void ff_dcadsp_init(DCADSPContext *s); > void ff_dcadsp_init_arm(DCADSPContext *s); > +void 
ff_dcadsp_init_x86(DCADSPContext *s); > > #endif /* AVCODEC_DCADSP_H */ > diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile > index 6f4935b..f985525 100644 > --- a/libavcodec/x86/Makefile > +++ b/libavcodec/x86/Makefile > @@ -4,6 +4,7 @@ OBJS += x86/constants.o > \ > OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o > OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o > OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o > +OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o > OBJS-$(CONFIG_DCT) += x86/dct_init.o > OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o > OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \ > @@ -54,6 +55,7 @@ YASM-OBJS += x86/deinterlace.o > \ > > YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o > YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o > +YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o > YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o > YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \ > x86/fpel.o \ > diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h > new file mode 100644 > index 0000000..c14e94f > --- /dev/null > +++ b/libavcodec/x86/dca.h > @@ -0,0 +1,52 @@ > +/* > + * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com> > + * > + * This file is part of Libav. > + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#if ARCH_X86_64 > +# include "libavutil/x86/asm.h" > +# include "libavutil/mem.h" > + > +# define int8x8_fmul_int32 int8x8_fmul_int32 > +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, > + float *dst, const int8_t *src, int > scale) > +{ > + DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000; > + __asm__ volatile ( > + "cvtsi2ss %2, %%xmm0 \n\t" > + "mulss %3, %%xmm0 \n\t" > + "movq (%1), %%xmm1 \n\t" > + "punpcklbw %%xmm1, %%xmm1 \n\t" > + "movaps %%xmm1, %%xmm2 \n\t" > + "punpcklwd %%xmm1, %%xmm1 \n\t" > + "punpckhwd %%xmm2, %%xmm2 \n\t" > + "psrad $24, %%xmm1 \n\t" > + "psrad $24, %%xmm2 \n\t" > + "shufps $0, %%xmm0, %%xmm0 \n\t" > + "cvtdq2ps %%xmm1, %%xmm1 \n\t" > + "cvtdq2ps %%xmm2, %%xmm2 \n\t" > + "mulps %%xmm0, %%xmm1 \n\t" > + "mulps %%xmm0, %%xmm2 \n\t" > + "movaps %%xmm1, 0(%0) \n\t" > + "movaps %%xmm2, 16(%0) \n\t" > + :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16) > + XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2") > + ); > +} > + > +#endif /* ARCH_X86_64 */ > diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm > new file mode 100644 > index 0000000..214f514 > --- /dev/null > +++ b/libavcodec/x86/dcadsp.asm > @@ -0,0 +1,90 @@ > +;****************************************************************************** > +;* SSE-optimized functions for the DCA decoder > +;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com> > +;* > +;* This file is part of Libav. > +;* > +;* Libav is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. 
> +;* > +;* Libav is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with Libav; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > +pf_inv16: times 4 dd 0x3D800000 ; 1/16 > + > +SECTION_TEXT > + > +; void int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale) > +%macro INT8X8_FMUL_INT32 0 > +cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale > + cvtsi2ss m0, scalem > + mulss m0, [pf_inv16] > + shufps m0, m0, 0 > +%if cpuflag(sse2) > +%if cpuflag(sse4) > + pmovsxbd m1, [srcq+0] > + pmovsxbd m2, [srcq+4] > +%else > + movq m1, [srcq] > + punpcklbw m1, m1 > + mova m2, m1 > + punpcklwd m1, m1 > + punpckhwd m2, m2 > + psrad m1, 24 > + psrad m2, 24 > +%endif > + cvtdq2ps m1, m1 > + cvtdq2ps m2, m2 > +%else > + movd mm0, [srcq+0] > + movd mm1, [srcq+4] > + punpcklbw mm0, mm0 > + punpcklbw mm1, mm1 > + movq mm2, mm0 > + movq mm3, mm1 > + punpcklwd mm0, mm0 > + punpcklwd mm1, mm1 > + punpckhwd mm2, mm2 > + punpckhwd mm3, mm3 > + psrad mm0, 24 > + psrad mm1, 24 > + psrad mm2, 24 > + psrad mm3, 24 > + cvtpi2ps m1, mm0 > + cvtpi2ps m2, mm1 > + cvtpi2ps m3, mm2 > + cvtpi2ps m4, mm3 > + shufps m0, m0, 0 > + emms > + shufps m1, m3, q1010 > + shufps m2, m4, q1010 > +%endif > + mulps m1, m0 > + mulps m2, m0 > + mova [dstq+ 0], m1 > + mova [dstq+16], m2 > + REP_RET > +%endmacro > + > +%if ARCH_X86_32 > +INIT_XMM sse > +INT8X8_FMUL_INT32 > +%endif > + > +INIT_XMM sse2 > +INT8X8_FMUL_INT32 > + > +INIT_XMM sse4 > +INT8X8_FMUL_INT32
Do you have someone who's keen to review x86 asm? > diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c > new file mode 100644 > index 0000000..976d8a3 > --- /dev/null > +++ b/libavcodec/x86/dcadsp_init.c > @@ -0,0 +1,47 @@ > +/* > + * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com> > + * > + * This file is part of Libav. > + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/attributes.h" > +#include "libavutil/cpu.h" > +#include "libavutil/x86/cpu.h" > +#include "libavcodec/dcadsp.h" > + > +void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale); > +void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale); > +void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale); > + > +av_cold void ff_dcadsp_init_x86(DCADSPContext *s) > +{ > + int cpu_flags = av_get_cpu_flags(); > + > + if (EXTERNAL_SSE(cpu_flags)) { > +#if ARCH_X86_32 The ARCH_X86_32 check can be moved into the if condition. No need to update the patch; it can be fixed before pushing if remembered, and it's no problem if it's forgotten. > + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse; > +#endif > + } > + > + if (EXTERNAL_SSE2(cpu_flags)) { > + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2; > + } > 
+ > + if (EXTERNAL_SSE4(cpu_flags)) { > + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4; > + } > +} otherwise ok Janne _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel