On 2014-02-07 21:57:08 +0100, Christophe Gisquet wrote:
> From 87983deb56aa52c2cdcfbf248dd76bccb97d694a Mon Sep 17 00:00:00 2001
> From: Christophe Gisquet <christophe.gisq...@gmail.com>
> Date: Fri, 11 May 2012 11:25:30 +0200
> Subject: [PATCH 02/10] x86: dcadsp: implement int8x8_fmul_int32
> 
> For the callable function (as opposed to the inline one):
>          C  SSE  SSE2  SSE4
> Win32:  47   42   29    26
> Win64:  30   33   25    23
> The SSE version is neither compiled nor set for ARCH_X86_64, as the
> inlinable function takes over.
> ---
>  libavcodec/dcadec.c          |  3 ++
>  libavcodec/dcadsp.c          |  1 +
>  libavcodec/dcadsp.h          |  1 +
>  libavcodec/x86/Makefile      |  2 +
>  libavcodec/x86/dca.h         | 52 +++++++++++++++++++++++++
>  libavcodec/x86/dcadsp.asm    | 90 ++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/dcadsp_init.c | 47 +++++++++++++++++++++++
>  7 files changed, 196 insertions(+)
>  create mode 100644 libavcodec/x86/dca.h
>  create mode 100644 libavcodec/x86/dcadsp.asm
>  create mode 100644 libavcodec/x86/dcadsp_init.c
> 
> diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
> index b6df3b9..6ffb040 100644
> --- a/libavcodec/dcadec.c
> +++ b/libavcodec/dcadec.c
> @@ -50,6 +50,9 @@
>  #if ARCH_ARM
>  #   include "arm/dca.h"
>  #endif
> +#if ARCH_X86
> +#   include "x86/dca.h"
> +#endif
>  
>  //#define TRACE
>  
> diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
> index b984864..148f6dd 100644
> --- a/libavcodec/dcadsp.c
> +++ b/libavcodec/dcadsp.c
> @@ -88,4 +88,5 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
>      s->qmf_32_subbands = dca_qmf_32_subbands;
>      s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
>      if (ARCH_ARM) ff_dcadsp_init_arm(s);
> +    if (ARCH_X86) ff_dcadsp_init_x86(s);
>  }
> diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
> index 0f79dd6..e2ad09a 100644
> --- a/libavcodec/dcadsp.h
> +++ b/libavcodec/dcadsp.h
> @@ -36,5 +36,6 @@ typedef struct DCADSPContext {
>  
>  void ff_dcadsp_init(DCADSPContext *s);
>  void ff_dcadsp_init_arm(DCADSPContext *s);
> +void ff_dcadsp_init_x86(DCADSPContext *s);
>  
>  #endif /* AVCODEC_DCADSP_H */
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 6f4935b..f985525 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -4,6 +4,7 @@ OBJS                                   += x86/constants.o     
>           \
>  OBJS-$(CONFIG_AAC_DECODER)             += x86/sbrdsp_init.o
>  OBJS-$(CONFIG_AC3DSP)                  += x86/ac3dsp_init.o
>  OBJS-$(CONFIG_CAVS_DECODER)            += x86/cavsdsp.o
> +OBJS-$(CONFIG_DCA_DECODER)             += x86/dcadsp_init.o
>  OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
>  OBJS-$(CONFIG_DNXHD_ENCODER)           += x86/dnxhdenc.o
>  OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o            \
> @@ -54,6 +55,7 @@ YASM-OBJS                              += x86/deinterlace.o 
>             \
>  
>  YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
>  YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
> +YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o
>  YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
>  YASM-OBJS-$(CONFIG_DSPUTIL)            += x86/dsputil.o                 \
>                                            x86/fpel.o                    \
> diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h
> new file mode 100644
> index 0000000..c14e94f
> --- /dev/null
> +++ b/libavcodec/x86/dca.h
> @@ -0,0 +1,52 @@
> +/*
> + * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com>
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#if ARCH_X86_64
> +# include "libavutil/x86/asm.h"
> +# include "libavutil/mem.h"
> +
> +# define int8x8_fmul_int32 int8x8_fmul_int32
> +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
> +                                     float *dst, const int8_t *src, int 
> scale)
> +{
> +    DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000;
> +    __asm__ volatile (
> +        "cvtsi2ss        %2, %%xmm0 \n\t"
> +        "mulss           %3, %%xmm0 \n\t"
> +        "movq          (%1), %%xmm1 \n\t"
> +        "punpcklbw   %%xmm1, %%xmm1 \n\t"
> +        "movaps      %%xmm1, %%xmm2 \n\t"
> +        "punpcklwd   %%xmm1, %%xmm1 \n\t"
> +        "punpckhwd   %%xmm2, %%xmm2 \n\t"
> +        "psrad          $24, %%xmm1 \n\t"
> +        "psrad          $24, %%xmm2 \n\t"
> +        "shufps  $0, %%xmm0, %%xmm0 \n\t"
> +        "cvtdq2ps    %%xmm1, %%xmm1 \n\t"
> +        "cvtdq2ps    %%xmm2, %%xmm2 \n\t"
> +        "mulps       %%xmm0, %%xmm1 \n\t"
> +        "mulps       %%xmm0, %%xmm2 \n\t"
> +        "movaps      %%xmm1,  0(%0) \n\t"
> +        "movaps      %%xmm2, 16(%0) \n\t"
> +        :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16)
> +        XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2")
> +    );
> +}
> +
> +#endif /* ARCH_X86_64 */
> diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
> new file mode 100644
> index 0000000..214f514
> --- /dev/null
> +++ b/libavcodec/x86/dcadsp.asm
> @@ -0,0 +1,90 @@
> +;******************************************************************************
> +;* SSE-optimized functions for the DCA decoder
> +;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com>
> +;*
> +;* This file is part of Libav.
> +;*
> +;* Libav is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* Libav is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with Libav; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;******************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +pf_inv16:  times 4 dd 0x3D800000 ; 1/16
> +
> +SECTION_TEXT
> +
> +; void int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale)
> +%macro INT8X8_FMUL_INT32 0
> +cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
> +    cvtsi2ss    m0, scalem
> +    mulss       m0, [pf_inv16]
> +    shufps      m0, m0, 0
> +%if cpuflag(sse2)
> +%if cpuflag(sse4)
> +    pmovsxbd    m1, [srcq+0]
> +    pmovsxbd    m2, [srcq+4]
> +%else
> +    movq        m1, [srcq]
> +    punpcklbw   m1, m1
> +    mova        m2, m1
> +    punpcklwd   m1, m1
> +    punpckhwd   m2, m2
> +    psrad       m1, 24
> +    psrad       m2, 24
> +%endif
> +    cvtdq2ps    m1, m1
> +    cvtdq2ps    m2, m2
> +%else
> +    movd       mm0, [srcq+0]
> +    movd       mm1, [srcq+4]
> +    punpcklbw  mm0, mm0
> +    punpcklbw  mm1, mm1
> +    movq       mm2, mm0
> +    movq       mm3, mm1
> +    punpcklwd  mm0, mm0
> +    punpcklwd  mm1, mm1
> +    punpckhwd  mm2, mm2
> +    punpckhwd  mm3, mm3
> +    psrad      mm0, 24
> +    psrad      mm1, 24
> +    psrad      mm2, 24
> +    psrad      mm3, 24
> +    cvtpi2ps    m1, mm0
> +    cvtpi2ps    m2, mm1
> +    cvtpi2ps    m3, mm2
> +    cvtpi2ps    m4, mm3
> +    shufps      m0, m0, 0
> +    emms
> +    shufps      m1, m3, q1010
> +    shufps      m2, m4, q1010
> +%endif
> +    mulps       m1, m0
> +    mulps       m2, m0
> +    mova [dstq+ 0], m1
> +    mova [dstq+16], m2
> +    REP_RET
> +%endmacro
> +
> +%if ARCH_X86_32
> +INIT_XMM sse
> +INT8X8_FMUL_INT32
> +%endif
> +
> +INIT_XMM sse2
> +INT8X8_FMUL_INT32
> +
> +INIT_XMM sse4
> +INT8X8_FMUL_INT32

Do you have someone who's keen to review x86 asm?

> diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
> new file mode 100644
> index 0000000..976d8a3
> --- /dev/null
> +++ b/libavcodec/x86/dcadsp_init.c
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisq...@gmail.com>
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/attributes.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/x86/cpu.h"
> +#include "libavcodec/dcadsp.h"
> +
> +void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale);
> +void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale);
> +void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale);
> +
> +av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
> +{
> +    int cpu_flags = av_get_cpu_flags();
> +
> +    if (EXTERNAL_SSE(cpu_flags)) {
> +#if ARCH_X86_32

The ARCH_X86_32 check can be moved into the if condition. There is no need
to update the patch for this: it can be fixed before pushing if someone
remembers, and it is no problem if it's forgotten.

> +        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse;
> +#endif
> +    }
> +
> +    if (EXTERNAL_SSE2(cpu_flags)) {
> +        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2;
> +    }
> +
> +    if (EXTERNAL_SSE4(cpu_flags)) {
> +        s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4;
> +    }
> +}

otherwise ok

Janne
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to