Re: [libav-devel] [PATCH 2/2] dca: NEON optimised high freq VQ decoding

Kostya Shishkov Wed, 28 Sep 2011 23:00:01 -0700

On Wed, Sep 28, 2011 at 09:52:26PM +0100, Mans Rullgard wrote:
> Signed-off-by: Mans Rullgard <[email protected]>
> ---
>  libavcodec/arm/dca.h |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/dca.c     |   26 ++++++++++++++++++--------
>  libavcodec/dcadata.h |    2 +-
>  3 files changed, 66 insertions(+), 9 deletions(-)
>  create mode 100644 libavcodec/arm/dca.h
> 
> diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
> new file mode 100644
> index 0000000..4782f6e
> --- /dev/null
> +++ b/libavcodec/arm/dca.h
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright (c) 2011 Mans Rullgard <[email protected]>
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#ifndef AVCODEC_ARM_DCA_H
> +#define AVCODEC_ARM_DCA_H
> +
> +#include "config.h"
> +
> +#if HAVE_NEON && HAVE_INLINE_ASM
> +
> +#define int8x8_fmul_int32 int8x8_fmul_int32
> +static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int 
> scale)
> +{
> +    __asm__ ("vld1.8       {d0},     [%1,:64]   \n"
> +             "vmovl.s8     q0,  d0              \n"
> +             "vmovl.s16    q1,  d1              \n"
> +             "vmovl.s16    q0,  d0              \n"
> +             "vcvt.f32.s32 q0,  q0              \n"
> +             "vcvt.f32.s32 q1,  q1              \n"
> +             "vmul.f32     q0,  q0,  %y2        \n"
> +             "vmul.f32     q1,  q1,  %y2        \n"
> +             "vst1.32      {q0-q1},  [%m0,:128] \n"
> +             : "=Um"(*(float (*)[8])dst)
> +             : "r"(src), "x"(scale / 16.0f)
> +             : "d0", "d1", "d2", "d3");
> +}
> +
> +#endif
> +
> +#endif /* AVCODEC_ARM_DCA_H */
> diff --git a/libavcodec/dca.c b/libavcodec/dca.c
> index 735d7ba..ee065e9 100644
> --- a/libavcodec/dca.c
> +++ b/libavcodec/dca.c
> @@ -42,6 +42,10 @@
>  #include "dcadsp.h"
>  #include "fmtconvert.h"
>  
> +#if ARCH_ARM
> +#   include "arm/dca.h"
> +#endif
> +
>  //#define TRACE
>  
>  #define DCA_PRIM_CHANNELS_MAX (7)
> @@ -320,7 +324,7 @@ typedef struct {
>      int lfe_scale_factor;
>  
>      /* Subband samples history (for ADPCM) */
> -    float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
> +    DECLARE_ALIGNED(16, float, 
> subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
>      DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
>      DECLARE_ALIGNED(32, float, 
> subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32];
>      int hist_index[DCA_PRIM_CHANNELS_MAX];
> @@ -1057,6 +1061,16 @@ static int decode_blockcode(int code, int levels, int 
> *values)
>  static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
>  static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
>  
> +#ifndef int8x8_fmul_int32
> +static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int 
> scale)
> +{
> +    float fscale = scale / 16.0;
> +    int i;
> +    for (i = 0; i < 8; i++)
> +        dst[i] = src[i] * fscale;
> +}
> +#endif
> +
>  static int dca_subsubframe(DCAContext * s, int base_channel, int block_index)
>  {
>      int k, l;
> @@ -1161,19 +1175,15 @@ static int dca_subsubframe(DCAContext * s, int 
> base_channel, int block_index)
>          for (l = s->vq_start_subband[k]; l < s->subband_activity[k]; l++) {
>              /* 1 vector -> 32 samples but we only need the 8 samples
>               * for this subsubframe. */
> -            int m;
>  
>              if (!s->debug_flag & 0x01) {
>                  av_log(s->avctx, AV_LOG_DEBUG, "Stream with high frequencies 
> VQ coding\n");
>                  s->debug_flag |= 0x01;
>              }
>  
> -            for (m = 0; m < 8; m++) {
> -                subband_samples[k][l][m] =
> -                    high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 8 +
> -                                                        m]
> -                    * (float) s->scale_factor[k][l][0] / 16.0;
> -            }
> +            int8x8_fmul_int32(subband_samples[k][l],
> +                          &high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 
> 8],
> +                          s->scale_factor[k][l][0]);


some Diego might complain about indentation here

In general looks good to me
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] dca: NEON optimised high freq VQ decoding

Reply via email to