On Wed, Sep 28, 2011 at 09:52:26PM +0100, Mans Rullgard wrote: > Signed-off-by: Mans Rullgard <[email protected]> > --- > libavcodec/arm/dca.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ > libavcodec/dca.c | 26 ++++++++++++++++++-------- > libavcodec/dcadata.h | 2 +- > 3 files changed, 66 insertions(+), 9 deletions(-) > create mode 100644 libavcodec/arm/dca.h > > diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h > new file mode 100644 > index 0000000..4782f6e > --- /dev/null > +++ b/libavcodec/arm/dca.h > @@ -0,0 +1,47 @@ > +/* > + * Copyright (c) 2011 Mans Rullgard <[email protected]> > + * > + * This file is part of Libav. > + * > + * Libav is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * Libav is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with Libav; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCODEC_ARM_DCA_H > +#define AVCODEC_ARM_DCA_H > + > +#include "config.h" > + > +#if HAVE_NEON && HAVE_INLINE_ASM > + > +#define int8x8_fmul_int32 int8x8_fmul_int32 > +static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int > scale) > +{ > + __asm__ ("vld1.8 {d0}, [%1,:64] \n" > + "vmovl.s8 q0, d0 \n" > + "vmovl.s16 q1, d1 \n" > + "vmovl.s16 q0, d0 \n" > + "vcvt.f32.s32 q0, q0 \n" > + "vcvt.f32.s32 q1, q1 \n" > + "vmul.f32 q0, q0, %y2 \n" > + "vmul.f32 q1, q1, %y2 \n" > + "vst1.32 {q0-q1}, [%m0,:128] \n" > + : "=Um"(*(float (*)[8])dst) > + : "r"(src), "x"(scale / 16.0f) > + : "d0", "d1", "d2", "d3"); > +} > + > +#endif > + > +#endif /* AVCODEC_ARM_DCA_H */ > diff --git a/libavcodec/dca.c b/libavcodec/dca.c > index 735d7ba..ee065e9 100644 > --- a/libavcodec/dca.c > +++ b/libavcodec/dca.c > @@ -42,6 +42,10 @@ > #include "dcadsp.h" > #include "fmtconvert.h" > > +#if ARCH_ARM > +# include "arm/dca.h" > +#endif > + > //#define TRACE > > #define DCA_PRIM_CHANNELS_MAX (7) > @@ -320,7 +324,7 @@ typedef struct { > int lfe_scale_factor; > > /* Subband samples history (for ADPCM) */ > - float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; > + DECLARE_ALIGNED(16, float, > subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4]; > DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512]; > DECLARE_ALIGNED(32, float, > subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32]; > int hist_index[DCA_PRIM_CHANNELS_MAX]; > @@ -1057,6 +1061,16 @@ static int decode_blockcode(int code, int levels, int > *values) > static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 }; > static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; > > +#ifndef int8x8_fmul_int32 > +static inline 
void int8x8_fmul_int32(float *dst, const int8_t *src, int > scale) > +{ > + float fscale = scale / 16.0; > + int i; > + for (i = 0; i < 8; i++) > + dst[i] = src[i] * fscale; > +} > +#endif > + > static int dca_subsubframe(DCAContext * s, int base_channel, int block_index) > { > int k, l; > @@ -1161,19 +1175,15 @@ static int dca_subsubframe(DCAContext * s, int > base_channel, int block_index) > for (l = s->vq_start_subband[k]; l < s->subband_activity[k]; l++) { > /* 1 vector -> 32 samples but we only need the 8 samples > * for this subsubframe. */ > - int m; > > if (!s->debug_flag & 0x01) { > av_log(s->avctx, AV_LOG_DEBUG, "Stream with high frequencies > VQ coding\n"); > s->debug_flag |= 0x01; > } > > - for (m = 0; m < 8; m++) { > - subband_samples[k][l][m] = > - high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 8 + > - m] > - * (float) s->scale_factor[k][l][0] / 16.0; > - } > + int8x8_fmul_int32(subband_samples[k][l], > + &high_freq_vq[s->high_freq_vq[k][l]][subsubframe * > 8], > + s->scale_factor[k][l][0]);
Diego might complain about the indentation here. In general, looks good to me. _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
