On 2014-02-07 21:51:02 +0100, Christophe Gisquet wrote: > From c233b96fa29150b0385776499e90afc9b59405b5 Mon Sep 17 00:00:00 2001 > From: Christophe Gisquet <[email protected]> > Date: Fri, 11 May 2012 11:17:36 +0200 > Subject: [PATCH 01/10] dcadsp: add int8x8_fmul_int32 to DSP context > > It is currently declared as a macro which is set to inlinable functions, > among which are a NEON and a default C implementation. > > Add a DSP parameter to each inline function, unused except by the > default C implementation which calls a function from the DSP context. > > On an Arrandale CPU, gain for an inlined SSE2 function vs. a call: > - Win32: 29 to 26 cycles > - Win64: 25 to 23 cycles > --- > libavcodec/arm/dca.h | 3 ++- > libavcodec/dcadec.c | 10 ++++------ > libavcodec/dcadsp.c | 9 +++++++++ > libavcodec/dcadsp.h | 1 + > 4 files changed, 16 insertions(+), 7 deletions(-) > > diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h > index 39ec2b6..06e3ea6 100644 > --- a/libavcodec/arm/dca.h > +++ b/libavcodec/arm/dca.h > @@ -83,7 +83,8 @@ static inline int decode_blockcodes(int code1, int code2, > int levels, > #if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y > > #define int8x8_fmul_int32 int8x8_fmul_int32 > -static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int > scale) > +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, > + float *dst, const int8_t *src, int > scale) > { > __asm__ ("vcvt.f32.s32 %2, %2, #4 \n" > "vld1.8 {d0}, [%1,:64] \n" > diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c > index f9e39bc..b6df3b9 100644 > --- a/libavcodec/dcadec.c > +++ b/libavcodec/dcadec.c > @@ -1086,12 +1086,10 @@ static const uint8_t abits_sizes[7] = { 7, 10, 12, > 13, 15, 17, 19 }; > static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; > > #ifndef int8x8_fmul_int32 > -static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int > scale) > +static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst, > + const int8_t *src, 
int scale) > { > - float fscale = scale / 16.0; > - int i; > - for (i = 0; i < 8; i++) > - dst[i] = src[i] * fscale; > + dsp->int8x8_fmul_int32(dst, src, scale); > } > #endif > > @@ -1219,7 +1217,7 @@ static int dca_subsubframe(DCAContext *s, int > base_channel, int block_index) > s->debug_flag |= 0x01; > } > > - int8x8_fmul_int32(subband_samples[k][l], > + int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l], > &high_freq_vq[hfvq][subsubframe * 8], > s->scale_factor[k][l][0]); > } > diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c > index 57d716e..b984864 100644 > --- a/libavcodec/dcadsp.c > +++ b/libavcodec/dcadsp.c > @@ -24,6 +24,14 @@ > #include "libavutil/intreadwrite.h" > #include "dcadsp.h" > > +static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale) > +{ > + float fscale = scale / 16.0; > + int i; > + for (i = 0; i < 8; i++) > + dst[i] = src[i] * fscale; > +} > + > static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, > int decifactor, float scale) > { > @@ -78,5 +86,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) > { > s->lfe_fir = dca_lfe_fir_c; > s->qmf_32_subbands = dca_qmf_32_subbands; > + s->int8x8_fmul_int32 = int8x8_fmul_int32_c; > if (ARCH_ARM) ff_dcadsp_init_arm(s); > } > diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h > index ec88be7..0f79dd6 100644 > --- a/libavcodec/dcadsp.h > +++ b/libavcodec/dcadsp.h > @@ -31,6 +31,7 @@ typedef struct DCADSPContext { > int *synth_buf_offset, float synth_buf2[32], > const float window[512], float *samples_out, > float raXin[32], float scale); > + void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale); > } DCADSPContext; > > void ff_dcadsp_init(DCADSPContext *s);
OK and queued. Janne _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
