On 2012-01-11 22:01:04 +0100, Christophe Gisquet wrote:
> From bcceaa75fa791ed1d1662b1b6c739f63842b3150 Mon Sep 17 00:00:00 2001
> From: Christophe GISQUET <christophe.gisq...@gmail.com>
> Date: Tue, 3 Jan 2012 20:38:29 +0100
> Subject: [PATCH 4/5] rv34: 1-pass inter MB reconstruction
>
> Implement 1-pass inverse transform and reconstruction for inter blocks.
> ---
> libavcodec/arm/rv34dsp_init_neon.c | 3 +-
> libavcodec/rv34.c | 224 +++++++++++++++--------------------
> libavcodec/rv34dsp.c | 62 +++--------
> libavcodec/rv34dsp.h | 7 +-
> libavcodec/x86/rv34dsp.asm | 4 +-
> libavcodec/x86/rv34dsp_init.c | 3 +-
> 6 files changed, 115 insertions(+), 188 deletions(-)
[...]

> -static av_always_inline void rv34_row_transform(int temp[16], const DCTELEM *block)
> +static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
> {
> int i;
>
> for(i = 0; i < 4; i++){
> - const int z0 = 13*(block[i+8*0] + block[i+8*2]);
> - const int z1 = 13*(block[i+8*0] - block[i+8*2]);
> - const int z2 = 7* block[i+8*1] - 17*block[i+8*3];
> - const int z3 = 17* block[i+8*1] + 7*block[i+8*3];
> + const int z0 = 13*(block[i+4*0] + block[i+4*2]);

that needs the same change in the neon asm

> + const int z1 = 13*(block[i+4*0] - block[i+4*2]);
> + const int z2 = 7* block[i+4*1] - 17*block[i+4*3];
> + const int z3 = 17* block[i+4*1] + 7*block[i+4*3];
>
> temp[4*i+0] = z0 + z3;
> temp[4*i+1] = z1 + z2;
> @@ -116,10 +94,10 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
> const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i];
> const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i];
>
> - block[i*8+0] = ((z0 + z3) * 3) >> 11;
> - block[i*8+1] = ((z1 + z2) * 3) >> 11;
> - block[i*8+2] = ((z1 - z2) * 3) >> 11;
> - block[i*8+3] = ((z0 - z3) * 3) >> 11;
> + block[i*4+0] = ((z0 + z3) * 3) >> 11;

here too

> + block[i*4+1] = ((z1 + z2) * 3) >> 11;
> + block[i*4+2] = ((z1 - z2) * 3) >> 11;
> + block[i*4+3] = ((z0 - z3) * 3) >> 11;
> }
> }
>
> @@ -139,22 +117,12 @@ static void rv34_idct_dc_add_c(uint8_t *dst, int stride, int dc)
> }
> }
>
> -static void rv34_inv_transform_dc_c(DCTELEM *block)
> -{
> - DCTELEM dc = (13*13*block[0] + 0x200) >> 10;
> - int i, j;
> -
> - for (i = 0; i < 4; i++, block += 8)
> - for (j = 0; j < 4; j++)
> - block[j] = dc;
> -}
> -
> static void rv34_inv_transform_dc_noround_c(DCTELEM *block)
> {
> DCTELEM dc = (13*13*3*block[0]) >> 11;
> int i, j;
>
> - for (i = 0; i < 4; i++, block += 8)
> + for (i = 0; i < 4; i++, block += 4)

and again

I can do that if rv34_inv_transform[_dc]* is going to stay

Janne
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel