On 24/04/14 23:11, Derek Buitenhuis wrote: > From: Michael Niedermayer <[email protected]> > > before: > 5225 decicycles in IDCT, 32756 runs, 12 skips > > after: > 5057 decicycles in IDCT, 32765 runs, 3 skips >
Do not push as is. It does not do just what it states. > Signed-off-by: Michael Niedermayer <[email protected]> > Signed-off-by: Derek Buitenhuis <[email protected]> > --- > libavcodec/fic.c | 13 +++++++------ > 1 file changed, 7 insertions(+), 6 deletions(-) > > diff --git a/libavcodec/fic.c b/libavcodec/fic.c > index 8512ef3..f9f935f 100644 > --- a/libavcodec/fic.c > +++ b/libavcodec/fic.c > @@ -79,7 +79,7 @@ static const uint8_t fic_header[7] = { 0, 0, 1, 'F', 'I', > 'C', 'V' }; > > #define FIC_HEADER_SIZE 27 > > -static av_always_inline void fic_idct(int16_t *blk, int step, int shift) > +static av_always_inline void fic_idct(int16_t *blk, int step, int shift, int > rnd) (Open question: why is the shift variable not constant-propagated, such that passing rnd becomes somehow useful?) > { > const int t0 = 27246 * blk[3 * step] + 18405 * blk[5 * step]; > const int t1 = 27246 * blk[5 * step] - 18405 * blk[3 * step]; > @@ -91,8 +91,8 @@ static av_always_inline void fic_idct(int16_t *blk, int > step, int shift) > const int t7 = t3 - t1; > const int t8 = 17734 * blk[2 * step] - 42813 * blk[6 * step]; > const int t9 = 17734 * blk[6 * step] + 42814 * blk[2 * step]; > - const int tA = (blk[0 * step] - blk[4 * step] << 15) + (1 << shift - 1); > - const int tB = (blk[0 * step] + blk[4 * step] << 15) + (1 << shift - 1); > + const int tA = (blk[0 * step] - blk[4 * step] << 15) + rnd; > + const int tB = (blk[0 * step] + blk[4 * step] << 15) + rnd; > blk[0 * step] = ( t4 + t9 + tB) >> shift; > blk[1 * step] = ( t6 + t7 + t8 + tA) >> shift; > blk[2 * step] = ( t6 - t7 - t8 + tA) >> shift; > @@ -109,14 +109,15 @@ static void fic_idct_put(uint8_t *dst, int stride, > int16_t *block) > int16_t *ptr; > > ptr = block; > - for (i = 0; i < 8; i++) { > - fic_idct(ptr, 8, 13); > + fic_idct(ptr++, 8, 13, (1 << 12) + (1 << 17)); Why is that? 
> + for (i = 1; i < 8; i++) { > + fic_idct(ptr, 8, 13, 1 << 12); > ptr++; > } > > ptr = block; > for (i = 0; i < 8; i++) { > - fic_idct(ptr, 1, 20); > + fic_idct(ptr, 1, 20, 0); > ptr += 8; > } I'm not looking forward to comparing the assembly generated by clang and recent gcc. lu _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
