On 2012-01-11 22:01:04 +0100, Christophe Gisquet wrote:
> From bcceaa75fa791ed1d1662b1b6c739f63842b3150 Mon Sep 17 00:00:00 2001
> From: Christophe GISQUET <christophe.gisq...@gmail.com>
> Date: Tue, 3 Jan 2012 20:38:29 +0100
> Subject: [PATCH 4/5] rv34: 1-pass inter MB reconstruction
> 
> Implement 1-pass inverse transform and reconstruction for inter blocks.
> ---
>  libavcodec/arm/rv34dsp_init_neon.c |    3 +-
>  libavcodec/rv34.c                  |  224 +++++++++++++++---------------------
>  libavcodec/rv34dsp.c               |   62 +++--------
>  libavcodec/rv34dsp.h               |    7 +-
>  libavcodec/x86/rv34dsp.asm         |    4 +-
>  libavcodec/x86/rv34dsp_init.c      |    3 +-
>  6 files changed, 115 insertions(+), 188 deletions(-)

[...]

> -static av_always_inline void rv34_row_transform(int temp[16], const DCTELEM *block)
> +static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
>  {
>      int i;
>  
>      for(i = 0; i < 4; i++){
> -        const int z0 = 13*(block[i+8*0] +    block[i+8*2]);
> -        const int z1 = 13*(block[i+8*0] -    block[i+8*2]);
> -        const int z2 =  7* block[i+8*1] - 17*block[i+8*3];
> -        const int z3 = 17* block[i+8*1] +  7*block[i+8*3];
> +        const int z0 = 13*(block[i+4*0] +    block[i+4*2]);

The NEON asm needs the same change (block row stride 8 -> 4).

> +        const int z1 = 13*(block[i+4*0] -    block[i+4*2]);
> +        const int z2 =  7* block[i+4*1] - 17*block[i+4*3];
> +        const int z3 = 17* block[i+4*1] +  7*block[i+4*3];
>  
>          temp[4*i+0] = z0 + z3;
>          temp[4*i+1] = z1 + z2;
> @@ -116,10 +94,10 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
>          const int z2 =  7* temp[4*1+i] - 17*temp[4*3+i];
>          const int z3 = 17* temp[4*1+i] +  7*temp[4*3+i];
>  
> -        block[i*8+0] = ((z0 + z3) * 3) >> 11;
> -        block[i*8+1] = ((z1 + z2) * 3) >> 11;
> -        block[i*8+2] = ((z1 - z2) * 3) >> 11;
> -        block[i*8+3] = ((z0 - z3) * 3) >> 11;
> +        block[i*4+0] = ((z0 + z3) * 3) >> 11;

Same here: the NEON asm needs the matching output stride change (8 -> 4).

> +        block[i*4+1] = ((z1 + z2) * 3) >> 11;
> +        block[i*4+2] = ((z1 - z2) * 3) >> 11;
> +        block[i*4+3] = ((z0 - z3) * 3) >> 11;
>      }
>  }
>  
> @@ -139,22 +117,12 @@ static void rv34_idct_dc_add_c(uint8_t *dst, int stride, int dc)
>      }
>  }
>  
> -static void rv34_inv_transform_dc_c(DCTELEM *block)
> -{
> -    DCTELEM dc = (13*13*block[0] + 0x200) >> 10;
> -    int i, j;
> -
> -    for (i = 0; i < 4; i++, block += 8)
> -        for (j = 0; j < 4; j++)
> -            block[j] = dc;
> -}
> -
>  static void rv34_inv_transform_dc_noround_c(DCTELEM *block)
>  {
>      DCTELEM dc = (13*13*3*block[0]) >> 11;
>      int i, j;
>  
> -    for (i = 0; i < 4; i++, block += 8)
> +    for (i = 0; i < 4; i++, block += 4)

And again: this stride change (8 -> 4) needs to be mirrored in the NEON asm.

I can make the NEON asm changes myself if the rv34_inv_transform[_dc]* functions are going to stay.

Janne
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to