Re: [libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Kostya Shishkov Thu, 06 Oct 2011 07:19:26 -0700

On Tue, Oct 04, 2011 at 06:33:01AM +0000, Ronald S. Bultje wrote:
> Hi,
> 
> On Mon, Oct 3, 2011 at 6:59 PM, Diego Biurrun <[email protected]> wrote:
> > On Mon, Oct 03, 2011 at 11:37:35AM -0700, Ronald S. Bultje wrote:
> >> From: "Ronald S. Bultje" <[email protected]>
> >>
> >> --- a/libavcodec/dsputil.c
> >> +++ b/libavcodec/dsputil.c
> >> @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, 
> >> ScanTable *st, const uint8_t *src_s
> >>
> >> +void ff_init_scantable_permutation(uint8_t *idct_permutation,
> >> +                                   int idct_permutation_type)
> >> +{
> >> +    int i;
> >> +
> >> +    switch(idct_permutation_type){
> >> +        case FF_NO_IDCT_PERM:
> >> +            for(i=0; i<64; i++)
> >> +                idct_permutation[i]= i;
> >> +            break;
> >> @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, 
> >> AVCodecContext *avctx)
> >>
> >> -    switch(c->idct_permutation_type){
> >> -    case FF_NO_IDCT_PERM:
> >> -        for(i=0; i<64; i++)
> >> -            c->idct_permutation[i]= i;
> >> -        break;
> >
> > Please maintain K&R style and keep the case statements at the same
> > indentation level as the switch.
> >
> >> --- /dev/null
> >> +++ b/libavcodec/proresdsp.h
> >> @@ -0,0 +1,38 @@
> >> +
> >> +#ifndef LIBAVCODEC_PRORESDSP_H
> >> +#define LIBAVCODEC_PRORESDSP_H
> >
> > AVCODEC_PRORESDSP_H
> >
> >> +#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of that decoder
> >
> > "that"?  Just write "prores"...
> 
> All fixed, see attached.
> 
> Ronald


>                                                                               
>                                                                               
>                                                                               
>                      
> Return-Path: <[email protected]>
> Received: from localhost.localdomain (dhcp-172-22-79-63.mtv.corp.google.com 
> [172.22.79.63])
>         by mx.google.com with ESMTPS id ji3sm56244687pbc.2.2011.10.03.11.37.41
>         (version=TLSv1/SSLv3 cipher=OTHER);
>         Mon, 03 Oct 2011 11:37:41 -0700 (PDT)
> From: "Ronald S. Bultje" <[email protected]>
> To: [email protected]
> Cc: "Ronald S. Bultje" <[email protected]>
> Subject: [PATCH 1/3] prores: extract idct into its own dspcontext and merge 
> with put_pixels.
> Date: Mon,  3 Oct 2011 11:37:35 -0700
> Message-Id: <[email protected]>
> X-Mailer: git-send-email 1.7.6
> 
> From: "Ronald S. Bultje" <[email protected]>
> 
> ---
>  libavcodec/Makefile    |    2 +-
>  libavcodec/dsputil.c   |   65 +++++++++++++++++++++----------------
>  libavcodec/proresdec.c |   83 
> +++++++++++++-----------------------------------
>  libavcodec/proresdsp.c |   61 +++++++++++++++++++++++++++++++++++
>  libavcodec/proresdsp.h |   38 ++++++++++++++++++++++
>  5 files changed, 159 insertions(+), 90 deletions(-)
>  create mode 100644 libavcodec/proresdsp.c
>  create mode 100644 libavcodec/proresdsp.h
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3c4e2f8..b7b5124 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
>  OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
>  OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
>  OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
> -OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
> +OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
>  OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
>  OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
>                                            celp_filters.o acelp_vectors.o \
> diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
> index 967406e..e248516 100644
> --- a/libavcodec/dsputil.c
> +++ b/libavcodec/dsputil.c
> @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable 
> *st, const uint8_t *src_s
>      }
>  }
>  
> +void ff_init_scantable_permutation(uint8_t *idct_permutation,
> +                                   int idct_permutation_type)
> +{
> +    int i;
> +
> +    switch(idct_permutation_type){
> +    case FF_NO_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= i;
> +        break;
> +    case FF_LIBMPEG2_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 
> 2);
> +        break;
> +    case FF_SIMPLE_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= simple_mmx_permutation[i];
> +        break;
> +    case FF_TRANSPOSE_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= ((i&7)<<3) | (i>>3);
> +        break;
> +    case FF_PARTTRANS_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
> +        break;
> +    case FF_SSE2_IDCT_PERM:
> +        for(i=0; i<64; i++)
> +            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
> +        break;
> +    default:
> +        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not 
> set\n");
> +    }
> +}
> +
>  static int pix_sum_c(uint8_t * pix, int line_size)
>  {
>      int s, i, j;
> @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, 
> AVCodecContext *avctx)
>              c->avg_2tap_qpel_pixels_tab[0][i]= 
> c->avg_h264_qpel_pixels_tab[0][i];
>      }
>  
> -    switch(c->idct_permutation_type){
> -    case FF_NO_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= i;
> -        break;
> -    case FF_LIBMPEG2_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) 
> << 2);
> -        break;
> -    case FF_SIMPLE_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= simple_mmx_permutation[i];
> -        break;
> -    case FF_TRANSPOSE_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
> -        break;
> -    case FF_PARTTRANS_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
> -        break;
> -    case FF_SSE2_IDCT_PERM:
> -        for(i=0; i<64; i++)
> -            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
> -        break;
> -    default:
> -        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not 
> set\n");
> -    }
> +    ff_init_scantable_permutation(c->idct_permutation,
> +                                  c->idct_permutation_type);
>  }
> diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
> index c70d145..0424093 100644
> --- a/libavcodec/proresdec.c
> +++ b/libavcodec/proresdec.c
> @@ -34,17 +34,11 @@
>  
>  #include "libavutil/intmath.h"
>  #include "avcodec.h"
> -#include "dsputil.h"
> +#include "proresdsp.h"
>  #include "get_bits.h"
>  
> -#define BITS_PER_SAMPLE 10                              ///< output 
> precision of that decoder
> -#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for 
> converting signed pixels into unsigned ones
> -#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value 
> for clipping resulting pixels
> -#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value 
> for clipping resulting pixels
> -
> -
>  typedef struct {
> -    DSPContext dsp;
> +    ProresDSPContext dsp;
>      AVFrame    picture;
>      ScanTable  scantable;
>      int        scantable_type;           ///< -1 = uninitialized, 0 = 
> progressive, 1/2 = interlaced
> @@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
>  
>      avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
>  
> -    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
> -    dsputil_init(&ctx->dsp, avctx);
> +    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
> +    ff_proresdsp_init(&ctx->dsp);
>  
>      avctx->coded_frame = &ctx->picture;
>      avcodec_get_frame_defaults(&ctx->picture);
> @@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, 
> DCTELEM *out,
>  }
>  
>  
> -#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
> -
> -/**
> - * Add bias value, clamp and output pixels of a slice
> - */
> -static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
> -                       int mbs_per_slice, int blocks_per_mb)
> -{
> -    int mb, x, y, src_offset, dst_offset;
> -    const DCTELEM *src1, *src2;
> -    uint16_t *dst1, *dst2;
> -
> -    src1 = in;
> -    src2 = in + (blocks_per_mb << 5);
> -    dst1 = out;
> -    dst2 = out + (stride << 3);
> -
> -    for (mb = 0; mb < mbs_per_slice; mb++) {
> -        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
> -            for (x = 0; x < 8; x++) {
> -                src_offset = (y << 3) + x;
> -
> -                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
> -                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
> -
> -                if (blocks_per_mb > 2) {
> -                    dst1[dst_offset + x + 8] =
> -                        CLIP_AND_BIAS(src1[src_offset + 64]);
> -                    dst2[dst_offset + x + 8] =
> -                        CLIP_AND_BIAS(src2[src_offset + 64]);
> -                }
> -            }
> -        }
> -
> -        src1 += blocks_per_mb << 6;
> -        src2 += blocks_per_mb << 6;
> -        dst1 += blocks_per_mb << 2;
> -        dst2 += blocks_per_mb << 2;
> -    }
> -}
> -
> -
>  /**
>   * Decode a slice plane (luma or chroma).
>   */
> @@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const 
> uint8_t *buf,
>  {
>      GetBitContext gb;
>      DCTELEM *block_ptr;
> -    int i, blk_num, blocks_per_slice;
> +    int i, j, mb_num, blocks_per_slice;
>  
>      blocks_per_slice = mbs_per_slice * blocks_per_mb;
>  
> @@ -518,20 +470,29 @@ static void decode_slice_plane(ProresContext *ctx, 
> const uint8_t *buf,
>      /* inverse quantization, inverse transform and output */
>      block_ptr = ctx->blocks;
>  
> -    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 
> 64) {
> +    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += 
> blocks_per_mb * 4) {
>          /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
>           * and the input of the inverse transform should be scaled by 2
>           * in order to avoid rounding errors.
>           * Due to the fact the existing Libav transforms are incompatible 
> with
>           * that input I temporally introduced the coarse solution below... */
> -        for (i = 0; i < 64; i++)
> -            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
> -
> -        ctx->dsp.idct(block_ptr);
> +        for (j = 0; j < blocks_per_mb; j++)
> +            for (i = 0; i < 64; i++)
> +                block_ptr[j * 64 + i] = (block_ptr[j * 64 + i] * qmat[i]) >> 
> 2;

Why is this dequantization still here instead of inside the IDCT (like VP3 does, IIRC)?
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/3] prores: extract idct into its own dspcontext and merge with put_pixels.

Reply via email to