On Tue, Oct 04, 2011 at 06:33:01AM +0000, Ronald S. Bultje wrote: > Hi, > > On Mon, Oct 3, 2011 at 6:59 PM, Diego Biurrun <[email protected]> wrote: > > On Mon, Oct 03, 2011 at 11:37:35AM -0700, Ronald S. Bultje wrote: > >> From: "Ronald S. Bultje" <[email protected]> > >> > >> --- a/libavcodec/dsputil.c > >> +++ b/libavcodec/dsputil.c > >> @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, > >> ScanTable *st, const uint8_t *src_s > >> > >> +void ff_init_scantable_permutation(uint8_t *idct_permutation, > >> + int idct_permutation_type) > >> +{ > >> + int i; > >> + > >> + switch(idct_permutation_type){ > >> + case FF_NO_IDCT_PERM: > >> + for(i=0; i<64; i++) > >> + idct_permutation[i]= i; > >> + break; > >> @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, > >> AVCodecContext *avctx) > >> > >> - switch(c->idct_permutation_type){ > >> - case FF_NO_IDCT_PERM: > >> - for(i=0; i<64; i++) > >> - c->idct_permutation[i]= i; > >> - break; > > > > Please maintain K&R style and keep the case statements at the indentation > > level as the switch. > > > >> --- /dev/null > >> +++ b/libavcodec/proresdsp.h > >> @@ -0,0 +1,38 @@ > >> + > >> +#ifndef LIBAVCODEC_PRORESDSP_H > >> +#define LIBAVCODEC_PRORESDSP_H > > > > AVCODEC_PRORESDSP_H > > > >> +#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of that decoder > > > > "that"? Just write "prores"... > > All fixed, see attached. > > Ronald
> > > > > Return-Path: <[email protected]> > Received: from localhost.localdomain (dhcp-172-22-79-63.mtv.corp.google.com > [172.22.79.63]) > by mx.google.com with ESMTPS id ji3sm56244687pbc.2.2011.10.03.11.37.41 > (version=TLSv1/SSLv3 cipher=OTHER); > Mon, 03 Oct 2011 11:37:41 -0700 (PDT) > From: "Ronald S. Bultje" <[email protected]> > To: [email protected] > Cc: "Ronald S. Bultje" <[email protected]> > Subject: [PATCH 1/3] prores: extract idct into its own dspcontext and merge > with put_pixels. > Date: Mon, 3 Oct 2011 11:37:35 -0700 > Message-Id: <[email protected]> > X-Mailer: git-send-email 1.7.6 > > From: "Ronald S. Bultje" <[email protected]> > > --- > libavcodec/Makefile | 2 +- > libavcodec/dsputil.c | 65 +++++++++++++++++++++---------------- > libavcodec/proresdec.c | 83 > +++++++++++++----------------------------------- > libavcodec/proresdsp.c | 61 +++++++++++++++++++++++++++++++++++ > libavcodec/proresdsp.h | 38 ++++++++++++++++++++++ > 5 files changed, 159 insertions(+), 90 deletions(-) > create mode 100644 libavcodec/proresdsp.c > create mode 100644 libavcodec/proresdsp.h > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 3c4e2f8..b7b5124 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER) += png.o pngdec.o > OBJS-$(CONFIG_PNG_ENCODER) += png.o pngenc.o > OBJS-$(CONFIG_PPM_DECODER) += pnmdec.o pnm.o > OBJS-$(CONFIG_PPM_ENCODER) += pnmenc.o pnm.o > -OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o > +OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o proresdsp.o > OBJS-$(CONFIG_PTX_DECODER) += ptx.o > OBJS-$(CONFIG_QCELP_DECODER) += qcelpdec.o celp_math.o \ > celp_filters.o acelp_vectors.o \ > diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c > index 967406e..e248516 100644 > --- a/libavcodec/dsputil.c > +++ b/libavcodec/dsputil.c > @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable > *st, const uint8_t *src_s > } > } > > +void ff_init_scantable_permutation(uint8_t *idct_permutation, > + int idct_permutation_type) > +{ > + int i; > + > + switch(idct_permutation_type){ > + case FF_NO_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= i; > + break; > + case FF_LIBMPEG2_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << > 2); > + break; > + case FF_SIMPLE_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= simple_mmx_permutation[i]; > + break; > + case FF_TRANSPOSE_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= ((i&7)<<3) | (i>>3); > + break; > + case FF_PARTTRANS_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); > + break; > + case FF_SSE2_IDCT_PERM: > + for(i=0; i<64; i++) > + idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; > + break; > + default: > + av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not > set\n"); > + } > +} > + > static int pix_sum_c(uint8_t * pix, int line_size) > { > int s, i, j; > @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, > AVCodecContext *avctx) > c->avg_2tap_qpel_pixels_tab[0][i]= > c->avg_h264_qpel_pixels_tab[0][i]; > } > > - switch(c->idct_permutation_type){ > - case FF_NO_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= i; > - break; > - case FF_LIBMPEG2_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) > << 2); > - break; > - case FF_SIMPLE_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= simple_mmx_permutation[i]; > - break; > - case FF_TRANSPOSE_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= ((i&7)<<3) | (i>>3); > - break; > - case FF_PARTTRANS_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); > - break; > - case FF_SSE2_IDCT_PERM: > - for(i=0; i<64; i++) > - c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; > - break; > - default: > - av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not > set\n"); > - } > + ff_init_scantable_permutation(c->idct_permutation, > + c->idct_permutation_type); > } > diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c > index c70d145..0424093 100644 > --- a/libavcodec/proresdec.c > +++ b/libavcodec/proresdec.c > @@ -34,17 +34,11 @@ > > #include "libavutil/intmath.h" > #include "avcodec.h" > -#include "dsputil.h" > +#include "proresdsp.h" > #include "get_bits.h" > > -#define BITS_PER_SAMPLE 10 ///< output > precision of that decoder > -#define BIAS (1 << (BITS_PER_SAMPLE - 1)) ///< bias value for > converting signed pixels into unsigned ones > -#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8)) ///< minimum value > for clipping resulting pixels > -#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1 ///< maximum value > for clipping resulting pixels > - > - > typedef struct { > - DSPContext dsp; > + ProresDSPContext dsp; > AVFrame picture; > ScanTable scantable; > int scantable_type; ///< -1 = uninitialized, 0 = > progressive, 1/2 = interlaced > @@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx) > > avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format > > - avctx->bits_per_raw_sample = BITS_PER_SAMPLE; > - dsputil_init(&ctx->dsp, avctx); > + avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE; > + ff_proresdsp_init(&ctx->dsp); > > avctx->coded_frame = &ctx->picture; > avcodec_get_frame_defaults(&ctx->picture); > @@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, > DCTELEM *out, > } > > > -#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX)) > - > -/** > - * Add bias value, clamp and output pixels of a slice > - */ > -static void put_pixels(const DCTELEM *in, uint16_t *out, int stride, > - int mbs_per_slice, int blocks_per_mb) > -{ > - int mb, x, y, src_offset, dst_offset; > - const DCTELEM *src1, *src2; > - uint16_t *dst1, *dst2; > - > - src1 = in; > - src2 = in + (blocks_per_mb << 5); > - dst1 = out; > - dst2 = out + (stride << 3); > - > - for (mb = 0; mb < mbs_per_slice; mb++) { > - for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) { > - for (x = 0; x < 8; x++) { > - src_offset = (y << 3) + x; > - > - dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]); > - dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]); > - > - if (blocks_per_mb > 2) { > - dst1[dst_offset + x + 8] = > - CLIP_AND_BIAS(src1[src_offset + 64]); > - dst2[dst_offset + x + 8] = > - CLIP_AND_BIAS(src2[src_offset + 64]); > - } > - } > - } > - > - src1 += blocks_per_mb << 6; > - src2 += blocks_per_mb << 6; > - dst1 += blocks_per_mb << 2; > - dst2 += blocks_per_mb << 2; > - } > -} > - > - > /** > * Decode a slice plane (luma or chroma). > */ > @@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const > uint8_t *buf, > { > GetBitContext gb; > DCTELEM *block_ptr; > - int i, blk_num, blocks_per_slice; > + int i, j, mb_num, blocks_per_slice; > > blocks_per_slice = mbs_per_slice * blocks_per_mb; > > @@ -518,20 +470,29 @@ static void decode_slice_plane(ProresContext *ctx, > const uint8_t *buf, > /* inverse quantization, inverse transform and output */ > block_ptr = ctx->blocks; > > - for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += > 64) { > + for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += > blocks_per_mb * 4) { > /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1 > * and the input of the inverse transform should be scaled by 2 > * in order to avoid rounding errors. > * Due to the fact the existing Libav transforms are incompatible > with > * that input I temporally introduced the coarse solution below... */ > - for (i = 0; i < 64; i++) > - block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2; > - > - ctx->dsp.idct(block_ptr); > + for (j = 0; j < blocks_per_mb; j++) > + for (i = 0; i < 64; i++) > + block_ptr[j * 64 + i] = (block_ptr[j * 64 + i] * qmat[i]) >> > 2; Why is it still here instead of inside DCT (like VP3 does IIRC)? _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
