On Sat, Apr 16, 2011 at 02:58:34PM -0400, Ronald S. Bultje wrote:
> IDCT coefficients are read transposed, but simple_idct does not expect
> this. Therefore, only do transposed coefficient reading if we're not
> using simple_idct.
>
> Fixes http://forum.videolan.org/viewtopic.php?f=14&t=89651
> ---
> libavcodec/vc1.h | 1 +
> libavcodec/vc1dec.c | 91
> ++++++++++++++++++++++++++++-----------------------
> 2 files changed, 51 insertions(+), 41 deletions(-)
>
> diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
> index d0c0ccc..db8a7f4 100644
> --- a/libavcodec/vc1.h
> +++ b/libavcodec/vc1.h
> @@ -218,6 +218,7 @@ typedef struct VC1Context{
> int range_x, range_y; ///< MV range
> uint8_t pq, altpq; ///< Current/alternate frame quantizer scale
> uint8_t zz_8x8[4][64];///< Zigzag table for TT_8x8, permuted for IDCT
> + int left_blk_sh, top_blk_sh; ///< Either 3 or 0, positions of l/t in
> blk[]
> const uint8_t* zz_8x4;///< Zigzag scan table for TT_8x4 coding mode
> const uint8_t* zz_4x8;///< Zigzag scan table for TT_4x8 coding mode
> /** pquant parameters */
> diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
> index 8200cde..765eff9 100644
> --- a/libavcodec/vc1dec.c
> +++ b/libavcodec/vc1dec.c
> @@ -1499,16 +1499,16 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM
> block[64], int n, int coded
> if(s->ac_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k];
> + block[k << v->left_blk_sh] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k + 8];
> + block[k << v->top_blk_sh ] += ac_val[k + 8];
stray whitespace before the closing bracket in the added line above
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k];
> - ac_val2[k + 8] = block[k << 3];
> + ac_val2[k] = block[k << v->left_blk_sh];
> + ac_val2[k + 8] = block[k << v->top_blk_sh];
> }
>
> /* scale AC coeffs */
> @@ -1545,15 +1545,15 @@ not_coded:
> if(s->ac_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val[k] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -v->pq : v->pq;
> + block[k << v->left_blk_sh] = ac_val[k] * scale;
> + if(!v->pquantizer && block[k << v->left_blk_sh])
> + block[k << v->left_blk_sh] += (block[k <<
> v->left_blk_sh] < 0) ? -v->pq : v->pq;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val[k + 8] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -v->pq :
> v->pq;
> + block[k << v->top_blk_sh] = ac_val[k + 8] * scale;
> + if(!v->pquantizer && block[k << v->top_blk_sh])
> + block[k << v->top_blk_sh] += (block[k <<
> v->top_blk_sh] < 0) ? -v->pq : v->pq;
> }
> }
> i = 63;
> @@ -1680,25 +1680,25 @@ static int vc1_decode_i_block_adv(VC1Context *v,
> DCTELEM block[64], int n, int c
>
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1]
> + 0x20000) >> 18;
> + block[k << v->left_blk_sh] += (ac_val[k] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k << 3] += (ac_val[k + 8] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> }
> } else {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k];
> + block[k << v->left_blk_sh] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k + 8];
> + block[k << v->top_blk_sh] += ac_val[k + 8];
> }
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k];
> - ac_val2[k + 8] = block[k << 3];
> + ac_val2[k ] = block[k << v->left_blk_sh];
> + ac_val2[k + 8] = block[k << v->top_blk_sh ];
> }
>
> /* scale AC coeffs */
> @@ -1740,15 +1740,15 @@ static int vc1_decode_i_block_adv(VC1Context *v,
> DCTELEM block[64], int n, int c
> if(use_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val2[k] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -mquant : mquant;
> + block[k << v->left_blk_sh] = ac_val2[k] * scale;
> + if(!v->pquantizer && block[k << v->left_blk_sh])
> + block[k << v->left_blk_sh] += (block[k <<
> v->left_blk_sh] < 0) ? -mquant : mquant;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val2[k + 8] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -mquant :
> mquant;
> + block[k << v->top_blk_sh] = ac_val2[k + 8] * scale;
> + if(!v->pquantizer && block[k << v->top_blk_sh])
> + block[k << v->top_blk_sh] += (block[k <<
> v->top_blk_sh] < 0) ? -mquant : mquant;
> }
> }
> i = 63;
> @@ -1878,25 +1878,25 @@ static int vc1_decode_intra_block(VC1Context *v,
> DCTELEM block[64], int n, int c
>
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1]
> + 0x20000) >> 18;
> + block[k << v->left_blk_sh] += (ac_val[k] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k << 3] += (ac_val[k + 8] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> + block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 *
> ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
> }
> } else {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++)
> - block[k] += ac_val[k];
> + block[k << v->left_blk_sh] += ac_val[k];
> } else { //top
> for(k = 1; k < 8; k++)
> - block[k << 3] += ac_val[k + 8];
> + block[k << v->top_blk_sh] += ac_val[k + 8];
> }
> }
> }
> /* save AC coeffs for further prediction */
> for(k = 1; k < 8; k++) {
> - ac_val2[k] = block[k];
> - ac_val2[k + 8] = block[k << 3];
> + ac_val2[k ] = block[k << v->left_blk_sh];
> + ac_val2[k + 8] = block[k << v->top_blk_sh ];
> }
>
> /* scale AC coeffs */
> @@ -1938,15 +1938,15 @@ static int vc1_decode_intra_block(VC1Context *v,
> DCTELEM block[64], int n, int c
> if(use_pred) {
> if(dc_pred_dir) { //left
> for(k = 1; k < 8; k++) {
> - block[k] = ac_val2[k] * scale;
> - if(!v->pquantizer && block[k])
> - block[k] += (block[k] < 0) ? -mquant : mquant;
> + block[k << v->left_blk_sh] = ac_val2[k] * scale;
> + if(!v->pquantizer && block[k << v->left_blk_sh])
> + block[k << v->left_blk_sh] += (block[k <<
> v->left_blk_sh] < 0) ? -mquant : mquant;
> }
> } else { //top
> for(k = 1; k < 8; k++) {
> - block[k << 3] = ac_val2[k + 8] * scale;
> - if(!v->pquantizer && block[k << 3])
> - block[k << 3] += (block[k << 3] < 0) ? -mquant :
> mquant;
> + block[k << v->top_blk_sh] = ac_val2[k + 8] * scale;
> + if(!v->pquantizer && block[k << v->top_blk_sh])
> + block[k << v->top_blk_sh] += (block[k <<
> v->top_blk_sh] < 0) ? -mquant : mquant;
> }
> }
> i = 63;
> @@ -3236,13 +3236,6 @@ static av_cold int vc1_decode_init(AVCodecContext
> *avctx)
> return -1;
> if (vc1_init_common(v) < 0) return -1;
> ff_vc1dsp_init(&v->vc1dsp);
> - for (i = 0; i < 64; i++) {
> -#define transpose(x) ((x>>3) | ((x&7)<<3))
> - v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
> - v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]);
> - v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]);
> - v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]);
> - }
>
> avctx->coded_width = avctx->width;
> avctx->coded_height = avctx->height;
> @@ -3326,6 +3319,22 @@ static av_cold int vc1_decode_init(AVCodecContext
> *avctx)
> s->mb_width = (avctx->coded_width+15)>>4;
> s->mb_height = (avctx->coded_height+15)>>4;
>
> + if (v->res_fasttx) {
> + for (i = 0; i < 64; i++) {
> +#define transpose(x) ((x>>3) | ((x&7)<<3))
> + v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
> + v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]);
> + v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]);
> + v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]);
> + }
> + v->left_blk_sh = 0;
> + v->top_blk_sh = 3;
> + } else {
> + memcpy(v->zz_8x8, wmv1_scantable, 4*64);
> + v->left_blk_sh = 3;
> + v->top_blk_sh = 0;
> + }
> +
> /* Allocate mb bitplanes */
> v->mv_type_mb_plane = av_malloc(s->mb_stride * s->mb_height);
> v->direct_mb_plane = av_malloc(s->mb_stride * s->mb_height);
> --
In general this looks OK, but won't it cause a noticeable performance drop?
If so, then maybe it's better to use a single flag and have two code paths with
constants.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel