On Sun, May 01, 2011 at 10:35:14PM -0400, Ronald S. Bultje wrote:
> ---
> libavcodec/vc1.h | 2 +
> libavcodec/vc1dec.c | 327
> ++++++++++++++++++++++++++++++++++++++++-----------
> libavcodec/vc1dsp.c | 58 +++++++++-
> libavcodec/vc1dsp.h | 6 +-
> 4 files changed, 321 insertions(+), 72 deletions(-)
>
> diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
> index db8a7f4..96e5744 100644
> --- a/libavcodec/vc1.h
> +++ b/libavcodec/vc1.h
> @@ -317,6 +317,8 @@ typedef struct VC1Context{
> int bi_type;
> int x8_type;
>
> + DCTELEM (*block)[6][64];
> + int n_allocated_blks, cur_blk_idx, left_blk_idx, topleft_blk_idx,
> top_blk_idx;
> uint32_t *cbp_base, *cbp;
> uint8_t *is_intra_base, *is_intra;
> int16_t (*luma_mv_base)[2], (*luma_mv)[2];
> diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
> index 7097c81..2768ad0 100644
> --- a/libavcodec/vc1dec.c
> +++ b/libavcodec/vc1dec.c
> @@ -160,6 +160,70 @@ enum Imode {
>
> /** @} */ //Bitplane group
>
> +static void vc1_put_signed_blocks_clamped(VC1Context *v)
> +{
> + MpegEncContext *s= &v->s;
I am against such formatting.
> +
> + /* The put pixels loop is always one MB row behind the decoding loop,
> + * because we can only put pixels when overlap filtering is done, and
> + * for filtering of the bottom edge of a MB, we need the next MB row
> + * present as well.
> + * Within the row, the put pixels loop is also one MB col behind the
> + * decoding loop. The reason for this is again, because for filtering
> + * of the right MB edge, we need the next MB present. */
> + if (!s->first_slice_line) {
> + if (s->mb_x) {
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
> + s->dest[0] - 16 * s->linesize -
> 16,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
> + s->dest[0] - 16 * s->linesize -
> 8,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
> + s->dest[0] - 8 * s->linesize -
> 16,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
> + s->dest[0] - 8 * s->linesize -
> 8,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
> + s->dest[1] - 8 * s->uvlinesize
> - 8,
> + s->uvlinesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
> + s->dest[2] - 8 * s->uvlinesize
> - 8,
> + s->uvlinesize);
> + }
> + if (s->mb_x == s->mb_width - 1) {
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
> + s->dest[0] - 16 * s->linesize,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
> + s->dest[0] - 16 * s->linesize +
> 8,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
> + s->dest[0] - 8 * s->linesize,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
> + s->dest[0] - 8 * s->linesize +
> 8,
> + s->linesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
> + s->dest[1] - 8 * s->uvlinesize,
> + s->uvlinesize);
> + s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
> + s->dest[2] - 8 * s->uvlinesize,
> + s->uvlinesize);
> + }
> + }
> +
> +#define inc_blk_idx(x) do { \
> + if (++x >= v->n_allocated_blks) x = 0; \
> + } while (0)
> +
> + inc_blk_idx(v->topleft_blk_idx);
> + inc_blk_idx(v->top_blk_idx);
> + inc_blk_idx(v->left_blk_idx);
> + inc_blk_idx(v->cur_blk_idx);
I'd simply use
idx++;
if (idx >= v->n_allocated_blks)
idx = 0;
and it's a bit silly to define a macro for such a simple statement just to use it on four lines.
> +}
> +
> static void vc1_loop_filter_iblk(VC1Context *v, int pq)
> {
> MpegEncContext *s = &v->s;
> @@ -187,6 +251,150 @@ static void vc1_loop_filter_iblk(VC1Context *v, int pq)
[...]
> +
> +static void vc1_smooth_overlap_filter_iblk(VC1Context *v)
> +{
> + MpegEncContext *s = &v->s;
> + int mb_pos;
> +
> + if (v->condover == CONDOVER_NONE) return;
You can add a newline here if you want to.
[...]
> @@ -2764,7 +2972,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
>
> /** Decode blocks of I-frame for advanced profile
> */
> -static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int
> mby_end)
> +static void vc1_decode_i_blocks_adv(VC1Context *v)
This change to how the slice start/end is passed seems fairly independent; maybe
make it a separate patch?
[...]
> @@ -3056,20 +3239,24 @@ static void vc1_decode_skip_blocks(VC1Context *v)
> s->pict_type = FF_P_TYPE;
> }
>
> -static void vc1_decode_blocks(VC1Context *v, int mby_start, int mby_end)
> +static void vc1_decode_blocks(VC1Context *v)
> {
>
> v->s.esc3_level_length = 0;
> if(v->x8_type){
> ff_intrax8_decode_picture(&v->x8, 2*v->pq+v->halfpq,
> v->pq*(!v->pquantizer) );
> }else{
> + v->cur_blk_idx = 0;
> + v->left_blk_idx = -1;
> + v->topleft_blk_idx = 1;
> + v->top_blk_idx = 2;
> switch(v->s.pict_type) {
> case FF_I_TYPE:
> if(v->profile == PROFILE_ADVANCED)
> {
> #undef printf
> //printf("I\n");
ahem
> - vc1_decode_i_blocks_adv(v, mby_start, mby_end);
> + vc1_decode_i_blocks_adv(v);
> }
> else
> vc1_decode_i_blocks(v);
> @@ -3083,7 +3270,7 @@ static void vc1_decode_blocks(VC1Context *v, int
> mby_start, int mby_end)
> else
> {
> //printf("P\n");
Diego would cry because of those ugly debug lines
> - vc1_decode_p_blocks(v, mby_start, mby_end);
> + vc1_decode_p_blocks(v);
> }
> break;
> case FF_B_TYPE:
> @@ -3091,14 +3278,14 @@ static void vc1_decode_blocks(VC1Context *v, int
> mby_start, int mby_end)
> if(v->profile == PROFILE_ADVANCED)
> {
> //printf("BI\n");
> - vc1_decode_i_blocks_adv(v, mby_start, mby_end);
> + vc1_decode_i_blocks_adv(v);
> }
> else
> vc1_decode_i_blocks(v);
> }else
> {
> //printf("B\n");
> - vc1_decode_b_blocks(v, mby_start, mby_end);
> + vc1_decode_b_blocks(v);
> }
> break;
> }
> @@ -3349,6 +3536,8 @@ static av_cold int vc1_decode_init(AVCodecContext
> *avctx)
> v->acpred_plane = av_malloc(s->mb_stride * s->mb_height);
> v->over_flags_plane = av_malloc(s->mb_stride * s->mb_height);
>
> + v->n_allocated_blks = s->mb_width + 2;
> + v->block = av_malloc(sizeof(*v->block) * v->n_allocated_blks);
I wonder why nobody cared about checking the allocation result back then.
> v->cbp_base = av_malloc(sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
> v->cbp = v->cbp_base + s->mb_stride;
> v->ttblk_base = av_malloc(sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
> @@ -3555,8 +3744,9 @@ static int vc1_decode_frame(AVCodecContext *avctx,
> for (i = 0; i <= n_slices; i++) {
> if (i && get_bits1(&s->gb))
> vc1_parse_frame_header_adv(v, &s->gb);
> - vc1_decode_blocks(v, i == 0 ? 0 : FFMAX(0,
> slices[i-1].mby_start),
> - i == n_slices ? s->mb_height : FFMIN(s->mb_height,
> slices[i].mby_start));
> + s->start_mb_y = (i == 0) ? 0 : FFMAX(0,
> slices[i-1].mby_start);
> + s->end_mb_y = (i == n_slices) ? s->mb_height :
> FFMIN(s->mb_height, slices[i].mby_start);
> + vc1_decode_blocks(v);
> if (i != n_slices) s->gb = slices[i].gb;
> }
> //av_log(s->avctx, AV_LOG_INFO, "Consumed %i/%i bits\n",
> get_bits_count(&s->gb), s->gb.size_in_bits);
> @@ -3613,6 +3803,7 @@ static av_cold int vc1_decode_end(AVCodecContext *avctx)
> av_freep(&v->acpred_plane);
> av_freep(&v->over_flags_plane);
> av_freep(&v->mb_type_base);
> + av_freep(&v->block);
> av_freep(&v->cbp_base);
> av_freep(&v->ttblk_base);
> av_freep(&v->is_intra_base); // FIXME use v->mb_type[]
> diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
> index e131553..14f0dc3 100644
> --- a/libavcodec/vc1dsp.c
> +++ b/libavcodec/vc1dsp.c
> @@ -78,6 +78,58 @@ static void vc1_h_overlap_c(uint8_t* src, int stride)
> }
> }
>
> +static void vc1_v_s_overlap_c(DCTELEM *top, DCTELEM *bottom)
> +{
> + int i;
> + int a, b, c, d;
> + int d1, d2;
> + int rnd1 = 4, rnd2 = 3;
> + for(i = 0; i < 8; i++) {
> + a = top[48];
> + b = top[56];
> + c = bottom[0];
> + d = bottom[8];
> + d1 = a - d;
> + d2 = a - d + b - c;
> +
> + top[48] = ((a << 3) - d1 + rnd1) >> 3;
> + top[56] = ((b << 3) - d2 + rnd2) >> 3;
> + bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
> + bottom[8] = ((d << 3) + d1 + rnd2) >> 3;
> +
> + bottom++;
> + top++;
> + rnd2 = 7 - rnd2;
> + rnd1 = 7 - rnd1;
> + }
> +}
> +
> +static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
> +{
> + int i;
> + int a, b, c, d;
> + int d1, d2;
> + int rnd1 = 4, rnd2 = 3;
> + for(i = 0; i < 8; i++) {
> + a = left[6];
> + b = left[7];
> + c = right[0];
> + d = right[1];
> + d1 = a - d;
> + d2 = a - d + b - c;
> +
> + left[6] = ((a << 3) - d1 + rnd1) >> 3;
> + left[7] = ((b << 3) - d2 + rnd2) >> 3;
> + right[0] = ((c << 3) + d2 + rnd1) >> 3;
> + right[1] = ((d << 3) + d1 + rnd2) >> 3;
> +
> + right += 8;
> + left += 8;
> + rnd2 = 7 - rnd2;
> + rnd1 = 7 - rnd1;
> + }
> +}
> +
> /**
> * VC-1 in-loop deblocking filter for one line
> * @param src source block type
> @@ -672,6 +724,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
> dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
> dsp->vc1_h_overlap = vc1_h_overlap_c;
> dsp->vc1_v_overlap = vc1_v_overlap_c;
> + dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
> + dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;
> dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c;
> dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c;
> dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c;
> @@ -718,6 +772,6 @@ av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
>
> if (HAVE_ALTIVEC)
> ff_vc1dsp_init_altivec(dsp);
> - if (HAVE_MMX)
> - ff_vc1dsp_init_mmx(dsp);
> + //if (HAVE_MMX)
> + // ff_vc1dsp_init_mmx(dsp);
ahem, why?
> }
> diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
> index 7b1ae10..e1b6ba0 100644
> --- a/libavcodec/vc1dsp.h
> +++ b/libavcodec/vc1dsp.h
> @@ -40,8 +40,10 @@ typedef struct VC1DSPContext {
> void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM
> *block);
> void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM
> *block);
> void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM
> *block);
> - void (*vc1_v_overlap)(uint8_t* src, int stride);
> - void (*vc1_h_overlap)(uint8_t* src, int stride);
> + void (*vc1_v_overlap)(uint8_t *src, int stride);
> + void (*vc1_h_overlap)(uint8_t *src, int stride);
> + void (*vc1_v_s_overlap)(DCTELEM *top, DCTELEM *bottom);
> + void (*vc1_h_s_overlap)(DCTELEM *left, DCTELEM *right);
> void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
> void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
> void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
> --
Overall, very nice work
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel