Hi, On Fri, Oct 21, 2011 at 12:22 AM, Kostya Shishkov <[email protected]> wrote: > On Fri, Oct 21, 2011 at 12:13:13AM -0700, Ronald S. Bultje wrote: >> From: Baptiste Coudurier <[email protected]> >> >> Signed-off-by: Diego Biurrun <[email protected]> >> Signed-off-by: Ronald S. Bultje <[email protected]> >> --- >> Changelog | 1 + >> libavcodec/arm/h264dsp_init_arm.c | 9 +- >> libavcodec/arm/h264pred_init_arm.c | 6 +- >> libavcodec/dsputil.h | 2 + >> libavcodec/h264.c | 95 +++++++++++++++++------ >> libavcodec/h264.h | 12 +-- >> libavcodec/h264_cabac.c | 84 ++++++++++++++++++-- >> libavcodec/h264_cavlc.c | 127 +++++++++++++++++++++++++++++-- >> libavcodec/h264_loopfilter.c | 61 ++++++++++++--- >> libavcodec/h264_mvpred.h | 7 ++- >> libavcodec/h264_ps.c | 5 +- >> libavcodec/h264data.h | 9 ++- >> libavcodec/h264dsp.c | 38 +++++++--- >> libavcodec/h264dsp.h | 8 +- >> libavcodec/h264dsp_template.c | 16 ++++ >> libavcodec/h264idct_template.c | 50 ++++++++++++ >> libavcodec/h264pred.c | 51 +++++++++---- >> libavcodec/h264pred.h | 6 +- >> libavcodec/h264pred_template.c | 138 >> ++++++++++++++++++++++++++++++++++ >> libavcodec/ppc/h264_altivec.c | 5 +- >> libavcodec/rv34.c | 2 +- >> libavcodec/vp8.c | 2 +- >> libavcodec/x86/h264_intrapred_init.c | 47 +++++++---- >> libavcodec/x86/h264dsp_mmx.c | 23 ++++-- >> 24 files changed, 671 insertions(+), 133 deletions(-) >> >> diff --git a/Changelog b/Changelog >> index f61d3e2..ecfc34f 100644 >> --- a/Changelog >> +++ b/Changelog >> @@ -53,6 +53,7 @@ easier to use. 
The changes are: >> - lut, lutrgb, and lutyuv filters >> - boxblur filter >> - Ut Video decoder >> +- 4:2:2 H.264 decoding support >> >> >> version 0.7: >> diff --git a/libavcodec/arm/h264dsp_init_arm.c >> b/libavcodec/arm/h264dsp_init_arm.c >> index c2399e5..c1ca217 100644 >> --- a/libavcodec/arm/h264dsp_init_arm.c >> +++ b/libavcodec/arm/h264dsp_init_arm.c >> @@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int >> *block_offset, >> DCTELEM *block, int stride, >> const uint8_t nnzc[6*8]); >> >> -static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) >> +static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, >> const int chroma_format_idc) >> { >> if (bit_depth == 8) { >> c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; >> @@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, >> const int bit_depth) >> c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; >> c->h264_idct_add16 = ff_h264_idct_add16_neon; >> c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; >> - c->h264_idct_add8 = ff_h264_idct_add8_neon; >> + if (chroma_format_idc == 1) >> + c->h264_idct_add8 = ff_h264_idct_add8_neon; >> c->h264_idct8_add = ff_h264_idct8_add_neon; >> c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; >> c->h264_idct8_add4 = ff_h264_idct8_add4_neon; >> } >> } >> >> -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) >> +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int >> chroma_format_idc) >> { >> - if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); >> + if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); >> } >> diff --git a/libavcodec/arm/h264pred_init_arm.c >> b/libavcodec/arm/h264pred_init_arm.c >> index e96f339..5fc07bc 100644 >> --- a/libavcodec/arm/h264pred_init_arm.c >> +++ b/libavcodec/arm/h264pred_init_arm.c >> @@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); >> void ff_pred8x8_l00_dc_neon(uint8_t *src, int 
stride); >> void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); >> >> -static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const >> int bit_depth) >> +static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const >> int bit_depth, const int chroma_format_idc) >> { >> const int high_depth = bit_depth > 8; >> >> @@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int >> codec_id, const int b >> h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; >> } >> >> -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth) >> +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, >> const int chroma_format_idc) >> { >> - if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); >> + if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, >> chroma_format_idc); >> } >> diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h >> index bef2cdd..acb2041 100644 >> --- a/libavcodec/dsputil.h >> +++ b/libavcodec/dsputil.h >> @@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, >> DCTELEM *block, int strid >> void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int >> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ >> void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int >> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ >> void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int >> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ >> +void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int >> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ >> void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int >> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ >> void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM >> *input, int qmul);\ >> +void ff_h264_chroma422_dc_dequant_idct_ ## depth ## 
_c(DCTELEM *block, int >> qmul);\ >> void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int >> qmul); >> >> H264_IDCT( 8) >> diff --git a/libavcodec/h264.c b/libavcodec/h264.c >> index 1faaaa6..d78ec7d 100644 >> --- a/libavcodec/h264.c >> +++ b/libavcodec/h264.c >> @@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context >> *src, int i){ >> dst->list_counts = src->list_counts; >> >> dst->s.obmc_scratchpad = NULL; >> - ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); >> + ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, >> src->sps.chroma_format_idc); >> } >> >> /** >> @@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){ >> s->height = s->avctx->height; >> s->codec_id= s->avctx->codec->id; >> >> - ff_h264dsp_init(&h->h264dsp, 8); >> - ff_h264_pred_init(&h->hpc, s->codec_id, 8); >> + ff_h264dsp_init(&h->h264dsp, 8, 1); >> + ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); >> >> h->dequant_coeff_pps= -1; >> s->unrestricted_mv=1; >> @@ -1432,11 +1432,16 @@ static void decode_postinit(H264Context *h, int >> setup_finished){ >> ff_thread_finish_setup(s->avctx); >> } >> >> -static av_always_inline void backup_mb_border(H264Context *h, uint8_t >> *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int >> chroma444, int simple){ >> +static av_always_inline void backup_mb_border(H264Context *h, uint8_t >> *src_y, >> + uint8_t *src_cb, uint8_t >> *src_cr, >> + int linesize, int uvlinesize, >> int simple) >> +{ >> MpegEncContext * const s = &h->s; >> uint8_t *top_border; >> int top_idx = 1; >> const int pixel_shift = h->pixel_shift; >> + int chroma444 = CHROMA444; >> + int chroma422 = CHROMA422; >> >> src_y -= linesize; >> src_cb -= uvlinesize; >> @@ -1460,6 +1465,14 @@ static av_always_inline void >> backup_mb_border(H264Context *h, uint8_t *src_y, ui >> AV_COPY128(top_border+16, src_cb + >> 15*uvlinesize); >> AV_COPY128(top_border+32, src_cr + >> 15*uvlinesize); 
>> } >> + } else if(chroma422) { >> + if (pixel_shift) { >> + AV_COPY128(top_border+32, src_cb + >> 15*uvlinesize); >> + AV_COPY128(top_border+48, src_cr + >> 15*uvlinesize); >> + } else { >> + AV_COPY64(top_border+16, src_cb + >> 15*uvlinesize); >> + AV_COPY64(top_border+24, src_cr + >> 15*uvlinesize); >> + } >> } else { >> if (pixel_shift) { >> AV_COPY128(top_border+32, src_cb+7*uvlinesize); >> @@ -1495,6 +1508,14 @@ static av_always_inline void >> backup_mb_border(H264Context *h, uint8_t *src_y, ui >> AV_COPY128(top_border+16, src_cb + 16*linesize); >> AV_COPY128(top_border+32, src_cr + 16*linesize); >> } >> + } else if(chroma422) { >> + if (pixel_shift) { >> + AV_COPY128(top_border+32, src_cb+16*uvlinesize); >> + AV_COPY128(top_border+48, src_cr+16*uvlinesize); >> + } else { >> + AV_COPY64(top_border+16, src_cb+16*uvlinesize); >> + AV_COPY64(top_border+24, src_cr+16*uvlinesize); >> + } >> } else { >> if (pixel_shift) { >> AV_COPY128(top_border+32, src_cb+8*uvlinesize); >> @@ -1773,10 +1794,11 @@ static av_always_inline void >> hl_decode_mb_internal(H264Context *h, int simple, i >> /* is_h264 should always be true if SVQ3 is disabled. 
*/ >> const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == >> CODEC_ID_H264; >> void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); >> + const int block_h = 16 >> s->chroma_y_shift; >> >> dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y >> * s->linesize ) * 16; >> - dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y >> * s->uvlinesize) * 8; >> - dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y >> * s->uvlinesize) * 8; >> + dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y >> * s->uvlinesize * block_h; >> + dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y >> * s->uvlinesize * block_h; >> >> s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << >> pixel_shift), s->linesize, 4); >> s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << >> pixel_shift), dest_cr - dest_cb, 2); >> @@ -1789,8 +1811,8 @@ static av_always_inline void >> hl_decode_mb_internal(H264Context *h, int simple, i >> block_offset = &h->block_offset[48]; >> if(mb_y&1){ //FIXME move out of this function? 
>> dest_y -= s->linesize*15; >> - dest_cb-= s->uvlinesize*7; >> - dest_cr-= s->uvlinesize*7; >> + dest_cb-= s->uvlinesize * (block_h - 1); >> + dest_cr-= s->uvlinesize * (block_h - 1); >> } >> if(FRAME_MBAFF) { >> int list; >> @@ -1842,12 +1864,12 @@ static av_always_inline void >> hl_decode_mb_internal(H264Context *h, int simple, i >> } >> } >> } else { >> - for (i = 0; i < 8; i++) { >> + for (i = 0; i < block_h; i++) { >> uint16_t *tmp_cb = (uint16_t*)(dest_cb + >> i*uvlinesize); >> for (j = 0; j < 8; j++) >> tmp_cb[j] = get_bits(&gb, bit_depth); >> } >> - for (i = 0; i < 8; i++) { >> + for (i = 0; i < block_h; i++) { >> uint16_t *tmp_cr = (uint16_t*)(dest_cr + >> i*uvlinesize); >> for (j = 0; j < 8; j++) >> tmp_cr[j] = get_bits(&gb, bit_depth); >> @@ -1865,7 +1887,7 @@ static av_always_inline void >> hl_decode_mb_internal(H264Context *h, int simple, i >> memset(dest_cr + i*uvlinesize, 128, 8); >> } >> } else { >> - for (i = 0; i < 8; i++) { >> + for (i = 0; i < block_h; i++) { >> memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, >> 8); >> memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, >> 8); >> } >> @@ -1913,10 +1935,18 @@ static av_always_inline void >> hl_decode_mb_internal(H264Context *h, int simple, i >> } >> }else{ >> if(is_h264){ >> + int qp[2]; >> + if (CHROMA422) { >> + qp[0] = h->chroma_qp[0] + 3; >> + qp[1] = h->chroma_qp[1] + 3; >> + } else { >> + qp[0] = h->chroma_qp[0]; >> + qp[1] = h->chroma_qp[1]; >> + } >> if(h->non_zero_count_cache[ >> scan8[CHROMA_DC_BLOCK_INDEX+0] ]) >> - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + >> (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? >> 1:4][h->chroma_qp[0]][0]); >> + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + >> (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? >> 1:4][qp[0]][0]); >> if(h->non_zero_count_cache[ >> scan8[CHROMA_DC_BLOCK_INDEX+1] ]) >> - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + >> (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 
>> 2:5][h->chroma_qp[1]][0]); >> + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + >> (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? >> 2:5][qp[1]][0]); >> h->h264dsp.h264_idct_add8(dest, block_offset, >> h->mb, uvlinesize, >> h->non_zero_count_cache); >> @@ -2555,11 +2585,13 @@ static int decode_slice_header(H264Context *h, >> H264Context *h0){ >> >> h->b_stride= s->mb_width*4; >> >> + s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p >> + >> s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, >> (8<<CHROMA444)-1); >> if(h->sps.frame_mbs_only_flag) >> - s->height= 16*s->mb_height - >> (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); >> + s->height= 16*s->mb_height - >> (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); >> else >> - s->height= 16*s->mb_height - >> (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); >> + s->height= 16*s->mb_height - >> (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); >> >> if (s->context_initialized >> && ( s->width != s->avctx->width || s->height != s->avctx->height >> @@ -2601,14 +2633,24 @@ static int decode_slice_header(H264Context *h, >> H264Context *h0){ >> >> switch (h->sps.bit_depth_luma) { >> case 9 : >> - s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : >> PIX_FMT_YUV420P9; >> + if (CHROMA444) >> + s->avctx->pix_fmt = PIX_FMT_YUV444P9; >> + else >> + s->avctx->pix_fmt = PIX_FMT_YUV420P9; > > Is there no 422 possible here?
Likely we're missing the proper pix-fmt. I'll add it. > Also why some SIMD funcs (like IDCT and pred) are enabled only for > chroma_idc=1 ? 4:4:4 uses the 16x16 luma prediction functions, and likewise the luma IDCT. 4:2:2 needs special-case handling, so these functions are only enabled for 4:2:0 (chroma_format_idc == 1). Ronald _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
