Re: [libav-devel] [PATCH 1/5] h264: 4:2:2 intra decoding support

Ronald S. Bultje Fri, 21 Oct 2011 00:24:38 -0700

Hi,

On Fri, Oct 21, 2011 at 12:22 AM, Kostya Shishkov
<[email protected]> wrote:
> On Fri, Oct 21, 2011 at 12:13:13AM -0700, Ronald S. Bultje wrote:
>> From: Baptiste Coudurier <[email protected]>
>>
>> Signed-off-by: Diego Biurrun <[email protected]>
>> Signed-off-by: Ronald S. Bultje <[email protected]>
>> ---
>>  Changelog                            |    1 +
>>  libavcodec/arm/h264dsp_init_arm.c    |    9 +-
>>  libavcodec/arm/h264pred_init_arm.c   |    6 +-
>>  libavcodec/dsputil.h                 |    2 +
>>  libavcodec/h264.c                    |   95 +++++++++++++++++------
>>  libavcodec/h264.h                    |   12 +--
>>  libavcodec/h264_cabac.c              |   84 ++++++++++++++++++--
>>  libavcodec/h264_cavlc.c              |  127 +++++++++++++++++++++++++++++--
>>  libavcodec/h264_loopfilter.c         |   61 ++++++++++++---
>>  libavcodec/h264_mvpred.h             |    7 ++-
>>  libavcodec/h264_ps.c                 |    5 +-
>>  libavcodec/h264data.h                |    9 ++-
>>  libavcodec/h264dsp.c                 |   38 +++++++---
>>  libavcodec/h264dsp.h                 |    8 +-
>>  libavcodec/h264dsp_template.c        |   16 ++++
>>  libavcodec/h264idct_template.c       |   50 ++++++++++++
>>  libavcodec/h264pred.c                |   51 +++++++++----
>>  libavcodec/h264pred.h                |    6 +-
>>  libavcodec/h264pred_template.c       |  138 
>> ++++++++++++++++++++++++++++++++++
>>  libavcodec/ppc/h264_altivec.c        |    5 +-
>>  libavcodec/rv34.c                    |    2 +-
>>  libavcodec/vp8.c                     |    2 +-
>>  libavcodec/x86/h264_intrapred_init.c |   47 +++++++----
>>  libavcodec/x86/h264dsp_mmx.c         |   23 ++++--
>>  24 files changed, 671 insertions(+), 133 deletions(-)
>>
>> diff --git a/Changelog b/Changelog
>> index f61d3e2..ecfc34f 100644
>> --- a/Changelog
>> +++ b/Changelog
>> @@ -53,6 +53,7 @@ easier to use. The changes are:
>>  - lut, lutrgb, and lutyuv filters
>>  - boxblur filter
>>  - Ut Video decoder
>> +- 4:2:2 H.264 decoding support
>>
>>
>>  version 0.7:
>> diff --git a/libavcodec/arm/h264dsp_init_arm.c 
>> b/libavcodec/arm/h264dsp_init_arm.c
>> index c2399e5..c1ca217 100644
>> --- a/libavcodec/arm/h264dsp_init_arm.c
>> +++ b/libavcodec/arm/h264dsp_init_arm.c
>> @@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int 
>> *block_offset,
>>                               DCTELEM *block, int stride,
>>                               const uint8_t nnzc[6*8]);
>>
>> -static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
>> +static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, 
>> const int chroma_format_idc)
>>  {
>>      if (bit_depth == 8) {
>>      c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
>> @@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, 
>> const int bit_depth)
>>      c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
>>      c->h264_idct_add16      = ff_h264_idct_add16_neon;
>>      c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
>> -    c->h264_idct_add8       = ff_h264_idct_add8_neon;
>> +    if (chroma_format_idc == 1)
>> +        c->h264_idct_add8   = ff_h264_idct_add8_neon;
>>      c->h264_idct8_add       = ff_h264_idct8_add_neon;
>>      c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
>>      c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
>>      }
>>  }
>>
>> -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
>> +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int 
>> chroma_format_idc)
>>  {
>> -    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
>> +    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
>>  }
>> diff --git a/libavcodec/arm/h264pred_init_arm.c 
>> b/libavcodec/arm/h264pred_init_arm.c
>> index e96f339..5fc07bc 100644
>> --- a/libavcodec/arm/h264pred_init_arm.c
>> +++ b/libavcodec/arm/h264pred_init_arm.c
>> @@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
>>  void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
>>  void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
>>
>> -static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const 
>> int bit_depth)
>> +static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const 
>> int bit_depth, const int chroma_format_idc)
>>  {
>>      const int high_depth = bit_depth > 8;
>>
>> @@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int 
>> codec_id, const int b
>>          h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
>>  }
>>
>> -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth)
>> +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, 
>> const int chroma_format_idc)
>>  {
>> -    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth);
>> +    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth, 
>> chroma_format_idc);
>>  }
>> diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
>> index bef2cdd..acb2041 100644
>> --- a/libavcodec/dsputil.h
>> +++ b/libavcodec/dsputil.h
>> @@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, 
>> DCTELEM *block, int strid
>>  void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int 
>> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
>>  void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int 
>> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
>>  void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int 
>> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
>> +void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int 
>> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
>>  void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int 
>> *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
>>  void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM 
>> *input, int qmul);\
>> +void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int 
>> qmul);\
>>  void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int 
>> qmul);
>>
>>  H264_IDCT( 8)
>> diff --git a/libavcodec/h264.c b/libavcodec/h264.c
>> index 1faaaa6..d78ec7d 100644
>> --- a/libavcodec/h264.c
>> +++ b/libavcodec/h264.c
>> @@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context 
>> *src, int i){
>>      dst->list_counts              = src->list_counts;
>>
>>      dst->s.obmc_scratchpad = NULL;
>> -    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
>> +    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, 
>> src->sps.chroma_format_idc);
>>  }
>>
>>  /**
>> @@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){
>>      s->height = s->avctx->height;
>>      s->codec_id= s->avctx->codec->id;
>>
>> -    ff_h264dsp_init(&h->h264dsp, 8);
>> -    ff_h264_pred_init(&h->hpc, s->codec_id, 8);
>> +    ff_h264dsp_init(&h->h264dsp, 8, 1);
>> +    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
>>
>>      h->dequant_coeff_pps= -1;
>>      s->unrestricted_mv=1;
>> @@ -1432,11 +1432,16 @@ static void decode_postinit(H264Context *h, int 
>> setup_finished){
>>          ff_thread_finish_setup(s->avctx);
>>  }
>>
>> -static av_always_inline void backup_mb_border(H264Context *h, uint8_t 
>> *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int 
>> chroma444, int simple){
>> +static av_always_inline void backup_mb_border(H264Context *h, uint8_t 
>> *src_y,
>> +                                              uint8_t *src_cb, uint8_t 
>> *src_cr,
>> +                                              int linesize, int uvlinesize, 
>> int simple)
>> +{
>>      MpegEncContext * const s = &h->s;
>>      uint8_t *top_border;
>>      int top_idx = 1;
>>      const int pixel_shift = h->pixel_shift;
>> +    int chroma444 = CHROMA444;
>> +    int chroma422 = CHROMA422;
>>
>>      src_y  -=   linesize;
>>      src_cb -= uvlinesize;
>> @@ -1460,6 +1465,14 @@ static av_always_inline void 
>> backup_mb_border(H264Context *h, uint8_t *src_y, ui
>>                              AV_COPY128(top_border+16, src_cb + 
>> 15*uvlinesize);
>>                              AV_COPY128(top_border+32, src_cr + 
>> 15*uvlinesize);
>>                          }
>> +                    } else if(chroma422) {
>> +                        if (pixel_shift) {
>> +                            AV_COPY128(top_border+32, src_cb + 
>> 15*uvlinesize);
>> +                            AV_COPY128(top_border+48, src_cr + 
>> 15*uvlinesize);
>> +                        } else {
>> +                            AV_COPY64(top_border+16, src_cb +  
>> 15*uvlinesize);
>> +                            AV_COPY64(top_border+24, src_cr +  
>> 15*uvlinesize);
>> +                        }
>>                      } else {
>>                          if (pixel_shift) {
>>                              AV_COPY128(top_border+32, src_cb+7*uvlinesize);
>> @@ -1495,6 +1508,14 @@ static av_always_inline void 
>> backup_mb_border(H264Context *h, uint8_t *src_y, ui
>>                  AV_COPY128(top_border+16, src_cb + 16*linesize);
>>                  AV_COPY128(top_border+32, src_cr + 16*linesize);
>>              }
>> +        } else if(chroma422) {
>> +            if (pixel_shift) {
>> +                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
>> +                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
>> +            } else {
>> +                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
>> +                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
>> +            }
>>          } else {
>>              if (pixel_shift) {
>>                  AV_COPY128(top_border+32, src_cb+8*uvlinesize);
>> @@ -1773,10 +1794,11 @@ static av_always_inline void 
>> hl_decode_mb_internal(H264Context *h, int simple, i
>>      /* is_h264 should always be true if SVQ3 is disabled. */
>>      const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == 
>> CODEC_ID_H264;
>>      void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
>> +    const int block_h = 16 >> s->chroma_y_shift;
>>
>>      dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y 
>> * s->linesize  ) * 16;
>> -    dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y 
>> * s->uvlinesize) *  8;
>> -    dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y 
>> * s->uvlinesize) *  8;
>> +    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y 
>> * s->uvlinesize * block_h;
>> +    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y 
>> * s->uvlinesize * block_h;
>>
>>      s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << 
>> pixel_shift), s->linesize, 4);
>>      s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << 
>> pixel_shift), dest_cr - dest_cb, 2);
>> @@ -1789,8 +1811,8 @@ static av_always_inline void 
>> hl_decode_mb_internal(H264Context *h, int simple, i
>>          block_offset = &h->block_offset[48];
>>          if(mb_y&1){ //FIXME move out of this function?
>>              dest_y -= s->linesize*15;
>> -            dest_cb-= s->uvlinesize*7;
>> -            dest_cr-= s->uvlinesize*7;
>> +            dest_cb-= s->uvlinesize * (block_h - 1);
>> +            dest_cr-= s->uvlinesize * (block_h - 1);
>>          }
>>          if(FRAME_MBAFF) {
>>              int list;
>> @@ -1842,12 +1864,12 @@ static av_always_inline void 
>> hl_decode_mb_internal(H264Context *h, int simple, i
>>                          }
>>                      }
>>                  } else {
>> -                    for (i = 0; i < 8; i++) {
>> +                    for (i = 0; i < block_h; i++) {
>>                          uint16_t *tmp_cb = (uint16_t*)(dest_cb + 
>> i*uvlinesize);
>>                          for (j = 0; j < 8; j++)
>>                              tmp_cb[j] = get_bits(&gb, bit_depth);
>>                      }
>> -                    for (i = 0; i < 8; i++) {
>> +                    for (i = 0; i < block_h; i++) {
>>                          uint16_t *tmp_cr = (uint16_t*)(dest_cr + 
>> i*uvlinesize);
>>                          for (j = 0; j < 8; j++)
>>                              tmp_cr[j] = get_bits(&gb, bit_depth);
>> @@ -1865,7 +1887,7 @@ static av_always_inline void 
>> hl_decode_mb_internal(H264Context *h, int simple, i
>>                          memset(dest_cr + i*uvlinesize, 128, 8);
>>                      }
>>                  } else {
>> -                    for (i = 0; i < 8; i++) {
>> +                    for (i = 0; i < block_h; i++) {
>>                          memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  
>> 8);
>>                          memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  
>> 8);
>>                      }
>> @@ -1913,10 +1935,18 @@ static av_always_inline void 
>> hl_decode_mb_internal(H264Context *h, int simple, i
>>                  }
>>              }else{
>>                  if(is_h264){
>> +                    int qp[2];
>> +                    if (CHROMA422) {
>> +                        qp[0] = h->chroma_qp[0] + 3;
>> +                        qp[1] = h->chroma_qp[1] + 3;
>> +                    } else {
>> +                        qp[0] = h->chroma_qp[0];
>> +                        qp[1] = h->chroma_qp[1];
>> +                    }
>>                      if(h->non_zero_count_cache[ 
>> scan8[CHROMA_DC_BLOCK_INDEX+0] ])
>> -                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 
>> (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 
>> 1:4][h->chroma_qp[0]][0]);
>> +                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 
>> (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 
>> 1:4][qp[0]][0]);
>>                      if(h->non_zero_count_cache[ 
>> scan8[CHROMA_DC_BLOCK_INDEX+1] ])
>> -                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 
>> (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 
>> 2:5][h->chroma_qp[1]][0]);
>> +                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 
>> (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 
>> 2:5][qp[1]][0]);
>>                      h->h264dsp.h264_idct_add8(dest, block_offset,
>>                                                h->mb, uvlinesize,
>>                                                h->non_zero_count_cache);
>> @@ -2555,11 +2585,13 @@ static int decode_slice_header(H264Context *h, 
>> H264Context *h0){
>>
>>      h->b_stride=  s->mb_width*4;
>>
>> +    s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
>> +
>>      s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, 
>> (8<<CHROMA444)-1);
>>      if(h->sps.frame_mbs_only_flag)
>> -        s->height= 16*s->mb_height - 
>> (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
>> +        s->height= 16*s->mb_height - 
>> (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
>>      else
>> -        s->height= 16*s->mb_height - 
>> (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
>> +        s->height= 16*s->mb_height - 
>> (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
>>
>>      if (s->context_initialized
>>          && (   s->width != s->avctx->width || s->height != s->avctx->height
>> @@ -2601,14 +2633,24 @@ static int decode_slice_header(H264Context *h, 
>> H264Context *h0){
>>
>>          switch (h->sps.bit_depth_luma) {
>>              case 9 :
>> -                s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : 
>> PIX_FMT_YUV420P9;
>> +                if (CHROMA444)
>> +                    s->avctx->pix_fmt = PIX_FMT_YUV444P9;
>> +                else
>> +                    s->avctx->pix_fmt = PIX_FMT_YUV420P9;
>
> Is there no 422 possible here?


We're likely missing the proper pix_fmt for 4:2:2 here. I'll add it.

> Also why some SIMD funcs (like IDCT and pred) are enabled only for
> chroma_idc=1 ?

4:4:4 uses the 16x16 luma prediction functions, and the luma IDCT as
well. 4:2:2 needs special-case functions, so the existing SIMD versions
are only enabled for 4:2:0 (chroma_format_idc == 1).

Ronald
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/5] h264: 4:2:2 intra decoding support

Reply via email to