On Fri, Jun 22, 2012 at 12:26 AM, Luca Barbato <[email protected]> wrote:

> On 06/22/2012 04:19 AM, Daniel Kang wrote:
> > In preparation for sliced threading.
> > ---
> >  libavcodec/vp8.c |   54
> ++++++++++++++++++++++++++++++------------------------
> >  libavcodec/vp8.h |   11 +++++------
> >  2 files changed, 35 insertions(+), 30 deletions(-)
> >
> > diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
> > index 6ab4b26..bc2476e 100644
> > --- a/libavcodec/vp8.c
> > +++ b/libavcodec/vp8.c
> > @@ -38,7 +38,6 @@ static void free_buffers(VP8Context *s)
> >  {
> >      av_freep(&s->macroblocks_base);
> >      av_freep(&s->filter_strength);
> > -    av_freep(&s->intra4x4_pred_mode_top);
> >      av_freep(&s->top_nnz);
> >      av_freep(&s->edge_emu_buffer);
> >      av_freep(&s->top_border);
> > @@ -123,15 +122,14 @@ static int update_dimensions(VP8Context *s, int
> width, int height)
> >
> >      s->macroblocks_base        =
> av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
> >      s->filter_strength         =
> av_mallocz(s->mb_width*sizeof(*s->filter_strength));
> > -    s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
> >      s->top_nnz                 =
> av_mallocz(s->mb_width*sizeof(*s->top_nnz));
> >      s->top_border              =
> av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
> >
> > -    if (!s->macroblocks_base || !s->filter_strength ||
> !s->intra4x4_pred_mode_top ||
> > +    if (!s->macroblocks_base || !s->filter_strength ||
> >          !s->top_nnz || !s->top_border)
> >          return AVERROR(ENOMEM);
> >
> > -    s->macroblocks        = s->macroblocks_base + s->mb_width + 2;
> > +    s->macroblocks        = s->macroblocks_base + s->mb_width + 1;
> >
> >      return 0;
> >  }
> > @@ -622,14 +620,19 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb,
> int mb_x, int mb_y)
> >  }
> >
> >  static av_always_inline
> > -void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
> > +void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
> VP8Macroblock *mb,
> >                             int mb_x, int keyframe)
> >  {
> > -    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
> > +    VP8Macroblock *mb_top  = mb - s->mb_width - 1;
> > +    VP8Macroblock *mb_left = mb - 1;
> > +    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
> > +
> > +    memcpy(mb->intra4x4_pred_mode_left,
> mb_left->intra4x4_pred_mode_left, 4);
> > +    memcpy(mb->intra4x4_pred_mode_top,  mb_top->intra4x4_pred_mode_top,
>   4);
> >      if (keyframe) {
> >          int x, y;
> > -        uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
> > -        uint8_t* const left = s->intra4x4_pred_mode_left;
> > +        uint8_t* const top = mb->intra4x4_pred_mode_top;
> > +        uint8_t* const left = mb->intra4x4_pred_mode_left;
> >          for (y = 0; y < 4; y++) {
> >              for (x = 0; x < 4; x++) {
> >                  const uint8_t *ctx;
> > @@ -655,7 +658,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
> *mb, int mb_x, int mb_y, uint8_
> >          *segment = vp8_rac_get_tree(c, vp8_segmentid_tree,
> s->prob->segmentid);
> >      else if (s->segmentation.enabled)
> >          *segment = ref ? *ref : *segment;
> > -    s->segment = *segment;
> > +    mb->segment = *segment;
> >
> >      mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c,
> s->prob->mbskip) : 0;
> >
> > @@ -663,14 +666,14 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
> *mb, int mb_x, int mb_y, uint8_
> >          mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
> vp8_pred16x16_prob_intra);
> >
> >          if (mb->mode == MODE_I4x4) {
> > -            decode_intra4x4_modes(s, c, mb_x, 1);
> > +            decode_intra4x4_modes(s, c, mb, mb_x, 1);
> >          } else {
> >              const uint32_t modes = vp8_pred4x4_mode[mb->mode] *
> 0x01010101u;
> > -            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
> > -            AV_WN32A(s->intra4x4_pred_mode_left, modes);
> > +            AV_WN32A(mb->intra4x4_pred_mode_top,  modes);
> > +            AV_WN32A(mb->intra4x4_pred_mode_left, modes);
> >          }
> >
> > -        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
> vp8_pred8x8c_prob_intra);
> > +        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
> vp8_pred8x8c_prob_intra);
> >          mb->ref_frame = VP56_FRAME_CURRENT;
> >      } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
> >          // inter MB, 16.2
> > @@ -688,9 +691,9 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
> *mb, int mb_x, int mb_y, uint8_
> >          mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter,
> s->prob->pred16x16);
> >
> >          if (mb->mode == MODE_I4x4)
> > -            decode_intra4x4_modes(s, c, mb_x, 0);
> > +            decode_intra4x4_modes(s, c, mb, mb_x, 0);
> >
> > -        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
> s->prob->pred8x8c);
> > +        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
> s->prob->pred8x8c);
> >          mb->ref_frame = VP56_FRAME_CURRENT;
> >          mb->partitioning = VP8_SPLITMVMODE_NONE;
> >          AV_ZERO32(&mb->bmv[0]);
> > @@ -791,7 +794,7 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder
> *c, VP8Macroblock *mb,
> >  {
> >      int i, x, y, luma_start = 0, luma_ctx = 3;
> >      int nnz_pred, nnz, nnz_total = 0;
> > -    int segment = s->segment;
> > +    int segment = mb->segment;
> >      int block_dc = 0;
> >
> >      if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
> > @@ -1002,7 +1005,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3],
> VP8Macroblock *mb,
> >          s->hpc.pred16x16[mode](dst[0], s->linesize);
> >      } else {
> >          uint8_t *ptr = dst[0];
> > -        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
> > +        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
> >          uint8_t tr_top[4] = { 127, 127, 127, 127 };
> >
> >          // all blocks on the right edge of the macroblock use bottom
> edge
> > @@ -1087,9 +1090,9 @@ void intra_predict(VP8Context *s, uint8_t *dst[3],
> VP8Macroblock *mb,
> >      }
> >
> >      if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
> > -        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode,
> mb_x, mb_y);
> > +        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
> mb_x, mb_y);
> >      } else {
> > -        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x,
> mb_y);
> > +        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x,
> mb_y);
> >      }
> >      s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
> >      s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
> > @@ -1408,7 +1411,7 @@ static av_always_inline void
> filter_level_for_mb(VP8Context *s, VP8Macroblock *m
> >      int interior_limit, filter_level;
> >
> >      if (s->segmentation.enabled) {
> > -        filter_level = s->segmentation.filter_level[s->segment];
> > +        filter_level = s->segmentation.filter_level[mb->segment];
> >          if (!s->segmentation.absolute_vals)
> >              filter_level += s->filter.level;
> >      } else
> > @@ -1578,7 +1581,7 @@ static void release_queued_segmaps(VP8Context *s,
> int is_close)
> >  static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
> AVFrame *prev_frame, int mb_y) {
> >      VP8Context *s = avctx->priv_data;
> >      VP56RangeCoder *c = &s->coeff_partition[mb_y &
> (s->num_coeff_partitions-1)];
> > -    VP8Macroblock *mb = s->macroblocks + ((s->mb_width+1)*(mb_y + 1) +
> 1);
> > +    VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y +
> 1) + 1);
> >      int i, y, mb_x, mb_xy = mb_y*s->mb_width;
> >      uint8_t *dst[3] = {
> >          curframe->data[0] + 16*mb_y*s->linesize,
> > @@ -1588,7 +1591,7 @@ static void vp8_decode_mb_row(AVCodecContext
> *avctx, AVFrame *curframe, AVFrame
> >
> >      memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
> >      memset(s->left_nnz, 0, sizeof(s->left_nnz));
> > -    AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
> > +    AV_WN32A((mb-1)->intra4x4_pred_mode_left, DC_PRED*0x01010101);
> >
> >      // left edge of 129 for intra prediction
> >      if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
> > @@ -1765,8 +1768,11 @@ static int vp8_decode_frame(AVCodecContext
> *avctx, void *data, int *data_size,
> >          memset(s->top_border[1]-1, 127,
> s->mb_width*sizeof(*s->top_border)+1);
> >      }
> >      memset(s->ref_count, 0, sizeof(s->ref_count));
> > -    if (s->keyframe)
> > -        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
> > +    if (s->keyframe) {
> > +        VP8Macroblock *mb = s->macroblocks_base;
> > +        for (i = 0; i < s->mb_width+1; i++, mb++)
> > +            memset(mb->intra4x4_pred_mode_top, DC_PRED, 4);
> > +    }
> >
> >      s->mv_min.y = -MARGIN;
> >      s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
> > diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
> > index a738cb7..c533147 100644
> > --- a/libavcodec/vp8.h
> > +++ b/libavcodec/vp8.h
> > @@ -79,6 +79,11 @@ typedef struct {
> >      uint8_t mode;
> >      uint8_t ref_frame;
> >      uint8_t partitioning;
> > +    uint8_t chroma_pred_mode;
> > +    uint8_t segment;
> > +    uint8_t intra4x4_pred_mode_mb[16];
> > +    uint8_t intra4x4_pred_mode_top[4];
> > +    uint8_t intra4x4_pred_mode_left[4];
>
> In two patches you made the memory requirement to decode to increase a
> lot, is that really necessary?
>
> After being burnt by the 128k of args in webkit build system (explodes
> on arm) I'm a bit concerned about memory usage.
>

With this current framework, it is.

I am testing Jason's idea, which only need a little more memory. However,
from the timings and preliminary tests I have done, I don't think it scales
nearly as well.

A modification of Jason's idea might scale better, but it will require as
much memory.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to