Quoting Mark Thompson (2016-09-04 14:43:21)
> Also adds some extra fields to the main context structure that may
> be needed by a hwaccel decoder.
> ---
> libavcodec/vp8.c | 190
> +++++++++++++++++++++++++++++++++++++++----------------
> libavcodec/vp8.h | 25 ++++++++
> 2 files changed, 162 insertions(+), 53 deletions(-)
>
> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
> index 546124c..b825d4c 100644
> --- a/libavcodec/vp8.c
> +++ b/libavcodec/vp8.c
> @@ -64,16 +64,29 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f,
> int ref)
> if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
> ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
> return ret;
> - if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
> - ff_thread_release_buffer(s->avctx, &f->tf);
> - return AVERROR(ENOMEM);
> + if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
> + goto fail;
> + if (s->avctx->hwaccel) {
> + const AVHWAccel *hwaccel = s->avctx->hwaccel;
> + if (hwaccel->frame_priv_data_size) {
> + f->hwaccel_priv_buf =
> av_buffer_allocz(hwaccel->frame_priv_data_size);
> + if (!f->hwaccel_priv_buf)
> + goto fail;
Doesn't this leak seg_map? The fail path below only releases the thread buffer.
> + f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
> + }
> }
> return 0;
> +
> +fail:
> + ff_thread_release_buffer(s->avctx, &f->tf);
> + return AVERROR(ENOMEM);
> }
>
> static void vp8_release_frame(VP8Context *s, VP8Frame *f)
> {
> av_buffer_unref(&f->seg_map);
> + av_buffer_unref(&f->hwaccel_priv_buf);
> + f->hwaccel_picture_private = NULL;
> ff_thread_release_buffer(s->avctx, &f->tf);
> }
>
> @@ -91,6 +104,12 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst,
> VP8Frame *src)
> vp8_release_frame(s, dst);
> return AVERROR(ENOMEM);
> }
> + if (src->hwaccel_picture_private) {
> + dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
> + if (!dst->hwaccel_priv_buf)
> + return AVERROR(ENOMEM);
> + dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
> + }
>
> return 0;
> }
> @@ -132,7 +151,7 @@ static VP8Frame *vp8_find_free_buffer(VP8Context *s)
> av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
> abort();
> }
> - if (frame->tf.f->data[0])
> + if (frame->tf.f->data[0] || frame->tf.f->buf[0])
Just checking for buf[0] should be enough.
> vp8_release_frame(s, frame);
>
> return frame;
> @@ -209,8 +228,9 @@ static void parse_segment_info(VP8Context *s)
> int i;
>
> s->segmentation.update_map = vp8_rac_get(c);
> + s->segmentation.update_feature_data = vp8_rac_get(c);
>
> - if (vp8_rac_get(c)) { // update segment feature data
> + if (s->segmentation.update_feature_data) {
> s->segmentation.absolute_vals = vp8_rac_get(c);
>
> for (i = 0; i < 4; i++)
> @@ -264,11 +284,14 @@ static int setup_partitions(VP8Context *s, const
> uint8_t *buf, int buf_size)
> int size = AV_RL24(sizes + 3 * i);
> if (buf_size - size < 0)
> return -1;
> + s->coeff_partition_size[i] = size;
>
> ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
> buf += size;
> buf_size -= size;
> }
> +
> + s->coeff_partition_size[i] = buf_size;
> ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
>
> return 0;
> @@ -313,13 +336,19 @@ static void get_quants(VP8Context *s)
> } else
> base_qi = yac_qi;
>
> - s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi +
> ydc_delta, 7)];
> - s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi,
> 7)];
> - s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi +
> y2dc_delta, 7)] * 2;
> + s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[s->qmat_raw[i][1] =
> + av_clip_uintp2(base_qi +
> ydc_delta, 7)];
> + s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[s->qmat_raw[i][0] =
> + av_clip_uintp2(base_qi,
> 7)];
> + s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[s->qmat_raw[i][2] =
> + av_clip_uintp2(base_qi +
> y2dc_delta, 7)] * 2;
> /* 101581>>16 is equivalent to 155/100 */
> - s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi +
> y2ac_delta, 7)] * 101581 >> 16;
> - s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi +
> uvdc_delta, 7)];
> - s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi +
> uvac_delta, 7)];
> + s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[s->qmat_raw[i][3] =
> + av_clip_uintp2(base_qi +
> y2ac_delta, 7)] * 101581 >> 16;
> + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[s->qmat_raw[i][4] =
> + av_clip_uintp2(base_qi +
> uvdc_delta, 7)];
> + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[s->qmat_raw[i][5] =
> + av_clip_uintp2(base_qi +
> uvac_delta, 7)];
Those sneaky assignments are quite nasty; I'd rather see them done
separately.
>
> s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
> s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
> @@ -637,6 +666,8 @@ static int vp8_decode_frame_header(VP8Context *s, const
> uint8_t *buf, int buf_si
> buf += 3;
> buf_size -= 3;
>
> + s->header_partition_size = header_size;
> +
> if (s->profile > 3)
> av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
>
> @@ -700,9 +731,11 @@ static int vp8_decode_frame_header(VP8Context *s, const
> uint8_t *buf, int buf_si
> s->filter.level = vp8_rac_get_uint(c, 6);
> s->filter.sharpness = vp8_rac_get_uint(c, 3);
>
> - if ((s->lf_delta.enabled = vp8_rac_get(c)))
> - if (vp8_rac_get(c))
> + if ((s->lf_delta.enabled = vp8_rac_get(c))) {
> + s->lf_delta.update = vp8_rac_get(c);
> + if (s->lf_delta.update)
> update_lf_deltas(s);
> + }
>
> if (setup_partitions(s, buf, buf_size)) {
> av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
> @@ -741,6 +774,13 @@ static int vp8_decode_frame_header(VP8Context *s, const
> uint8_t *buf, int buf_si
> vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
> }
>
> + // Record the entropy coder state here so that hwaccels can use it.
> + s->c.code_word = vp56_rac_renorm(&s->c);
> + s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
> + s->coder_state_at_header_end.range = s->c.high;
> + s->coder_state_at_header_end.value = s->c.code_word >> 16;
> + s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
> +
> return 0;
> }
>
> @@ -2462,6 +2502,24 @@ static int vp8_decode_mb_row_sliced(AVCodecContext
> *avctx, void *tdata,
> return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
> }
>
> +static enum AVPixelFormat vp8_get_pixel_format(AVCodecContext *avctx)
> +{
> + enum AVPixelFormat pix_fmts[] = {
> +#if CONFIG_VP8_VAAPI_HWACCEL
> + AV_PIX_FMT_VAAPI,
> +#endif
This should be in the following patch.
> + AV_PIX_FMT_YUV420P,
> + AV_PIX_FMT_NONE,
> + };
> + int i;
> +
> + for (i = 0; pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
> + if (avctx->pix_fmt == pix_fmts[i])
> + return pix_fmts[i];
> + }
> +
> + return ff_get_format(avctx, pix_fmts);
> +}
>
> static av_always_inline
> int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
> @@ -2480,6 +2538,16 @@ int vp78_decode_frame(AVCodecContext *avctx, void
> *data, int *got_frame,
> if (ret < 0)
> goto err;
>
> + if (is_vp7) {
> + avctx->pix_fmt = AV_PIX_FMT_YUV420P;
> + } else {
> + avctx->pix_fmt = vp8_get_pixel_format(avctx);
> + if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
> + ret = AVERROR_BUG;
> + goto err;
> + }
> + }
> +
> prev_frame = s->framep[VP56_FRAME_CURRENT];
>
> referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
> @@ -2555,51 +2623,67 @@ int vp78_decode_frame(AVCodecContext *avctx, void
> *data, int *got_frame,
>
> ff_thread_finish_setup(avctx);
>
> - s->linesize = curframe->tf.f->linesize[0];
> - s->uvlinesize = curframe->tf.f->linesize[1];
> + if (avctx->hwaccel) {
> + ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
> + if (ret < 0)
> + goto err;
>
> - memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
> - /* Zero macroblock structures for top/top-left prediction
> - * from outside the frame. */
> - if (!s->mb_layout)
> - memset(s->macroblocks + s->mb_height * 2 - 1, 0,
> - (s->mb_width + 1) * sizeof(*s->macroblocks));
> - if (!s->mb_layout && s->keyframe)
> - memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
> + ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
> + if (ret < 0)
> + goto err;
>
> - memset(s->ref_count, 0, sizeof(s->ref_count));
> + ret = avctx->hwaccel->end_frame(avctx);
> + if (ret < 0)
> + goto err;
>
> - if (s->mb_layout == 1) {
> - // Make sure the previous frame has read its segmentation map,
> - // if we re-use the same map.
> - if (prev_frame && s->segmentation.enabled &&
> - !s->segmentation.update_map)
> - ff_thread_await_progress(&prev_frame->tf, 1, 0);
> - if (is_vp7)
> - vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
> + } else {
> + s->linesize = curframe->tf.f->linesize[0];
> + s->uvlinesize = curframe->tf.f->linesize[1];
> +
> + memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
> + /* Zero macroblock structures for top/top-left prediction
> + * from outside the frame. */
> + if (!s->mb_layout)
> + memset(s->macroblocks + s->mb_height * 2 - 1, 0,
> + (s->mb_width + 1) * sizeof(*s->macroblocks));
> + if (!s->mb_layout && s->keyframe)
> + memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
> +
> + memset(s->ref_count, 0, sizeof(s->ref_count));
> +
> + if (s->mb_layout == 1) {
> + // Make sure the previous frame has read its segmentation map,
> + // if we re-use the same map.
> + if (prev_frame && s->segmentation.enabled &&
> + !s->segmentation.update_map)
> + ff_thread_await_progress(&prev_frame->tf, 1, 0);
> + if (is_vp7)
> + vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
> + else
> + vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
> + }
> +
> + if (avctx->active_thread_type == FF_THREAD_FRAME)
> + num_jobs = 1;
> else
> - vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
> - }
> + num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
> + s->num_jobs = num_jobs;
> + s->curframe = curframe;
> + s->prev_frame = prev_frame;
> + s->mv_min.y = -MARGIN;
> + s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
> + for (i = 0; i < MAX_THREADS; i++) {
> + s->thread_data[i].thread_mb_pos = 0;
> + s->thread_data[i].wait_mb_pos = INT_MAX;
> + }
>
> - if (avctx->active_thread_type == FF_THREAD_FRAME)
> - num_jobs = 1;
> - else
> - num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
> - s->num_jobs = num_jobs;
> - s->curframe = curframe;
> - s->prev_frame = prev_frame;
> - s->mv_min.y = -MARGIN;
> - s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
> - for (i = 0; i < MAX_THREADS; i++) {
> - s->thread_data[i].thread_mb_pos = 0;
> - s->thread_data[i].wait_mb_pos = INT_MAX;
> + if (is_vp7)
> + avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data,
> NULL,
> + num_jobs);
> + else
> + avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data,
> NULL,
> + num_jobs);
> }
> - if (is_vp7)
> - avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data,
> NULL,
> - num_jobs);
> - else
> - avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data,
> NULL,
> - num_jobs);
>
> ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
> memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
> @@ -2666,7 +2750,7 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
> int ret;
>
> s->avctx = avctx;
> - avctx->pix_fmt = AV_PIX_FMT_YUV420P;
> + avctx->pix_fmt = AV_PIX_FMT_NONE;
It's probably better to leave this as is. The initial value of pix_fmt
before decoding starts is just a hint to the caller about the probable
pixel format, so they don't have to do expensive decoding to find out
the actual value.
--
Anton Khirnov
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel