On 09/09/17 11:28, Mark Thompson wrote:
> On 06/09/17 03:35, Jun Zhao wrote:
>> On 2017/8/28 20:28, Mark Thompson wrote:
>>> On 24/08/17 02:13, Jun Zhao wrote:
>>>> V5: - In h265_vaapi encoder, when setting slice number > max slice number
>>>>       supported by driver, report error and return. Same as h264_vaapi.
>>>>     - Clean the logic when setting first_slice_segment_in_pic_flags  
>>>> V4: - Change the array malloc function.
>>>>     - Clean the pointless condition check when free the memory.
>>>>
>>>> V3: - Making pic->slices be VAAPIEncodeSlice* instead of
>>>> VAAPIEncodeSlice**. - Fix resource (vaBuffer) lead when realloc
>>>> pic->param_buffers fail. - Adjust max_slices location in
>>>> VAAPIEncodeContext. - Re-work distributing the macro-blocks for
>>>> multi-slices function. V2: - Change the slice/parameter buffers to
>>>> dynamic alloc and split the mutil-slice support for AVC/HEVC.
>>>>
>>>>
>>>> From a1078385915d53ec94559ed897eb253e537d1f65 Mon Sep 17 00:00:00 2001
>>>> From: Jun Zhao <jun.z...@intel.com>
>>>> Date: Mon, 31 Jul 2017 22:46:23 -0400
>>>> Subject: [V5 1/4] lavc/vaapi_encode: Change the slice/parameter buffers to
>>>>  dynamic alloc.
>>>>
>>>> Change the slice/parameter buffers to be allocated dynamically.
>>>>
>>>> Signed-off-by: Wang, Yi A <yi.a.w...@intel.com>
>>>> Signed-off-by: Jun Zhao <jun.z...@intel.com>
>>>> ---
>>>>  libavcodec/vaapi_encode.c | 44 
>>>> ++++++++++++++++++++++++++++++++------------
>>>>  libavcodec/vaapi_encode.h |  6 ++----
>>>>  2 files changed, 34 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>>> index 22114bedbe..f49e0e3b91 100644
>>>> --- a/libavcodec/vaapi_encode.c
>>>> +++ b/libavcodec/vaapi_encode.c
>>>> @@ -36,13 +36,18 @@ static int 
>>>> vaapi_encode_make_packed_header(AVCodecContext *avctx,
>>>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>>>      VAStatus vas;
>>>>      VABufferID param_buffer, data_buffer;
>>>> +    VABufferID *tmp;
>>>>      VAEncPackedHeaderParameterBuffer params = {
>>>>          .type = type,
>>>>          .bit_length = bit_len,
>>>>          .has_emulation_bytes = 1,
>>>>      };
>>>>  
>>>> -    av_assert0(pic->nb_param_buffers + 2 <= MAX_PARAM_BUFFERS);
>>>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
>>>> pic->nb_param_buffers + 2);
>>>> +    if (!tmp) {
>>>> +        return AVERROR(ENOMEM);
>>>> +    }
>>>> +    pic->param_buffers = tmp;
>>>>  
>>>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>>>                           VAEncPackedHeaderParameterBufferType,
>>>> @@ -77,9 +82,14 @@ static int 
>>>> vaapi_encode_make_param_buffer(AVCodecContext *avctx,
>>>>  {
>>>>      VAAPIEncodeContext *ctx = avctx->priv_data;
>>>>      VAStatus vas;
>>>> +    VABufferID *tmp;
>>>>      VABufferID buffer;
>>>>  
>>>> -    av_assert0(pic->nb_param_buffers + 1 <= MAX_PARAM_BUFFERS);
>>>> +    tmp = av_realloc_array(pic->param_buffers, sizeof(*tmp), 
>>>> pic->nb_param_buffers + 1);
>>>> +    if (!tmp) {
>>>> +        return AVERROR(ENOMEM);
>>>> +    }
>>>> +    pic->param_buffers = tmp;
>>>>  
>>>>      vas = vaCreateBuffer(ctx->hwctx->display, ctx->va_context,
>>>>                           type, len, 1, data, &buffer);
>>>> @@ -313,15 +323,14 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
>>>>          }
>>>>      }
>>>>  
>>>> -    av_assert0(pic->nb_slices <= MAX_PICTURE_SLICES);
>>>> +    pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
>>>> +    if (!pic->slices) {
>>>> +        err = AVERROR(ENOMEM);
>>>> +        goto fail;
>>>> +    }
>>>>      for (i = 0; i < pic->nb_slices; i++) {
>>>> -        slice = av_mallocz(sizeof(*slice));
>>>> -        if (!slice) {
>>>> -            err = AVERROR(ENOMEM);
>>>> -            goto fail;
>>>> -        }
>>>> +        slice = &pic->slices[i];
>>>>          slice->index = i;
>>>> -        pic->slices[i] = slice;
>>>>  
>>>>          if (ctx->codec->slice_params_size > 0) {
>>>>              slice->codec_slice_params = 
>>>> av_mallocz(ctx->codec->slice_params_size);
>>>> @@ -425,8 +434,16 @@ fail_with_picture:
>>>>  fail:
>>>>      for(i = 0; i < pic->nb_param_buffers; i++)
>>>>          vaDestroyBuffer(ctx->hwctx->display, pic->param_buffers[i]);
>>>> +    for (i = 0; i < pic->nb_slices; i++) {
>>>> +        if (pic->slices) {
>>>> +            av_freep(&pic->slices[i].priv_data);
>>>> +            av_freep(&pic->slices[i].codec_slice_params);
>>>> +        }
>>>> +    }
>>>>  fail_at_end:
>>>>      av_freep(&pic->codec_picture_params);
>>>> +    av_freep(&pic->param_buffers);
>>>> +    av_freep(&pic->slices);
>>>>      av_frame_free(&pic->recon_image);
>>>>      av_buffer_unref(&pic->output_buffer_ref);
>>>>      pic->output_buffer = VA_INVALID_ID;
>>>> @@ -535,15 +552,18 @@ static int vaapi_encode_free(AVCodecContext *avctx,
>>>>          vaapi_encode_discard(avctx, pic);
>>>>  
>>>>      for (i = 0; i < pic->nb_slices; i++) {
>>>> -        av_freep(&pic->slices[i]->priv_data);
>>>> -        av_freep(&pic->slices[i]->codec_slice_params);
>>>> -        av_freep(&pic->slices[i]);
>>>> +        if (pic->slices) {
>>>> +            av_freep(&pic->slices[i].priv_data);
>>>> +            av_freep(&pic->slices[i].codec_slice_params);
>>>> +        }
>>>>      }
>>>>      av_freep(&pic->codec_picture_params);
>>>>  
>>>>      av_frame_free(&pic->input_image);
>>>>      av_frame_free(&pic->recon_image);
>>>>  
>>>> +    av_freep(&pic->param_buffers);
>>>> +    av_freep(&pic->slices);
>>>>      // Output buffer should already be destroyed.
>>>>      av_assert0(pic->output_buffer == VA_INVALID_ID);
>>>>  
>>>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>>>> index 3bf0cc87c7..364b5b8028 100644
>>>> --- a/libavcodec/vaapi_encode.h
>>>> +++ b/libavcodec/vaapi_encode.h
>>>> @@ -35,8 +35,6 @@ enum {
>>>>      MAX_CONFIG_ATTRIBUTES  = 4,
>>>>      MAX_GLOBAL_PARAMS      = 4,
>>>>      MAX_PICTURE_REFERENCES = 2,
>>>> -    MAX_PICTURE_SLICES     = 112,
>>>> -    MAX_PARAM_BUFFERS      = 128,
>>>>      MAX_REORDER_DELAY      = 16,
>>>>      MAX_PARAM_BUFFER_SIZE  = 1024,
>>>>  };
>>>> @@ -73,7 +71,7 @@ typedef struct VAAPIEncodePicture {
>>>>      VASurfaceID     recon_surface;
>>>>  
>>>>      int          nb_param_buffers;
>>>> -    VABufferID      param_buffers[MAX_PARAM_BUFFERS];
>>>> +    VABufferID      *param_buffers;
>>>>  
>>>>      AVBufferRef    *output_buffer_ref;
>>>>      VABufferID      output_buffer;
>>>> @@ -85,7 +83,7 @@ typedef struct VAAPIEncodePicture {
>>>>      struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
>>>>  
>>>>      int          nb_slices;
>>>> -    VAAPIEncodeSlice *slices[MAX_PICTURE_SLICES];
>>>> +    VAAPIEncodeSlice *slices;
>>>>  } VAAPIEncodePicture;
>>>>  
>>>>  typedef struct VAAPIEncodeContext {
>>>> -- 
>>>> 2.11.0
>>>>
>>> Applied this one.
>>>
>>> The others look ok, but I'm still dubious about what the result should 
>>> actually be with multiple slices.  Would you mind answering 
>>> <3b7ccc48-25f7-71d3-ac98-8b40b8594...@jkqxz.net>, particularly around cases 
>>> (2) and (4), or any others applying to your use-case?  I don't think error 
>>> resilience is relevant, since we aren't actually using any of the relevant 
>>> features to make it effective (e.g. redundant coding).
>>>
>>> Thanks,
>>>
>>> - Mark
>> For 2). I think is yes, some SW decode can parallelism when encoder
>> support mutil-slice.
> 
> Do said decoders place any requirements on the slices?
> 
> From a bit of research I suspect that bluray slice requirements are actually 
> for rectangular slices, but I don't have a reference for that.  Maybe someone 
> else can confirm or deny that?

<http://www.blu-raydisc.com/assets/Downloadablefile/BD-ROM_Part3_V3.0_WhitePaper_150724.pdf>:

"A slice segment shall be composed of one or more rows of a coding tree block. 
A row of a coding tree block indicates all the coding tree blocks in a 
horizontal row of coding tree block."

Can't find an equivalent reference for H.264, though.

>> For 3). when send NALU with RTP, customer can use mutil-slice to
>> adaptive the coded slice size to different MTU size, this means when we
>> send NALU in RTP, we can save CPU power and give low latency in link 
>> because we don't need to  fragment the RTP packet in local CPU.
> 
> This (RFC 6184 in single NAL unit packet mode) requires support for arbitrary 
> numbers of slices limited by MTU - that's not possible in current VAAPI, and 
> even if it were then selection of slices by count only as here is not 
> sufficient (complexity need not be uniformly distributed through the picture).
> 
> - Mark
> 
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to