On 2012-12-12 16:13:40 -0800, Ronald S. Bultje wrote:
> Hi,
>
> On Wed, Dec 12, 2012 at 3:12 PM, Janne Grunau <[email protected]> wrote:
> > On 2012-12-12 14:30:43 -0800, Ronald S. Bultje wrote:
> >> Hi,
> >>
> >> On Wed, Dec 12, 2012 at 12:30 PM, Janne Grunau <[email protected]>
> >> wrote:
> >> > cmdutis.c's alloc_buffer() uses aligned to 32 width plus 2 edges of 32
> >> > pixels as linesize. emu_edge_buffer has to work with the same stride.
> >> > This makes only a difference for > 8 bit per pixel bit depths since we
> >> > always allocate for 16 bit per pixel.
> >> >
> >> > Fixes fuzzed sample nasa-8s2.ts_s244342.
> >> > ---
> >> > libavcodec/mpegvideo.c | 13 ++++++++-----
> >> > 1 file changed, 8 insertions(+), 5 deletions(-)
> >>
> >> What if we implement a custom get_buffer() which returns bigger strides?
> >
> > it fails horrible for high pixel bit depth like it does with this
> > sample. I could add an assumed_linesize into the context and realloc if
> > the linesize returned by get_buffer is larger or maybe preallocate a
> > buffer here if it' feasible.
>
> Can't you allocate these buffers after you receive the first buffer
> from get_buffer() and actually know the linesize?
I tried but it's annoying since just doing it immediately after
get_buffer() is not enough. It needs to be done for each context copy
during slice threading since those obviously don't call get_buffer().
Since h264 doesn't use mpegvideo to manage the embedded MpegEncContext
for slice threads it has to be done twice. It also needs to done in the
update_thread_copy for h264 since not every thread calls get_buffer().
Not sure if I like this patch although it is the correct solution.
Janne
---8<---
Since we can't know which stride a custom get_buffer() implementation is
going to use we have to allocate this scratch buffers after the linesize
is known. It was pretty safe for 8 bit per pixel pixel formats since we
always allocated memory for up to 16 bits per pixel. It broke hoever
with cmdutis.c's alloc_buffer() and high pixel bit depth since it
allocated larger edges than mpegvideo expected.
Fixes fuzzed sample nasa-8s2.ts_s244342.
---
libavcodec/h264.c | 17 +++++++++--
libavcodec/mpeg12.c | 5 ++-
libavcodec/mpegvideo.c | 76 +++++++++++++++++++++++++++++++++++++---------
libavcodec/mpegvideo.h | 3 +-
libavcodec/mpegvideo_enc.c | 6 ++--
5 files changed, 85 insertions(+), 22 deletions(-)
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 12af810..f01ff67 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2291,8 +2291,10 @@ static int field_end(H264Context *h, int in_setup)
/**
* Replicate H264 "master" context to thread contexts.
*/
-static void clone_slice(H264Context *dst, H264Context *src)
+static int clone_slice(H264Context *dst, H264Context *src)
{
+ int ret;
+
memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
dst->s.current_picture_ptr = src->s.current_picture_ptr;
dst->s.current_picture = src->s.current_picture;
@@ -2300,6 +2302,13 @@ static void clone_slice(H264Context *dst, H264Context
*src)
dst->s.uvlinesize = src->s.uvlinesize;
dst->s.first_field = src->s.first_field;
+ if (!dst->s.edge_emu_buffer &&
+ (ret = ff_mpv_frame_size_alloc(&dst->s, dst->s.linesize))) {
+ av_log(dst->s.avctx, AV_LOG_ERROR,
+ "Failed to allocate scratch buffers\n");
+ return ret;
+ }
+
dst->prev_poc_msb = src->prev_poc_msb;
dst->prev_poc_lsb = src->prev_poc_lsb;
dst->prev_frame_num_offset = src->prev_frame_num_offset;
@@ -2313,6 +2322,8 @@ static void clone_slice(H264Context *dst, H264Context
*src)
memcpy(dst->dequant4_coeff, src->dequant4_coeff,
sizeof(src->dequant4_coeff));
memcpy(dst->dequant8_coeff, src->dequant8_coeff,
sizeof(src->dequant8_coeff));
+
+ return 0;
}
/**
@@ -2846,8 +2857,8 @@ static int decode_slice_header(H264Context *h,
H264Context *h0)
ff_release_unused_pictures(s, 0);
}
}
- if (h != h0)
- clone_slice(h, h0);
+ if (h != h0 && (ret = clone_slice(h, h0)) < 0)
+ return ret;
s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num
cleanup
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index df8afec..83069f5 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -2394,7 +2394,10 @@ static int decode_chunks(AVCodecContext *avctx,
thread_context->end_mb_y = s2->mb_height;
if (s->slice_count) {
s2->thread_context[s->slice_count-1]->end_mb_y =
mb_y;
- ff_update_duplicate_context(thread_context, s2);
+ ret = ff_update_duplicate_context(thread_context,
+ s2);
+ if (ret < 0)
+ return ret;
}
init_get_bits(&thread_context->gb, buf_ptr,
input_size*8);
s->slice_count++;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 773c9e2..8cd8df8 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -235,12 +235,35 @@ static void free_frame_buffer(MpegEncContext *s, Picture
*pic)
av_freep(&pic->f.hwaccel_picture_private);
}
+int ff_mpv_frame_size_alloc(MpegEncContext *s, int linesize)
+{
+ int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
+
+ // edge emu needs blocksize + filter length - 1
+ // (= 17x17 for halfpel / 21x21 for h264)
+ // linesize * interlaced * MBsize
+ FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer, alloc_size * 2 * 21,
+ fail);
+
+ FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad, alloc_size * 2 * 16 * 2,
+ fail)
+ s->me.temp = s->me.scratchpad;
+ s->rd_scratchpad = s->me.scratchpad;
+ s->b_scratchpad = s->me.scratchpad;
+ s->obmc_scratchpad = s->me.scratchpad + 16;
+
+ return 0;
+fail:
+ av_freep(&s->edge_emu_buffer);
+ return AVERROR(ENOMEM);
+}
+
/**
* Allocate a frame buffer
*/
static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
{
- int r;
+ int r, ret;
if (s->avctx->hwaccel) {
assert(!pic->f.hwaccel_picture_private);
@@ -282,6 +305,14 @@ static int alloc_frame_buffer(MpegEncContext *s, Picture
*pic)
return -1;
}
+ if (!s->edge_emu_buffer &&
+ (ret = ff_mpv_frame_size_alloc(s, pic->f.linesize[0])) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "get_buffer() failed to allocate context scratch buffers.\n");
+ free_frame_buffer(s, pic);
+ return ret;
+ }
+
return 0;
}
@@ -419,19 +450,13 @@ static int init_duplicate_context(MpegEncContext *s,
MpegEncContext *base)
int yc_size = y_size + 2 * c_size;
int i;
- // edge emu needs blocksize + filter length - 1
- // (= 17x17 for halfpel / 21x21 for h264)
- FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer,
- (s->width + 64) * 2 * 21 * 2, fail); // (width + edge
+ align)*interlaced*MBsize*tolerance
+ s->edge_emu_buffer =
+ s->me.scratchpad =
+ s->me.temp =
+ s->rd_scratchpad =
+ s->b_scratchpad =
+ s->obmc_scratchpad = NULL;
- // FIXME should be linesize instead of s->width * 2
- // but that is not known before get_buffer()
- FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,
- (s->width + 64) * 4 * 16 * 2 * sizeof(uint8_t), fail)
- s->me.temp = s->me.scratchpad;
- s->rd_scratchpad = s->me.scratchpad;
- s->b_scratchpad = s->me.scratchpad;
- s->obmc_scratchpad = s->me.scratchpad + 16;
if (s->encoding) {
FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map,
ME_MAP_SIZE * sizeof(uint32_t), fail)
@@ -510,10 +535,10 @@ static void backup_duplicate_context(MpegEncContext *bak,
MpegEncContext *src)
#undef COPY
}
-void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src)
+int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src)
{
MpegEncContext bak;
- int i;
+ int i, ret;
// FIXME copy only needed parts
// START_TIMER
backup_duplicate_context(&bak, dst);
@@ -522,8 +547,15 @@ void ff_update_duplicate_context(MpegEncContext *dst,
MpegEncContext *src)
for (i = 0; i < 12; i++) {
dst->pblocks[i] = &dst->block[i];
}
+ if (!dst->edge_emu_buffer &&
+ (ret = ff_mpv_frame_size_alloc(dst, dst->linesize)) < 0) {
+ av_log(dst->avctx, AV_LOG_ERROR, "failed to allocate context "
+ "scratch buffers.\n");
+ return ret;
+ }
// STOP_TIMER("update_duplicate_context")
// about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
+ return 0;
}
int ff_mpeg_update_thread_context(AVCodecContext *dst,
@@ -608,6 +640,20 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
FF_INPUT_BUFFER_PADDING_SIZE);
}
+ // linesize dependend scratch buffer allocation
+ if (!s->edge_emu_buffer)
+ if (s1->linesize) {
+ if (ff_mpv_frame_size_alloc(s, s1->linesize) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate context "
+ "scratch buffers.\n");
+ return AVERROR(ENOMEM);
+ }
+ } else {
+ av_log(s->avctx, AV_LOG_ERROR, "Context scratch buffers could not "
+ "be allocated due to unknown size.\n");
+ return AVERROR_BUG;
+ }
+
// MPEG2/interlacing info
memcpy(&s->progressive_sequence, &s1->progressive_sequence,
(char *) &s1->rtp_mode - (char *) &s1->progressive_sequence);
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 6c9a7cb..65997ce 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -759,6 +759,7 @@ void ff_MPV_common_defaults(MpegEncContext *s);
void ff_MPV_decode_defaults(MpegEncContext *s);
int ff_MPV_common_init(MpegEncContext *s);
+int ff_mpv_frame_size_alloc(MpegEncContext *s, int linesize);
int ff_MPV_common_frame_size_change(MpegEncContext *s);
void ff_MPV_common_end(MpegEncContext *s);
void ff_MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]);
@@ -782,7 +783,7 @@ void ff_write_quant_matrix(PutBitContext *pb, uint16_t
*matrix);
void ff_release_unused_pictures(MpegEncContext *s, int remove_current);
int ff_find_unused_picture(MpegEncContext *s, int shared);
void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
-void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
+int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
int ff_MPV_lowest_referenced_row(MpegEncContext *s, int dir);
void ff_MPV_report_decode_progress(MpegEncContext *s);
int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext
*src);
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 7c92ad2..816f44c 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -3107,7 +3107,7 @@ static void set_frame_distances(MpegEncContext * s){
static int encode_picture(MpegEncContext *s, int picture_number)
{
- int i;
+ int i, ret;
int bits;
int context_count = s->slice_context_count;
@@ -3150,7 +3150,9 @@ static int encode_picture(MpegEncContext *s, int
picture_number)
s->mb_intra=0; //for the rate distortion & bit compare functions
for(i=1; i<context_count; i++){
- ff_update_duplicate_context(s->thread_context[i], s);
+ ret = ff_update_duplicate_context(s->thread_context[i], s);
+ if (ret < 0)
+ return ret;
}
if(ff_init_me(s)<0)
--
1.7.12.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel