Re: [FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:
sön 2019-02-10 klockan 17:04 +0100 skrev Tomas Härdin: > lör 2019-02-09 klockan 13:10 + skrev Matthew Fearnley: > > - Clamp ME range to -64..63 (prevents corruption when me_range is too high) > > - Allow MV's up to *and including* the positive range limit > > - Allow out-of-edge ME by padding the prev buffer with a border of 0's > > - Try previous MV before checking the rest (improves speed in some cases) > > - More robust logic in code - ensure *mx,*my,*xored are updated together > > --- > > libavcodec/zmbvenc.c | 64 +++- > > 1 file changed, 46 insertions(+), 18 deletions(-) > > Passes FATE > > The only maybe suspicious thing is this part: > > > -c->pstride = FFALIGN(avctx->width, 16); > > -if (!(c->prev = av_malloc(c->pstride * avctx->height))) { > > + > > +/* Allocate prev buffer - leave border around the outside for out of > > edge ME */ > > +c->pstride = FFALIGN(avctx->width + c->lrange, 16); > > Shouldn't this be with + lrange + urange? I guess it works out fine due > to wraparound and lrange >= urange, but it makes me feel slightly > uneasy > > > +prev_offset = FFALIGN(c->lrange + (c->pstride * c->lrange), 16); > > +prev_size = prev_offset + (c->pstride * (avctx->height + c->urange)); > > The way I'd do this is compute the size first, then the offset. But I > guess this works out the same way. Maybe someone else wants to chime > in? Pushed patch 1 and a version of this that I only just now noticed I got off-list. Attaching here for posterity. We might want a test for me_range >= 64 as well. /TomasFrom 0ffbd9124e27bf15a66aa9e7b11004963423a9be Mon Sep 17 00:00:00 2001 From: Matthew Fearnley Date: Thu, 7 Feb 2019 12:54:59 + Subject: [PATCH] libavcodec/zmbvenc: motion estimation improvements/bug fixes: - Clamp ME range to -64..63 (prevents corruption when me_range is too high) - Allow MV's up to *and including* the positive range limit - Allow out-of-edge ME by padding the prev buffer with a border of 0's - Try previous MV before checking the rest (improves speed in some cases) - More robust logic in code - ensure *mx,*my,*xored are updated together --- libavcodec/zmbvenc.c | 71 +--- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c index 3df6e724c8..c9d50b6adf 100644 --- a/libavcodec/zmbvenc.c +++ b/libavcodec/zmbvenc.c @@ -45,11 +45,11 @@ typedef struct ZmbvEncContext { AVCodecContext *avctx; -int range; +int lrange, urange; uint8_t *comp_buf, *work_buf; uint8_t pal[768]; uint32_t pal2[256]; //for quick comparisons -uint8_t *prev; +uint8_t *prev, *prev_buf; int pstride; int comp_size; int keyint, curfrm; @@ -61,7 +61,6 @@ typedef struct ZmbvEncContext { /** Block comparing function * XXX should be optimized and moved to DSPContext - * TODO handle out of edge ME */ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, uint8_t *src2, int stride2, int bw, int bh, @@ -100,23 +99,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev, int pstride, int x, int y, int *mx, int *my, int *xored) { -int dx, dy, tx, ty, tv, bv, bw, bh; +int dx, dy, txored, tv, bv, bw, bh; +int mx0, my0; -*mx = *my = 0; +mx0 = *mx; +my0 = *my; bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x); bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y); + +/* Try (0,0) */ bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored); +*mx = *my = 0; if(!bv) return 0; -for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){ -for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){ -if(tx == x && ty == y) continue; // we already tested this block -dx = tx - x; -dy = ty - y; -tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, xored); + +/* Try previous block's MV (if not 0,0) */ +if (mx0 || my0){ +tv = block_cmp(c, src, sstride, prev + mx0 + my0 * pstride, pstride, bw, bh, ); +if(tv < bv){ +bv = tv; +*mx = mx0; +*my = my0; +*xored = txored; +if(!bv) return 0; +} +} + +/* Try other MVs from top-to-bottom, left-to-right */ +for(dy = -c->lrange; dy <= c->urange; dy++){ +for(dx = -c->lrange; dx <= c->urange; dx++){ +if(!dx && !dy) continue; // we already tested this block +if(dx == mx0 && dy == my0) continue; // this one too +tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, ); if(tv < bv){ bv = tv; *mx = dx;
Re: [FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:
lör 2019-02-09 klockan 13:10 + skrev Matthew Fearnley: > - Clamp ME range to -64..63 (prevents corruption when me_range is too high) > - Allow MV's up to *and including* the positive range limit > - Allow out-of-edge ME by padding the prev buffer with a border of 0's > - Try previous MV before checking the rest (improves speed in some cases) > - More robust logic in code - ensure *mx,*my,*xored are updated together > --- > libavcodec/zmbvenc.c | 64 +++- > 1 file changed, 46 insertions(+), 18 deletions(-) Passes FATE The only maybe suspicious thing is this part: > -c->pstride = FFALIGN(avctx->width, 16); > -if (!(c->prev = av_malloc(c->pstride * avctx->height))) { > + > +/* Allocate prev buffer - leave border around the outside for out of > edge ME */ > +c->pstride = FFALIGN(avctx->width + c->lrange, 16); Shouldn't this be with + lrange + urange? I guess it works out fine due to wraparound and lrange >= urange, but it makes me feel slightly uneasy > +prev_offset = FFALIGN(c->lrange + (c->pstride * c->lrange), 16); > +prev_size = prev_offset + (c->pstride * (avctx->height + c->urange)); The way I'd do this is compute the size first, then the offset. But I guess this works out the same way. Maybe someone else wants to chime in? /Tomas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:
- Clamp ME range to -64..63 (prevents corruption when me_range is too high) - Allow MV's up to *and including* the positive range limit - Allow out-of-edge ME by padding the prev buffer with a border of 0's - Try previous MV before checking the rest (improves speed in some cases) - More robust logic in code - ensure *mx,*my,*xored are updated together --- libavcodec/zmbvenc.c | 64 +++- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c index 3df6e724c8..e92193478b 100644 --- a/libavcodec/zmbvenc.c +++ b/libavcodec/zmbvenc.c @@ -45,11 +45,11 @@ typedef struct ZmbvEncContext { AVCodecContext *avctx; -int range; +int lrange, urange; uint8_t *comp_buf, *work_buf; uint8_t pal[768]; uint32_t pal2[256]; //for quick comparisons -uint8_t *prev; +uint8_t *prev, *prev_buf; int pstride; int comp_size; int keyint, curfrm; @@ -61,7 +61,6 @@ typedef struct ZmbvEncContext { /** Block comparing function * XXX should be optimized and moved to DSPContext - * TODO handle out of edge ME */ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, uint8_t *src2, int stride2, int bw, int bh, @@ -100,23 +99,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev, int pstride, int x, int y, int *mx, int *my, int *xored) { -int dx, dy, tx, ty, tv, bv, bw, bh; +int dx, dy, txored, tv, bv, bw, bh; +int mx0, my0; -*mx = *my = 0; +mx0 = *mx; +my0 = *my; bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x); bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y); + +/* Try (0,0) */ bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored); +*mx = *my = 0; if(!bv) return 0; -for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){ -for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){ -if(tx == x && ty == y) continue; // we already tested this block -dx = tx - x; -dy = ty - y; -tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, xored); + +/* Try previous block's MV (if not 0,0) */ +if (mx0 || my0){ +tv = block_cmp(c, src, sstride, prev + mx0 + my0 * pstride, pstride, bw, bh, ); +if(tv < bv){ +bv = tv; +*mx = mx0; +*my = my0; +*xored = txored; +if(!bv) return 0; +} +} + +/* Try other MVs from top-to-bottom, left-to-right */ +for(dy = -c->lrange; dy <= c->urange; dy++){ +for(dx = -c->lrange; dx <= c->urange; dx++){ +if(!dx && !dy) continue; // we already tested this block +if(dx == mx0 && dy == my0) continue; // this one too +tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, ); if(tv < bv){ bv = tv; *mx = dx; *my = dy; + *xored = txored; if(!bv) return 0; } } @@ -181,7 +199,7 @@ FF_ENABLE_DEPRECATION_WARNINGS int x, y, bh2, bw2, xored; uint8_t *tsrc, *tprev; uint8_t *mv; -int mx, my; +int mx = 0, my = 0; bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK; bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK; @@ -269,7 +287,7 @@ static av_cold int encode_end(AVCodecContext *avctx) av_freep(>work_buf); deflateEnd(>zstream); -av_freep(>prev); +av_freep(>prev_buf); return 0; } @@ -283,6 +301,7 @@ static av_cold int encode_init(AVCodecContext *avctx) int zret; // Zlib return code int i; int lvl = 9; +int prev_size, prev_offset; /* Entropy-based score tables for comparing blocks. * Suitable for blocks up to (ZMBV_BLOCK * ZMBV_BLOCK) bytes. @@ -295,9 +314,13 @@ static av_cold int encode_init(AVCodecContext *avctx) c->curfrm = 0; c->keyint = avctx->keyint_min; -c->range = 8; -if(avctx->me_range > 0) -c->range = FFMIN(avctx->me_range, 127); + +/* Motion estimation range: maximum distance is -64..63 */ +c->lrange = c->urange = 8; +if(avctx->me_range > 0){ +c->lrange = FFMIN(avctx->me_range, 64); +c->urange = FFMIN(avctx->me_range, 63); +} if(avctx->compression_level >= 0) lvl = avctx->compression_level; @@ -323,11 +346,16 @@ static av_cold int encode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n"); return AVERROR(ENOMEM); } -c->pstride = FFALIGN(avctx->width, 16); -if (!(c->prev = av_malloc(c->pstride * avctx->height))) { + +/* Allocate prev buffer - leave border around