Re: [FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:

2019-02-19 Thread Tomas Härdin
sön 2019-02-10 klockan 17:04 +0100 skrev Tomas Härdin:
> lör 2019-02-09 klockan 13:10 + skrev Matthew Fearnley:
> > - Clamp ME range to -64..63 (prevents corruption when me_range is too high)
> > - Allow MV's up to *and including* the positive range limit
> > - Allow out-of-edge ME by padding the prev buffer with a border of 0's
> > - Try previous MV before checking the rest (improves speed in some cases)
> > - More robust logic in code - ensure *mx,*my,*xored are updated together
> > ---
> >  libavcodec/zmbvenc.c | 64 +++-
> >  1 file changed, 46 insertions(+), 18 deletions(-)
> 
> Passes FATE
> 
> The only maybe suspicious thing is this part:
> 
> > -c->pstride = FFALIGN(avctx->width, 16);
> > -if (!(c->prev = av_malloc(c->pstride * avctx->height))) {
> > +
> > +/* Allocate prev buffer - leave border around the outside for out of 
> > edge ME */
> > +c->pstride = FFALIGN(avctx->width + c->lrange, 16);
> 
> Shouldn't this be with + lrange + urange? I guess it works out fine due
> to wraparound and lrange >= urange, but it makes me feel slightly
> uneasy
> 
> > +prev_offset = FFALIGN(c->lrange + (c->pstride * c->lrange), 16);
> > +prev_size = prev_offset + (c->pstride * (avctx->height + c->urange));
> 
> The way I'd do this is compute the size first, then the offset. But I
> guess this works out the same way. Maybe someone else wants to chime
> in?

Pushed patch 1 and a version of this that I only just now noticed I got
off-list. Attaching here for posterity.

We might want a test for me_range >= 64 as well.

/TomasFrom 0ffbd9124e27bf15a66aa9e7b11004963423a9be Mon Sep 17 00:00:00 2001
From: Matthew Fearnley 
Date: Thu, 7 Feb 2019 12:54:59 +
Subject: [PATCH] libavcodec/zmbvenc: motion estimation improvements/bug fixes:

- Clamp ME range to -64..63 (prevents corruption when me_range is too high)
- Allow MV's up to *and including* the positive range limit
- Allow out-of-edge ME by padding the prev buffer with a border of 0's
- Try previous MV before checking the rest (improves speed in some cases)
- More robust logic in code - ensure *mx,*my,*xored are updated together
---
 libavcodec/zmbvenc.c | 71 +---
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 3df6e724c8..c9d50b6adf 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -45,11 +45,11 @@
 typedef struct ZmbvEncContext {
 AVCodecContext *avctx;
 
-int range;
+int lrange, urange;
 uint8_t *comp_buf, *work_buf;
 uint8_t pal[768];
 uint32_t pal2[256]; //for quick comparisons
-uint8_t *prev;
+uint8_t *prev, *prev_buf;
 int pstride;
 int comp_size;
 int keyint, curfrm;
@@ -61,7 +61,6 @@ typedef struct ZmbvEncContext {
 
 /** Block comparing function
  * XXX should be optimized and moved to DSPContext
- * TODO handle out of edge ME
  */
 static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
 uint8_t *src2, int stride2, int bw, int bh,
@@ -100,23 +99,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
 static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev,
int pstride, int x, int y, int *mx, int *my, int *xored)
 {
-int dx, dy, tx, ty, tv, bv, bw, bh;
+int dx, dy, txored, tv, bv, bw, bh;
+int mx0, my0;
 
-*mx = *my = 0;
+mx0 = *mx;
+my0 = *my;
 bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x);
 bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y);
+
+/* Try (0,0) */
 bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored);
+*mx = *my = 0;
 if(!bv) return 0;
-for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){
-for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){
-if(tx == x && ty == y) continue; // we already tested this block
-dx = tx - x;
-dy = ty - y;
-tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, xored);
+
+/* Try previous block's MV (if not 0,0) */
+if (mx0 || my0){
+tv = block_cmp(c, src, sstride, prev + mx0 + my0 * pstride, pstride, bw, bh, );
+if(tv < bv){
+bv = tv;
+*mx = mx0;
+*my = my0;
+*xored = txored;
+if(!bv) return 0;
+}
+}
+
+/* Try other MVs from top-to-bottom, left-to-right */
+for(dy = -c->lrange; dy <= c->urange; dy++){
+for(dx = -c->lrange; dx <= c->urange; dx++){
+if(!dx && !dy) continue; // we already tested this block
+if(dx == mx0 && dy == my0) continue; // this one too
+tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, );
 if(tv < bv){
  bv = tv;
  *mx = dx;
  

Re: [FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:

2019-02-10 Thread Tomas Härdin
lör 2019-02-09 klockan 13:10 + skrev Matthew Fearnley:
> - Clamp ME range to -64..63 (prevents corruption when me_range is too high)
> - Allow MV's up to *and including* the positive range limit
> - Allow out-of-edge ME by padding the prev buffer with a border of 0's
> - Try previous MV before checking the rest (improves speed in some cases)
> - More robust logic in code - ensure *mx,*my,*xored are updated together
> ---
>  libavcodec/zmbvenc.c | 64 +++-
>  1 file changed, 46 insertions(+), 18 deletions(-)

Passes FATE

The only maybe suspicious thing is this part:

> -c->pstride = FFALIGN(avctx->width, 16);
> -if (!(c->prev = av_malloc(c->pstride * avctx->height))) {
> +
> +/* Allocate prev buffer - leave border around the outside for out of 
> edge ME */
> +c->pstride = FFALIGN(avctx->width + c->lrange, 16);

Shouldn't this be with + lrange + urange? I guess it works out fine due
to wraparound and lrange >= urange, but it makes me feel slightly
uneasy

> +prev_offset = FFALIGN(c->lrange + (c->pstride * c->lrange), 16);
> +prev_size = prev_offset + (c->pstride * (avctx->height + c->urange));

The way I'd do this is compute the size first, then the offset. But I
guess this works out the same way. Maybe someone else wants to chime
in?

/Tomas

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes:

2019-02-09 Thread Matthew Fearnley
- Clamp ME range to -64..63 (prevents corruption when me_range is too high)
- Allow MV's up to *and including* the positive range limit
- Allow out-of-edge ME by padding the prev buffer with a border of 0's
- Try previous MV before checking the rest (improves speed in some cases)
- More robust logic in code - ensure *mx,*my,*xored are updated together
---
 libavcodec/zmbvenc.c | 64 +++-
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 3df6e724c8..e92193478b 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -45,11 +45,11 @@
 typedef struct ZmbvEncContext {
 AVCodecContext *avctx;
 
-int range;
+int lrange, urange;
 uint8_t *comp_buf, *work_buf;
 uint8_t pal[768];
 uint32_t pal2[256]; //for quick comparisons
-uint8_t *prev;
+uint8_t *prev, *prev_buf;
 int pstride;
 int comp_size;
 int keyint, curfrm;
@@ -61,7 +61,6 @@ typedef struct ZmbvEncContext {
 
 /** Block comparing function
  * XXX should be optimized and moved to DSPContext
- * TODO handle out of edge ME
  */
 static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
 uint8_t *src2, int stride2, int bw, int bh,
@@ -100,23 +99,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t 
*src, int stride,
 static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev,
int pstride, int x, int y, int *mx, int *my, int *xored)
 {
-int dx, dy, tx, ty, tv, bv, bw, bh;
+int dx, dy, txored, tv, bv, bw, bh;
+int mx0, my0;
 
-*mx = *my = 0;
+mx0 = *mx;
+my0 = *my;
 bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x);
 bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y);
+
+/* Try (0,0) */
 bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored);
+*mx = *my = 0;
 if(!bv) return 0;
-for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height 
- bh); ty++){
-for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, 
c->avctx->width - bw); tx++){
-if(tx == x && ty == y) continue; // we already tested this block
-dx = tx - x;
-dy = ty - y;
-tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, 
bw, bh, xored);
+
+/* Try previous block's MV (if not 0,0) */
+if (mx0 || my0){
+tv = block_cmp(c, src, sstride, prev + mx0 + my0 * pstride, pstride, 
bw, bh, );
+if(tv < bv){
+bv = tv;
+*mx = mx0;
+*my = my0;
+*xored = txored;
+if(!bv) return 0;
+}
+}
+
+/* Try other MVs from top-to-bottom, left-to-right */
+for(dy = -c->lrange; dy <= c->urange; dy++){
+for(dx = -c->lrange; dx <= c->urange; dx++){
+if(!dx && !dy) continue; // we already tested this block
+if(dx == mx0 && dy == my0) continue; // this one too
+tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, 
bw, bh, );
 if(tv < bv){
  bv = tv;
  *mx = dx;
  *my = dy;
+ *xored = txored;
  if(!bv) return 0;
  }
  }
@@ -181,7 +199,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 int x, y, bh2, bw2, xored;
 uint8_t *tsrc, *tprev;
 uint8_t *mv;
-int mx, my;
+int mx = 0, my = 0;
 
 bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
 bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
@@ -269,7 +287,7 @@ static av_cold int encode_end(AVCodecContext *avctx)
 av_freep(>work_buf);
 
 deflateEnd(>zstream);
-av_freep(>prev);
+av_freep(>prev_buf);
 
 return 0;
 }
@@ -283,6 +301,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
 int zret; // Zlib return code
 int i;
 int lvl = 9;
+int prev_size, prev_offset;
 
 /* Entropy-based score tables for comparing blocks.
  * Suitable for blocks up to (ZMBV_BLOCK * ZMBV_BLOCK) bytes.
@@ -295,9 +314,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
 c->curfrm = 0;
 c->keyint = avctx->keyint_min;
-c->range = 8;
-if(avctx->me_range > 0)
-c->range = FFMIN(avctx->me_range, 127);
+
+/* Motion estimation range: maximum distance is -64..63 */
+c->lrange = c->urange = 8;
+if(avctx->me_range > 0){
+c->lrange = FFMIN(avctx->me_range, 64);
+c->urange = FFMIN(avctx->me_range, 63);
+}
 
 if(avctx->compression_level >= 0)
 lvl = avctx->compression_level;
@@ -323,11 +346,16 @@ static av_cold int encode_init(AVCodecContext *avctx)
 av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n");
 return AVERROR(ENOMEM);
 }
-c->pstride = FFALIGN(avctx->width, 16);
-if (!(c->prev = av_malloc(c->pstride * avctx->height))) {
+
+/* Allocate prev buffer - leave border around