Re: [FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
On Sat, Jan 12, 2019 at 04:49:40PM +0100, Carl Eugen Hoyos wrote: > 2019-01-12 16:46 GMT+01:00, Michael Niedermayer : > > On Sat, Jan 12, 2019 at 04:07:42PM +0100, Carl Eugen Hoyos wrote: > >> 2019-01-04 20:22 GMT+01:00, Michael Niedermayer : > >> > >> > +static void scaledown(uint8_t *dst, const uint8_t *src, int w) > >> > +{ > >> > +int x; > >> > +for (x = 0; x < w - 7; x+=8) { > >> > +dst[x + 0] = src[2*x + 0]; > >> > +dst[x + 1] = src[2*x + 2]; > >> > +dst[x + 2] = src[2*x + 4]; > >> > +dst[x + 3] = src[2*x + 6]; > >> > +dst[x + 4] = src[2*x + 8]; > >> > +dst[x + 5] = src[2*x +10]; > >> > +dst[x + 6] = src[2*x +12]; > >> > +dst[x + 7] = src[2*x +14]; > >> > >> Could you add to the commit message the information > >> which compiler is able to optimize this? > >> (Assuming this is a reason for the speedup) > > > > if what you ask for is "which compiler turns this into SIMD" > > i do not know, and i suspect mine does not from the limited > > increase in performance > > I think the speedup is primarly from simply unrolling the trivial loop > > > > is there something you want me to change in the commit message still ? > > No, I am a little surprised that unrolling without SIMD makes > a difference. will apply thx > > Thank you for the explanation, Carl Eugen > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB There will always be a question for which you do not know the correct answer. signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
2019-01-12 16:46 GMT+01:00, Michael Niedermayer : > On Sat, Jan 12, 2019 at 04:07:42PM +0100, Carl Eugen Hoyos wrote: >> 2019-01-04 20:22 GMT+01:00, Michael Niedermayer : >> >> > +static void scaledown(uint8_t *dst, const uint8_t *src, int w) >> > +{ >> > +int x; >> > +for (x = 0; x < w - 7; x+=8) { >> > +dst[x + 0] = src[2*x + 0]; >> > +dst[x + 1] = src[2*x + 2]; >> > +dst[x + 2] = src[2*x + 4]; >> > +dst[x + 3] = src[2*x + 6]; >> > +dst[x + 4] = src[2*x + 8]; >> > +dst[x + 5] = src[2*x +10]; >> > +dst[x + 6] = src[2*x +12]; >> > +dst[x + 7] = src[2*x +14]; >> >> Could you add to the commit message the information >> which compiler is able to optimize this? >> (Assuming this is a reason for the speedup) > > if what you ask for is "which compiler turns this into SIMD" > i do not know, and i suspect mine does not from the limited > increase in performance > I think the speedup is primarly from simply unrolling the trivial loop > > is there something you want me to change in the commit message still ? No, I am a little surprised that unrolling without SIMD makes a difference. Thank you for the explanation, Carl Eugen ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
On Sat, Jan 12, 2019 at 04:07:42PM +0100, Carl Eugen Hoyos wrote: > 2019-01-04 20:22 GMT+01:00, Michael Niedermayer : > > > +static void scaledown(uint8_t *dst, const uint8_t *src, int w) > > +{ > > +int x; > > +for (x = 0; x < w - 7; x+=8) { > > +dst[x + 0] = src[2*x + 0]; > > +dst[x + 1] = src[2*x + 2]; > > +dst[x + 2] = src[2*x + 4]; > > +dst[x + 3] = src[2*x + 6]; > > +dst[x + 4] = src[2*x + 8]; > > +dst[x + 5] = src[2*x +10]; > > +dst[x + 6] = src[2*x +12]; > > +dst[x + 7] = src[2*x +14]; > > Could you add to the commit message the information > which compiler is able to optimize this? > (Assuming this is a reason for the speedup) if what you ask for is "which compiler turns this into SIMD" i do not know, and i suspect mine does not from the limited increase in performance I think the speedup is primarly from simply unrolling the trivial loop is there something you want me to change in the commit message still ? > > Sorry for the late comment, Carl Eugen > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Dictatorship: All citizens are under surveillance, all their steps and actions recorded, for the politicians to enforce control. Democracy: All politicians are under surveillance, all their steps and actions recorded, for the citizens to enforce control. signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
2019-01-04 20:22 GMT+01:00, Michael Niedermayer : > +static void scaledown(uint8_t *dst, const uint8_t *src, int w) > +{ > +int x; > +for (x = 0; x < w - 7; x+=8) { > +dst[x + 0] = src[2*x + 0]; > +dst[x + 1] = src[2*x + 2]; > +dst[x + 2] = src[2*x + 4]; > +dst[x + 3] = src[2*x + 6]; > +dst[x + 4] = src[2*x + 8]; > +dst[x + 5] = src[2*x +10]; > +dst[x + 6] = src[2*x +12]; > +dst[x + 7] = src[2*x +14]; Could you add to the commit message the information which compiler is able to optimize this? (Assuming this is a reason for the speedup) Sorry for the late comment, Carl Eugen ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
On Fri, Jan 04, 2019 at 08:22:35PM +0100, Michael Niedermayer wrote: > Fixes: Timeout > Fixes: > 11067/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 > > Before change: Executed > clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 > in 34386 ms > After change: Executed > clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 > in 24327 ms > > Found-by: continuous fuzzing process > https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg > Signed-off-by: Michael Niedermayer > --- > libavcodec/gdv.c | 87 +++- > 1 file changed, 64 insertions(+), 23 deletions(-) will apply [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB No human being will ever know the Truth, for even if they happen to say it by chance, they would not even known they had done so. -- Xenophanes signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/3] avcodec/gdv: Optimize and factorize scaling loops
Fixes: Timeout Fixes: 11067/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 Before change: Executed clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 in 34386 ms After change: Executed clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_GDV_fuzzer-5686623711264768 in 24327 ms Found-by: continuous fuzzing process https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg Signed-off-by: Michael Niedermayer --- libavcodec/gdv.c | 87 +++- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/libavcodec/gdv.c b/libavcodec/gdv.c index 132ec7288a..c5d80f43f6 100644 --- a/libavcodec/gdv.c +++ b/libavcodec/gdv.c @@ -72,9 +72,64 @@ static av_cold int gdv_decode_init(AVCodecContext *avctx) return 0; } +static void scaleup(uint8_t *dst, const uint8_t *src, int w) +{ +int x; +for (x = 0; x < w - 7; x+=8) { +dst[x + 0] = +dst[x + 1] = src[(x>>1) + 0]; +dst[x + 2] = +dst[x + 3] = src[(x>>1) + 1]; +dst[x + 4] = +dst[x + 5] = src[(x>>1) + 2]; +dst[x + 6] = +dst[x + 7] = src[(x>>1) + 3]; +} +for (; x < w; x++) { +dst[x] = src[(x>>1)]; +} +} + +static void scaleup_rev(uint8_t *dst, const uint8_t *src, int w) +{ +int x; + +for (x = w - 1; (x+1) & 7; x--) { +dst[x] = src[(x>>1)]; +} +for (x -= 7; x >= 0; x -= 8) { +dst[x + 6] = +dst[x + 7] = src[(x>>1) + 3]; +dst[x + 4] = +dst[x + 5] = src[(x>>1) + 2]; +dst[x + 2] = +dst[x + 3] = src[(x>>1) + 1]; +dst[x + 0] = +dst[x + 1] = src[(x>>1) + 0]; +} +} + +static void scaledown(uint8_t *dst, const uint8_t *src, int w) +{ +int x; +for (x = 0; x < w - 7; x+=8) { +dst[x + 0] = src[2*x + 0]; +dst[x + 1] = src[2*x + 2]; +dst[x + 2] = src[2*x + 4]; +dst[x + 3] = src[2*x + 6]; +dst[x + 4] = src[2*x + 8]; +dst[x + 5] = src[2*x +10]; +dst[x + 6] = src[2*x +12]; +dst[x + 7] = src[2*x +14]; +} +for (; x < w; x++) { +dst[x] = src[2*x]; +} +} + static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, int scale_h) { -int j, y, x; +int j, y; if ((gdv->scale_v == scale_v) && (gdv->scale_h == scale_h)) { return; @@ -86,14 +141,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w; uint8_t *src1 = dst + PREAMBLE_SIZE + (y>>!!gdv->scale_h) * (w>>1); -for (x = w - 1; x >= 0 && !(x&1); x--) { -dst1[x] = src1[(x>>1)]; -} - -for (x--; x >= 0; x-=2) { -dst1[x ] = -dst1[x+1] = src1[(x>>1)]; -} +scaleup_rev(dst1, src1, w); } } else if (gdv->scale_h) { for (j = 0; j < h; j++) { @@ -108,9 +156,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in for (y = 0; y < (h>>1); y++) { uint8_t *dst1 = dst + PREAMBLE_SIZE + y * (w>>1); uint8_t *src1 = dst + PREAMBLE_SIZE + y*2 * w; -for (x = 0; x < (w>>1); x++) { -dst1[x] = src1[x*2]; -} +scaledown(dst1, src1, w>>1); } } else if (scale_h) { for (y = 0; y < (h>>1); y++) { @@ -121,9 +167,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in } else if (scale_v) { for (y = 0; y < h; y++) { uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w; -for (x = 0; x < (w>>1); x++) { -dst1[x] = dst1[x*2]; -} +scaledown(dst1, dst1, w>>1); } } @@ -479,19 +523,16 @@ static int gdv_decode_frame(AVCodecContext *avctx, void *data, } } else { int sidx = PREAMBLE_SIZE, didx = 0; -int y, x; +int y; for (y = 0; y < avctx->height; y++) { if (!gdv->scale_v) { memcpy(dst + didx, gdv->frame + sidx, avctx->width); } else { -for (x = 0; x < avctx->width - 1; x+=2) { -dst[didx + x] = -dst[didx + x + 1] = gdv->frame[sidx + (x>>1)]; -} -for (; x < avctx->width; x++) { -dst[didx + x] = gdv->frame[sidx + (x>>1)]; -} +uint8_t *dst2 = dst + didx; +uint8_t *src2 = gdv->frame + sidx; + +scaleup(dst2, src2, avctx->width); } if (!gdv->scale_h || ((y & 1) == 1)) { sidx += !gdv->scale_v ? avctx->width : avctx->width/2; -- 2.20.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org