Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2019-07-17 Thread Paul B Mahol
Do we want to apply this?

On 1/4/19, Alex Mogurenko  wrote:
> ping
>
> On Mon, Dec 31, 2018 at 11:02 PM Alex Mogurenko  wrote:
>
>> thanks,
>> just sent new patch
>>
>> On Mon, Dec 31, 2018 at 5:56 PM Derek Buitenhuis <
>> derek.buitenh...@gmail.com> wrote:
>>
>>> On 31/12/2018 09:12, Alex Mogurenko wrote:
>>> > I seems to be lame as failed to find how to run fate to check prores_ks
>>> :(
>>>
>>> General way to run all of FATE:
>>>
>>> $ configure --samples=/home/path/for/samples/ --enable-gpl [...]
>>> $ make fate-rsync
>>> $ make fate
>>>
>>> There are sub-targets, too, as listed in Michaels mail, like
>>> fate-vsynth1.
>>>
>>> - Derek
>>> ___
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2019-01-04 Thread Alex Mogurenko
ping

On Mon, Dec 31, 2018 at 11:02 PM Alex Mogurenko  wrote:

> thanks,
> just sent new patch
>
> On Mon, Dec 31, 2018 at 5:56 PM Derek Buitenhuis <
> derek.buitenh...@gmail.com> wrote:
>
>> On 31/12/2018 09:12, Alex Mogurenko wrote:
>> > I seems to be lame as failed to find how to run fate to check prores_ks
>> :(
>>
>> General way to run all of FATE:
>>
>> $ configure --samples=/home/path/for/samples/ --enable-gpl [...]
>> $ make fate-rsync
>> $ make fate
>>
>> There are sub-targets, too, as listed in Michaels mail, like fate-vsynth1.
>>
>> - Derek
>> ___
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-31 Thread Alex Mogurenko
thanks,
just sent new patch

On Mon, Dec 31, 2018 at 5:56 PM Derek Buitenhuis 
wrote:

> On 31/12/2018 09:12, Alex Mogurenko wrote:
> > I seems to be lame as failed to find how to run fate to check prores_ks
> :(
>
> General way to run all of FATE:
>
> $ configure --samples=/home/path/for/samples/ --enable-gpl [...]
> $ make fate-rsync
> $ make fate
>
> There are sub-targets, too, as listed in Michaels mail, like fate-vsynth1.
>
> - Derek
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-31 Thread Alex Mogurenko
fdct is done twice for each block: the first time during quant calculation, the
second during slice encoding. So if we pre-save the DCT coefficients, there is
no need to do the fdct a second time.
disadvantages: requires more memory
advantages: improves performance ~4-5%
---
 libavcodec/proresenc_kostya.c | 74 ---
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index e045a972f1..d2f81e73f4 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c
@@ -219,7 +219,6 @@ struct TrellisNode {
 #define MAX_STORED_Q 16
 
 typedef struct ProresThreadData {
-DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * 
MAX_MBS_PER_SLICE];
 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 int16_t custom_q[64];
 int16_t custom_chroma_q[64];
@@ -228,7 +227,6 @@ typedef struct ProresThreadData {
 
 typedef struct ProresContext {
 AVClass *class;
-DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * 
MAX_MBS_PER_SLICE];
 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 int16_t quants[MAX_STORED_Q][64];
 int16_t quants_chroma[MAX_STORED_Q][64];
@@ -237,6 +235,7 @@ typedef struct ProresContext {
 const uint8_t *quant_mat;
 const uint8_t *quant_chroma_mat;
 const uint8_t *scantable;
+int16_t *blocks[MAX_PLANES];
 
 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
  ptrdiff_t linesize, int16_t *block);
@@ -562,6 +561,8 @@ static int encode_slice(AVCodecContext *avctx, const 
AVFrame *pic,
 int plane_factor, is_chroma;
 uint16_t *qmat;
 uint16_t *qmat_chroma;
+int16_t *blocks;
+DECLARE_ALIGNED(16, int16_t, dct_blocks)[16 * 16 * MAX_MBS_PER_SLICE];
 
 if (ctx->pictures_per_frame == 1)
 line_add = 0;
@@ -604,28 +605,38 @@ static int encode_slice(AVCodecContext *avctx, const 
AVFrame *pic,
 src = (const uint16_t*)(pic->data[i] + yp * linesize +
 line_add * pic->linesize[i]) + xp;
 
+if (!ctx->force_quant) {
+blocks = ctx->blocks[i] + (y * ctx->slices_width * 
ctx->mbs_per_slice + x) * 16 * 16;
+} else {
+blocks = dct_blocks;
+}
+
 if (i < 3) {
-get_slice_data(ctx, src, linesize, xp, yp,
-   pwidth, avctx->height / ctx->pictures_per_frame,
-   ctx->blocks[0], ctx->emu_buf,
-   mbs_per_slice, num_cblocks, is_chroma);
+if (ctx->force_quant) {
+get_slice_data(ctx, src, linesize, xp, yp,
+   pwidth, avctx->height / ctx->pictures_per_frame,
+   blocks, ctx->emu_buf,
+   mbs_per_slice, num_cblocks, is_chroma);
+}
 if (!is_chroma) {/* luma quant */
 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
-  mbs_per_slice, ctx->blocks[0],
+  mbs_per_slice, blocks,
   num_cblocks, plane_factor,
   qmat);
 } else { /* chroma plane */
 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
-  mbs_per_slice, ctx->blocks[0],
+  mbs_per_slice, blocks,
   num_cblocks, plane_factor,
   qmat_chroma);
 }
 } else {
-get_alpha_data(ctx, src, linesize, xp, yp,
-   pwidth, avctx->height / ctx->pictures_per_frame,
-   ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
+if (ctx->force_quant) {
+get_alpha_data(ctx, src, linesize, xp, yp,
+   pwidth, avctx->height / ctx->pictures_per_frame,
+   blocks, mbs_per_slice, ctx->alpha_bits);
+}
 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
-  ctx->blocks[0], quant);
+  blocks, quant);
 }
 total_size += sizes[i];
 if (put_bits_left(pb) < 0) {
@@ -730,15 +741,15 @@ static int estimate_slice_plane(ProresContext *ctx, int 
*error, int plane,
 const uint16_t *src, ptrdiff_t linesize,
 int mbs_per_slice,
 int blocks_per_mb, int plane_size_factor,
-const int16_t *qmat, ProresThreadData *td)
+const int16_t *qmat, int16_t *blocks)
 {
 int blocks_per_slice;
 int bits;
 
 blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
-bits  = 

Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-31 Thread Derek Buitenhuis
On 31/12/2018 09:12, Alex Mogurenko wrote:
> I seems to be lame as failed to find how to run fate to check prores_ks :(

General way to run all of FATE:

$ configure --samples=/home/path/for/samples/ --enable-gpl [...]
$ make fate-rsync
$ make fate

There are sub-targets, too, as listed in Michael's mail, like fate-vsynth1.

- Derek
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-31 Thread Alex Mogurenko
I seem to be lame, as I failed to find out how to run FATE to check prores_ks :(

On Mon, Dec 31, 2018 at 4:46 AM Michael Niedermayer 
wrote:

> On Sun, Dec 30, 2018 at 10:57:23PM +0200, Alex Mogurenko wrote:
> > fdct done twice for each block. first time during quant calculation,
> second during slice encoding. so if we pre-save dct coefficients no need to
> do fdct second time.
> > disadvantages: requires more memory
> > advantages: improves performance ~4-5%
> > ---
> >  libavcodec/proresenc_kostya.c | 74 ---
> >  1 file changed, 52 insertions(+), 22 deletions(-)
>
> breaks fate
>
> TESTvsynth1-prores_ks
> --- ./tests/ref/vsynth/vsynth1-prores_ks2018-12-28
> 17:54:41.361383177 +0100
> +++ tests/data/fate/vsynth1-prores_ks   2018-12-31 03:45:17.207673799 +0100
> @@ -1,4 +1,4 @@
> -fe41a284da97ea5ec8866ca9a55b84da *tests/data/fate/vsynth1-prores_ks.mov
> -3858911 tests/data/fate/vsynth1-prores_ks.mov
> -100eb002413fe7a632d440dfbdf7e3ff
> *tests/data/fate/vsynth1-prores_ks.out.rawvideo
> -stddev:3.17 PSNR: 38.09 MAXDIFF:   39 bytes:  7603200/  7603200
> +ba6294d95b96f032b90f804f112ab98a *tests/data/fate/vsynth1-prores_ks.mov
> +3867422 tests/data/fate/vsynth1-prores_ks.mov
> +e85510eadb1ff115e85c480d8e1011a4
> *tests/data/fate/vsynth1-prores_ks.out.rawvideo
> +stddev:   19.38 PSNR: 22.38 MAXDIFF:  210 bytes:  7603200/  7603200
> Test vsynth1-prores_ks failed. Look at
> tests/data/fate/vsynth1-prores_ks.err for details.
> make: *** [fate-vsynth1-prores_ks] Error 1
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> "You are 36 times more likely to die in a bathtub than at the hands of a
> terrorist. Also, you are 2.5 times more likely to become a president and
> 2 times more likely to become an astronaut, than to die in a terrorist
> attack." -- Thoughty2
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-30 Thread Michael Niedermayer
On Sun, Dec 30, 2018 at 10:57:23PM +0200, Alex Mogurenko wrote:
> fdct done twice for each block. first time during quant calculation, second 
> during slice encoding. so if we pre-save dct coefficients no need to do fdct 
> second time.
> disadvantages: requires more memory
> advantages: improves performance ~4-5%
> ---
>  libavcodec/proresenc_kostya.c | 74 ---
>  1 file changed, 52 insertions(+), 22 deletions(-)

breaks fate

TEST    vsynth1-prores_ks
--- ./tests/ref/vsynth/vsynth1-prores_ks   2018-12-28 17:54:41.361383177 +0100
+++ tests/data/fate/vsynth1-prores_ks   2018-12-31 03:45:17.207673799 +0100
@@ -1,4 +1,4 @@
-fe41a284da97ea5ec8866ca9a55b84da *tests/data/fate/vsynth1-prores_ks.mov
-3858911 tests/data/fate/vsynth1-prores_ks.mov
-100eb002413fe7a632d440dfbdf7e3ff *tests/data/fate/vsynth1-prores_ks.out.rawvideo
-stddev:    3.17 PSNR: 38.09 MAXDIFF:   39 bytes:  7603200/  7603200
+ba6294d95b96f032b90f804f112ab98a *tests/data/fate/vsynth1-prores_ks.mov
+3867422 tests/data/fate/vsynth1-prores_ks.mov
+e85510eadb1ff115e85c480d8e1011a4 *tests/data/fate/vsynth1-prores_ks.out.rawvideo
+stddev:   19.38 PSNR: 22.38 MAXDIFF:  210 bytes:  7603200/  7603200
Test vsynth1-prores_ks failed. Look at tests/data/fate/vsynth1-prores_ks.err for details.
make: *** [fate-vsynth1-prores_ks] Error 1

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

"You are 36 times more likely to die in a bathtub than at the hands of a
terrorist. Also, you are 2.5 times more likely to become a president and
2 times more likely to become an astronaut, than to die in a terrorist
attack." -- Thoughty2



signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/prores_ks reduce twice fdct calls

2018-12-30 Thread Alex Mogurenko
fdct is done twice for each block: the first time during quant calculation, the
second during slice encoding. So if we pre-save the DCT coefficients, there is
no need to do the fdct a second time.
disadvantages: requires more memory
advantages: improves performance ~4-5%
---
 libavcodec/proresenc_kostya.c | 74 ---
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index e045a972f1..4d49d6521a 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c
@@ -219,7 +219,6 @@ struct TrellisNode {
 #define MAX_STORED_Q 16
 
 typedef struct ProresThreadData {
-DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * 
MAX_MBS_PER_SLICE];
 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 int16_t custom_q[64];
 int16_t custom_chroma_q[64];
@@ -228,7 +227,6 @@ typedef struct ProresThreadData {
 
 typedef struct ProresContext {
 AVClass *class;
-DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * 
MAX_MBS_PER_SLICE];
 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 int16_t quants[MAX_STORED_Q][64];
 int16_t quants_chroma[MAX_STORED_Q][64];
@@ -237,6 +235,7 @@ typedef struct ProresContext {
 const uint8_t *quant_mat;
 const uint8_t *quant_chroma_mat;
 const uint8_t *scantable;
+int16_t *blocks[MAX_PLANES];
 
 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
  ptrdiff_t linesize, int16_t *block);
@@ -562,6 +561,8 @@ static int encode_slice(AVCodecContext *avctx, const 
AVFrame *pic,
 int plane_factor, is_chroma;
 uint16_t *qmat;
 uint16_t *qmat_chroma;
+int16_t *blocks;
+DECLARE_ALIGNED(16, int16_t, dct_blocks)[16 * 16 * MAX_MBS_PER_SLICE];
 
 if (ctx->pictures_per_frame == 1)
 line_add = 0;
@@ -604,28 +605,38 @@ static int encode_slice(AVCodecContext *avctx, const 
AVFrame *pic,
 src = (const uint16_t*)(pic->data[i] + yp * linesize +
 line_add * pic->linesize[i]) + xp;
 
+if (!ctx->force_quant) {
+blocks = ctx->blocks[i] + (y * ctx->slices_width + x / 
ctx->mbs_per_slice) * 16 * 16 * ctx->mbs_per_slice;
+} else {
+blocks = dct_blocks;
+}
+
 if (i < 3) {
-get_slice_data(ctx, src, linesize, xp, yp,
-   pwidth, avctx->height / ctx->pictures_per_frame,
-   ctx->blocks[0], ctx->emu_buf,
-   mbs_per_slice, num_cblocks, is_chroma);
+if (ctx->force_quant) {
+get_slice_data(ctx, src, linesize, xp, yp,
+   pwidth, avctx->height / ctx->pictures_per_frame,
+   blocks, ctx->emu_buf,
+   mbs_per_slice, num_cblocks, is_chroma);
+}
 if (!is_chroma) {/* luma quant */
 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
-  mbs_per_slice, ctx->blocks[0],
+  mbs_per_slice, blocks,
   num_cblocks, plane_factor,
   qmat);
 } else { /* chroma plane */
 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
-  mbs_per_slice, ctx->blocks[0],
+  mbs_per_slice, blocks,
   num_cblocks, plane_factor,
   qmat_chroma);
 }
 } else {
-get_alpha_data(ctx, src, linesize, xp, yp,
-   pwidth, avctx->height / ctx->pictures_per_frame,
-   ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
+if (ctx->force_quant) {
+get_alpha_data(ctx, src, linesize, xp, yp,
+   pwidth, avctx->height / ctx->pictures_per_frame,
+   blocks, mbs_per_slice, ctx->alpha_bits);
+}
 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
-  ctx->blocks[0], quant);
+  blocks, quant);
 }
 total_size += sizes[i];
 if (put_bits_left(pb) < 0) {
@@ -730,15 +741,15 @@ static int estimate_slice_plane(ProresContext *ctx, int 
*error, int plane,
 const uint16_t *src, ptrdiff_t linesize,
 int mbs_per_slice,
 int blocks_per_mb, int plane_size_factor,
-const int16_t *qmat, ProresThreadData *td)
+const int16_t *qmat, int16_t *blocks)
 {
 int blocks_per_slice;
 int bits;
 
 blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
-bits