[FFmpeg-devel] [PATCH] libavcodec/rscc.c: add missing semicolon

2019-01-31 Thread Mateusz
Signed-off-by: Mateusz Brzostek 
---
 libavcodec/rscc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/rscc.c b/libavcodec/rscc.c
index e4b51973d8..7d4e842cd3 100644
--- a/libavcodec/rscc.c
+++ b/libavcodec/rscc.c
@@ -64,7 +64,7 @@ typedef struct RsccContext {
 /* zlib interaction */
 uint8_t *inflated_buf;
 uLongf inflated_size;
-int valid_pixels
+int valid_pixels;
 } RsccContext;
 
 static av_cold int rscc_init(AVCodecContext *avctx)
-- 
2.20.1.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] swscale: fix dithers table for DITHER_COPY macro

2018-02-02 Thread Mateusz
W dniu 24.10.2017 o 10:02, Mateusz pisze:
> The Bayer matrix 8x8 used in DITHER_COPY macro is table dithers[5].
> Remaining dithers[] matrixes are generated from this matrix by
> downshift or upshift.
> 
> This patch fixes dithers[6] and dithers[7] matrixes -- they were
> too dark.
> 
> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
> ---
>  libswscale/swscale_unscaled.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)

ping 2

I've attached simple C program that generates all dithers[] tables
from dithers[5] table. There are 512 numbers to check, so it is simpler
to generate all tables, copy result to source file and use 'git diff'
to check them all.

It's time to finally fix DITHER_COPY macro and maybe optionally copy 
the changes to release 3.4.2.

Mateusz
#include 
#include 

const uint8_t dithers5[8][8] = {
  {  18, 34, 30, 46, 17, 33, 29, 45,},
  {  50,  2, 62, 14, 49,  1, 61, 13,},
  {  26, 42, 22, 38, 25, 41, 21, 37,},
  {  58, 10, 54,  6, 57,  9, 53,  5,},
  {  16, 32, 28, 44, 19, 35, 31, 47,},
  {  48,  0, 60, 12, 51,  3, 63, 15,},
  {  24, 40, 20, 36, 27, 43, 23, 39,},
  {  56,  8, 52,  4, 59, 11, 55,  7,},
};

int main() {
for (int b = 0; b < 8; b++) {
for (int y = 0; y < 8; y++) {
printf("  { ");
for (int x = 0; x < 8; x++)
printf("%3d,", b <= 5 ? dithers5[y][x] >> (5-b) : 
dithers5[y][x] << (b-5));
printf("},\n");
}
printf("},{\n");
}
return 0;
}
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avfilter/vf_overlay: fix packed_rgb case

2017-12-17 Thread Mateusz
Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavfilter/vf_overlay.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index a7d3906016..aa5835ae3a 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -456,9 +456,12 @@ static av_always_inline void 
blend_image_packed_rgb(AVFilterContext *ctx,
 default:
 // main_value = main_value * (1 - alpha) + overlay_value * 
alpha
 // since alpha is in the range 0-255, the result must divided 
by 255
-d[dr] = is_straight ? FAST_DIV255(d[dr] * (255 - alpha) + 
S[sr] * alpha) : FAST_DIV255(d[dr] * (255 - alpha) + S[sr]);
-d[dg] = is_straight ? FAST_DIV255(d[dg] * (255 - alpha) + 
S[sg] * alpha) : FAST_DIV255(d[dr] * (255 - alpha) + S[sr]);
-d[db] = is_straight ? FAST_DIV255(d[db] * (255 - alpha) + 
S[sb] * alpha) : FAST_DIV255(d[dr] * (255 - alpha) + S[sr]);
+d[dr] = is_straight ? FAST_DIV255(d[dr] * (255 - alpha) + 
S[sr] * alpha) :
+FFMIN(FAST_DIV255(d[dr] * (255 - alpha)) + S[sr], 255);
+d[dg] = is_straight ? FAST_DIV255(d[dg] * (255 - alpha) + 
S[sg] * alpha) :
+FFMIN(FAST_DIV255(d[dg] * (255 - alpha)) + S[sg], 255);
+d[db] = is_straight ? FAST_DIV255(d[db] * (255 - alpha) + 
S[sb] * alpha) :
+FFMIN(FAST_DIV255(d[db] * (255 - alpha)) + S[sb], 255);
 }
 if (main_has_alpha) {
 switch (alpha) {
@@ -742,22 +745,22 @@ static void blend_image_gbrap_pm(AVFilterContext *ctx, 
AVFrame *dst, const AVFra
 
 static void blend_image_rgb(AVFilterContext *ctx, AVFrame *dst, const AVFrame 
*src, int x, int y)
 {
-blend_image_packed_rgb(ctx, dst, src, 0, x, y, 0);
+blend_image_packed_rgb(ctx, dst, src, 0, x, y, 1);
 }
 
 static void blend_image_rgba(AVFilterContext *ctx, AVFrame *dst, const AVFrame 
*src, int x, int y)
 {
-blend_image_packed_rgb(ctx, dst, src, 1, x, y, 0);
+blend_image_packed_rgb(ctx, dst, src, 1, x, y, 1);
 }
 
 static void blend_image_rgb_pm(AVFilterContext *ctx, AVFrame *dst, const 
AVFrame *src, int x, int y)
 {
-blend_image_packed_rgb(ctx, dst, src, 0, x, y, 1);
+blend_image_packed_rgb(ctx, dst, src, 0, x, y, 0);
 }
 
 static void blend_image_rgba_pm(AVFilterContext *ctx, AVFrame *dst, const 
AVFrame *src, int x, int y)
 {
-blend_image_packed_rgb(ctx, dst, src, 1, x, y, 1);
+blend_image_packed_rgb(ctx, dst, src, 1, x, y, 0);
 }
 
 static int config_input_main(AVFilterLink *inlink)
-- 
2.15.1.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] checkasm/vf_hflip : add test for vf_hflip SIMD

2017-12-11 Thread Mateusz
W dniu 11.12.2017 o 11:28, Martin Vignali pisze:
>>
>> It doesn't run (the test is skipped) on 32-bit VS 2017 with command:
>> configure --enable-gpl --toolchain=msvc && make fate-rsync
>> SAMPLES=../fate-suite && make fate SAMPLES=../fate-suite
>>
>> With exactly the same command it runs on 64-bit VS 2017.
>> With similar command line (without '--toolchain=msvc') it runs on 32-bit
>> and 64-bit mingw/GCC.
>>
> 
> Strange, the test is not supposed to be run with fate, using the previous
> patch.
> 
> New patch in attach, with an add in fate/checkasm.mak

My bad, sorry for misinformation. The old patch that broke fate was in fate,
next wasn't and I checked only end of fate tests (I wrongly assume that it
passes fate) but 32-bit VS 2017 hangs at 'fate-api-png-codec-param' and I
saw that it was no vf_hflip in checkasm.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] checkasm/vf_hflip : add test for vf_hflip SIMD

2017-12-11 Thread Mateusz
W dniu 11.12.2017 o 11:00, Martin Vignali pisze:
> 2017-12-11 10:49 GMT+01:00 Mateusz <mateu...@poczta.onet.pl>:
> 
>> W dniu 11.12.2017 o 00:51, Mateusz pisze:
>>> W dniu 10.12.2017 o 21:13, Martin Vignali pisze:
>>>>>
>>>>> For me there is no "src + (width - 1) * step" in
>> tests/checkasm/vf_hflip.c
>>>>>
>>>>> You pass start of the src buffer but you should pass end of the buffer.
>>>>>
>>>>>
>>>>>
>>>>> Thanks !
>>>>
>>>> New patch in attach.
>>>
>>> Now it is OK in my system (mingw 32/64-bit, VS 2017 64-bit). Thanks!
>>
>> VS 2017 32-bit doesn't run this test:
>> CC  tests/checkasm/vf_colorspace.o
>> vf_colorspace.c
>> CC  tests/checkasm/vf_hflip.o
>> vf_hflip.c
>> CC  tests/checkasm/vf_threshold.o
>> vf_threshold.c
>> CC  tests/checkasm/videodsp.o
>> videodsp.c
>> CC  tests/checkasm/vp8dsp.o
>> vp8dsp.c
>> CC  tests/checkasm/vp9dsp.o
>> vp9dsp.c
>> f:\ffmpeg\libavcodec\get_bits.h(308): warning C4101: 're_cache':
>> unreferenced local variable
>> X86ASM  tests/checkasm/x86/checkasm.o
>> STRIP   tests/checkasm/x86/checkasm.o
>> skipping strip -x tests/checkasm/x86/checkasm.o
>> LD  tests/checkasm/checkasm.exe
>> TESTcheckasm-aacpsdsp
>> TESTcheckasm-alacdsp
>> TESTcheckasm-audiodsp
>> TESTcheckasm-blockdsp
>> TESTcheckasm-bswapdsp
>> TESTcheckasm-exrdsp
>> TESTcheckasm-fixed_dsp
>> TESTcheckasm-flacdsp
>> TESTcheckasm-float_dsp
>> TESTcheckasm-fmtconvert
>> TESTcheckasm-g722dsp
>> TESTcheckasm-h264dsp
>> TESTcheckasm-h264pred
>> TESTcheckasm-h264qpel
>> TESTcheckasm-hevc_add_res
>> TESTcheckasm-hevc_idct
>> TESTcheckasm-jpeg2000dsp
>> TESTcheckasm-llviddsp
>> TESTcheckasm-pixblockdsp
>> TESTcheckasm-sbrdsp
>> TESTcheckasm-synth_filter
>> TESTcheckasm-v210enc
>> TESTcheckasm-vf_blend
>> TESTcheckasm-vf_colorspace
>> TESTcheckasm-vf_threshold
>> TESTcheckasm-videodsp
>>
>> I don't know why. 64-bit VS 2017 tests vf_hflip, 32/64-bit mingw also.
>>
>> If I run this test from command line it works (in 32-bit VS 2017):
>> $ tests/checkasm/checkasm --test=vf_hflip --bench --seed 1616253308
>> benchmarking with native FFmpeg timers
>> nop: 19.3
>> checkasm: using random seed 1616253308
>> SSSE3:
>>  - vf_hflip.hflip_byte  [OK]
>>  - vf_hflip.hflip_short [OK]
>> checkasm: all 2 tests passed
>> hflip_byte_c: 274.1
>> hflip_byte_ssse3: 18.1
>> hflip_short_c: 194.1
>> hflip_short_ssse3: 18.9
>>
>> Any ideas?
>>
>>
>> Do you mean, it's not run when you use make fate-checkasm ?
> or not run when you use ./tests/checkasm/checkasm ?
> 
> If it's for the fate, i not put in the latest patch, the fate line, to run
> this test
> need to add in ./tests/fate/checkasm.mak :
> fate-checkasm-vf_hflip
> in the fate-checkasm part

It doesn't run (the test is skipped) on 32-bit VS 2017 with command:
configure --enable-gpl --toolchain=msvc && make fate-rsync 
SAMPLES=../fate-suite && make fate SAMPLES=../fate-suite

With exactly the same command it runs on 64-bit VS 2017.
With similar command line (without '--toolchain=msvc') it runs on 32-bit and 
64-bit mingw/GCC.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] checkasm/vf_hflip : add test for vf_hflip SIMD

2017-12-11 Thread Mateusz
W dniu 11.12.2017 o 00:51, Mateusz pisze:
> W dniu 10.12.2017 o 21:13, Martin Vignali pisze:
>>>
>>> For me there is no "src + (width - 1) * step" in tests/checkasm/vf_hflip.c
>>>
>>> You pass start of the src buffer but you should pass end of the buffer.
>>>
>>>
>>>
>>> Thanks !
>>
>> New patch in attach.
> 
> Now it is OK in my system (mingw 32/64-bit, VS 2017 64-bit). Thanks!

VS 2017 32-bit doesn't run this test:
CC  tests/checkasm/vf_colorspace.o
vf_colorspace.c
CC  tests/checkasm/vf_hflip.o
vf_hflip.c
CC  tests/checkasm/vf_threshold.o
vf_threshold.c
CC  tests/checkasm/videodsp.o
videodsp.c
CC  tests/checkasm/vp8dsp.o
vp8dsp.c
CC  tests/checkasm/vp9dsp.o
vp9dsp.c
f:\ffmpeg\libavcodec\get_bits.h(308): warning C4101: 're_cache': unreferenced 
local variable
X86ASM  tests/checkasm/x86/checkasm.o
STRIP   tests/checkasm/x86/checkasm.o
skipping strip -x tests/checkasm/x86/checkasm.o
LD  tests/checkasm/checkasm.exe
TESTcheckasm-aacpsdsp
TESTcheckasm-alacdsp
TESTcheckasm-audiodsp
TESTcheckasm-blockdsp
TESTcheckasm-bswapdsp
TESTcheckasm-exrdsp
TESTcheckasm-fixed_dsp
TESTcheckasm-flacdsp
TESTcheckasm-float_dsp
TESTcheckasm-fmtconvert
TESTcheckasm-g722dsp
TESTcheckasm-h264dsp
TESTcheckasm-h264pred
TESTcheckasm-h264qpel
TESTcheckasm-hevc_add_res
TESTcheckasm-hevc_idct
TESTcheckasm-jpeg2000dsp
TESTcheckasm-llviddsp
TESTcheckasm-pixblockdsp
TESTcheckasm-sbrdsp
TESTcheckasm-synth_filter
TESTcheckasm-v210enc
TESTcheckasm-vf_blend
TESTcheckasm-vf_colorspace
TESTcheckasm-vf_threshold
TESTcheckasm-videodsp

I don't know why. 64-bit VS 2017 tests vf_hflip, 32/64-bit mingw also.

If I run this test from command line it works (in 32-bit VS 2017):
$ tests/checkasm/checkasm --test=vf_hflip --bench --seed 1616253308
benchmarking with native FFmpeg timers
nop: 19.3
checkasm: using random seed 1616253308
SSSE3:
 - vf_hflip.hflip_byte  [OK]
 - vf_hflip.hflip_short [OK]
checkasm: all 2 tests passed
hflip_byte_c: 274.1
hflip_byte_ssse3: 18.1
hflip_short_c: 194.1
hflip_short_ssse3: 18.9

Any ideas?

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] checkasm/vf_hflip : add test for vf_hflip SIMD

2017-12-10 Thread Mateusz
W dniu 10.12.2017 o 21:13, Martin Vignali pisze:
>>
>> For me there is no "src + (width - 1) * step" in tests/checkasm/vf_hflip.c
>>
>> You pass start of the src buffer but you should pass end of the buffer.
>>
>>
>>
>> Thanks !
> 
> New patch in attach.

Now it is OK in my system (mingw 32/64-bit, VS 2017 64-bit). Thanks!

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] checkasm/vf_hflip : add test for vf_hflip SIMD

2017-12-10 Thread Mateusz
W dniu 09.12.2017 o 15:46, Martin Vignali pisze:
>>
>>> Do you test on X86_32 or x86_64 ?
>>
>> failure occurs on both
>>
>>
>>> Nasm or Yasm ?
>>
>> NASM version 2.10.09 compiled on Dec 29 2013
>>
>>
>>
>> I try to compile with the same nasm version (on os X, X86_64)
> using --x86asmexe=nasm_exe_path2.10rc9 in the configure
> 
> And the checkasm also pass
> 
> ./tests/checkasm/checkasm --test=vf_hflip --bench --seed 1616253308
> benchmarking with native FFmpeg timers
> nop: 34.9
> checkasm: using random seed 1616253308
> SSSE3:
>  - vf_hflip.hflip_byte  [OK]
>  - vf_hflip.hflip_short [OK]
> checkasm: all 2 tests passed
> hflip_byte_c: 28.0
> hflip_byte_ssse3: 27.0
> hflip_short_c: 277.7
> hflip_short_ssse3: 71.0
> 
> 
> Any ideas, why it fail for you ?

For me it fails too. Please look at part from libavfilter/vf_hfilp.c:
outrow = out->data[plane] + start * out->linesize[plane];
inrow  = in ->data[plane] + start * in->linesize[plane] + (width - 1) * 
step;
for (i = start; i < end; i++) {
s->flip_line[plane](inrow, outrow, width);
and now at part from tests/checkasm/vf_hflip.c:
call_ref(src, dst_ref, w);
call_new(src, dst_new, w);

For me there is no "src + (width - 1) * step" in tests/checkasm/vf_hflip.c

You pass start of the src buffer but you should pass end of the buffer.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-07 Thread Mateusz
W dniu 07.12.2017 o 22:58, Hendrik Leppkes pisze:
> Am 07.12.2017 20:40 schrieb "Mateusz" <mateu...@poczta.onet.pl>:
> 
> W dniu 07.12.2017 o 10:42, Hendrik Leppkes pisze:
>> On Thu, Dec 7, 2017 at 2:02 AM, Mateusz <mateu...@poczta.onet.pl> wrote:
>>> After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
>>> we have included windows.h in much more files and we should
>>> avoid conflicts with defines/function declarations.
>>>
>>> We should declare compatible variables for atomic compat wrappers
>>> that expect fixed size variables in atomic_compare_exchange* macro.
>>>
>>> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
>>> ---
>>>  libavcodec/jpegls.h   |  2 ++
>>>  libavcodec/mjpegdec.h |  2 ++
>>>  libavcodec/mss2.c |  6 +++---
>>>  libavcodec/utils.c| 12 
>>>  libavformat/mxfenc.c  |  2 +-
>>>  5 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
>>> index c8997c7861..6b89b2afa3 100644
>>> --- a/libavcodec/jpegls.h
>>> +++ b/libavcodec/jpegls.h
>>> @@ -32,6 +32,8 @@
>>>  #include "avcodec.h"
>>>  #include "internal.h"
>>>
>>> +#undef near /* This file uses struct member 'near' which in windows.h
> is defined as empty. */
>>> +
>>>  typedef struct JpeglsContext {
>>>  AVCodecContext *avctx;
>>>  } JpeglsContext;
>>> diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
>>> index c84a40aa6e..c36fba5f22 100644
>>> --- a/libavcodec/mjpegdec.h
>>> +++ b/libavcodec/mjpegdec.h
>>> @@ -39,6 +39,8 @@
>>>  #include "hpeldsp.h"
>>>  #include "idctdsp.h"
>>>
>>> +#undef near /* This file uses struct member 'near' which in windows.h
> is defined as empty. */
>>> +
>>>  #define MAX_COMPONENTS 4
>>>
>>>  typedef struct MJpegDecodeContext {
>>> diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
>>> index 9e7cc466de..3180af1d60 100644
>>> --- a/libavcodec/mss2.c
>>> +++ b/libavcodec/mss2.c
>>> @@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const
> uint8_t *buf, int buf_size,
>>>  return 0;
>>>  }
>>>
>>> -typedef struct Rectangle {
>>> +struct Rectangle {
>>>  int coded, x, y, w, h;
>>> -} Rectangle;
>>> +};
>>>
>>>  #define MAX_WMV9_RECTANGLES 20
>>>  #define ARITH2_PADDING 2
>>> @@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx,
> void *data, int *got_frame,
>>>
>>>  int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
>>>
>>> -Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
>>> +struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
>>>  int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
>>>
>>>  if ((ret = init_get_bits8(, buf, buf_size)) < 0)
>>> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>>> index baf09119fe..70a0764714 100644
>>> --- a/libavcodec/utils.c
>>> +++ b/libavcodec/utils.c
>>> @@ -1943,7 +1943,13 @@ int av_lockmgr_register(int (*cb)(void **mutex,
> enum AVLockOp op))
>>>
>>>  int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
>>>  {
>>> +#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) &&
> !defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
>>> +!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) &&
> !defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
>>>  _Bool exp = 0;
>>> +#else
>>> +atomic_bool exp = 0;
>>> +#endif
>>> +
>>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE ||
> !codec->init)
>>>  return 0;
>>>
>>> @@ -1969,7 +1975,13 @@ int ff_lock_avcodec(AVCodecContext *log_ctx,
> const AVCodec *codec)
>>>
>>>  int ff_unlock_avcodec(const AVCodec *codec)
>>>  {
>>> +#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) &&
> !defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
>>> +!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) &&
> !defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
>>>  _Bool exp = 1;
>>> +#else
>>> +atomic_bool exp = 1;
>>> +#endif
>>> +
>>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE ||
> !codec->init)
>>>  return 0;
>>>
>>
>> These ifdefs here are very ugly, and as mentioned in another mail, the
>> atomics in those two functions arent even required - all access to
>> those variables is supposed to be protected by the lockmgr anyway.
>> So it would be easier to just remove any atomic nature of those
>> variables (or at the very lease replace the compare_exchange with a
>> store to solve this problem at hand).
> 
> I'm not sure but are you proposed to revert commit
> https://github.com/FFmpeg/FFmpeg/commit/590136e78da3d091ea99ab5432543d
> 47a559a461
> 
> 
> Basically, yes. Atomics are not needed for this variable, as access to it
> should be serialized anyways.

OK for me. I've sent smaller patch (only near/Rectangle stuff).

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-07 Thread Mateusz
After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
we have included windows.h in much more files and we should
avoid conflicts with defines/function declarations.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavcodec/jpegls.h   | 2 ++
 libavcodec/mjpegdec.h | 2 ++
 libavcodec/mss2.c | 6 +++---
 libavformat/mxfenc.c  | 2 +-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index c8997c7861..6b89b2afa3 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -32,6 +32,8 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is 
defined as empty. */
+
 typedef struct JpeglsContext {
 AVCodecContext *avctx;
 } JpeglsContext;
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index c84a40aa6e..c36fba5f22 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -39,6 +39,8 @@
 #include "hpeldsp.h"
 #include "idctdsp.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is 
defined as empty. */
+
 #define MAX_COMPONENTS 4
 
 typedef struct MJpegDecodeContext {
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 9e7cc466de..3180af1d60 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t 
*buf, int buf_size,
 return 0;
 }
 
-typedef struct Rectangle {
+struct Rectangle {
 int coded, x, y, w, h;
-} Rectangle;
+};
 
 #define MAX_WMV9_RECTANGLES 20
 #define ARITH2_PADDING 2
@@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 
 int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
 
-Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
 int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
 if ((ret = init_get_bits8(, buf, buf_size)) < 0)
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index ed6ecbf541..407acdcaaa 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -1444,7 +1444,7 @@ static int mxf_write_header_metadata_sets(AVFormatContext 
*s)
 AVStream *st = NULL;
 int i;
 
-MXFPackage packages[2] = {};
+MXFPackage packages[2] = {{NULL}};
 int package_count = 2;
 packages[0].type = MaterialPackage;
 packages[1].type = SourcePackage;
-- 
2.15.1.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-07 Thread Mateusz
W dniu 07.12.2017 o 10:42, Hendrik Leppkes pisze:
> On Thu, Dec 7, 2017 at 2:02 AM, Mateusz <mateu...@poczta.onet.pl> wrote:
>> After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
>> we have included windows.h in much more files and we should
>> avoid conflicts with defines/function declarations.
>>
>> We should declare compatible variables for atomic compat wrappers
>> that expect fixed size variables in atomic_compare_exchange* macro.
>>
>> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
>> ---
>>  libavcodec/jpegls.h   |  2 ++
>>  libavcodec/mjpegdec.h |  2 ++
>>  libavcodec/mss2.c |  6 +++---
>>  libavcodec/utils.c| 12 
>>  libavformat/mxfenc.c  |  2 +-
>>  5 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
>> index c8997c7861..6b89b2afa3 100644
>> --- a/libavcodec/jpegls.h
>> +++ b/libavcodec/jpegls.h
>> @@ -32,6 +32,8 @@
>>  #include "avcodec.h"
>>  #include "internal.h"
>>
>> +#undef near /* This file uses struct member 'near' which in windows.h is 
>> defined as empty. */
>> +
>>  typedef struct JpeglsContext {
>>  AVCodecContext *avctx;
>>  } JpeglsContext;
>> diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
>> index c84a40aa6e..c36fba5f22 100644
>> --- a/libavcodec/mjpegdec.h
>> +++ b/libavcodec/mjpegdec.h
>> @@ -39,6 +39,8 @@
>>  #include "hpeldsp.h"
>>  #include "idctdsp.h"
>>
>> +#undef near /* This file uses struct member 'near' which in windows.h is 
>> defined as empty. */
>> +
>>  #define MAX_COMPONENTS 4
>>
>>  typedef struct MJpegDecodeContext {
>> diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
>> index 9e7cc466de..3180af1d60 100644
>> --- a/libavcodec/mss2.c
>> +++ b/libavcodec/mss2.c
>> @@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const 
>> uint8_t *buf, int buf_size,
>>  return 0;
>>  }
>>
>> -typedef struct Rectangle {
>> +struct Rectangle {
>>  int coded, x, y, w, h;
>> -} Rectangle;
>> +};
>>
>>  #define MAX_WMV9_RECTANGLES 20
>>  #define ARITH2_PADDING 2
>> @@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void 
>> *data, int *got_frame,
>>
>>  int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
>>
>> -Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
>> +struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
>>  int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
>>
>>  if ((ret = init_get_bits8(, buf, buf_size)) < 0)
>> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>> index baf09119fe..70a0764714 100644
>> --- a/libavcodec/utils.c
>> +++ b/libavcodec/utils.c
>> @@ -1943,7 +1943,13 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
>> AVLockOp op))
>>
>>  int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
>>  {
>> +#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) && 
>> !defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
>> +!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) && 
>> !defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
>>  _Bool exp = 0;
>> +#else
>> +atomic_bool exp = 0;
>> +#endif
>> +
>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
>>  return 0;
>>
>> @@ -1969,7 +1975,13 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
>> AVCodec *codec)
>>
>>  int ff_unlock_avcodec(const AVCodec *codec)
>>  {
>> +#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) && 
>> !defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
>> +!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) && 
>> !defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
>>  _Bool exp = 1;
>> +#else
>> +atomic_bool exp = 1;
>> +#endif
>> +
>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
>>  return 0;
>>
> 
> These ifdefs here are very ugly, and as mentioned in another mail, the
> atomics in those two functions arent even required - all access to
> those variables is supposed to be protected by the lockmgr anyway.
> So it would be easier to just remove any atomic nature of those
> variables (or at the very lease replace the compare_exchange with a
> store to solve this problem at hand).

I'm not sure but are you proposed to revert commit
https://github.com/FFmpeg/FFmpeg/commit/590136e78da3d091ea99ab5432543d47a559a461
?

If commit 590136e should stay we could use atomic_intptr_t/intptr_t
for ff_avcodec_locked/exp -- it will be working with all wrappers without
ugly ifdefs.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-06 Thread Mateusz
After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
we have included windows.h in much more files and we should
avoid conflicts with defines/function declarations.

We should declare compatible variables for atomic compat wrappers
that expect fixed size variables in atomic_compare_exchange* macro.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavcodec/jpegls.h   |  2 ++
 libavcodec/mjpegdec.h |  2 ++
 libavcodec/mss2.c |  6 +++---
 libavcodec/utils.c| 12 
 libavformat/mxfenc.c  |  2 +-
 5 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index c8997c7861..6b89b2afa3 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -32,6 +32,8 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is 
defined as empty. */
+
 typedef struct JpeglsContext {
 AVCodecContext *avctx;
 } JpeglsContext;
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index c84a40aa6e..c36fba5f22 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -39,6 +39,8 @@
 #include "hpeldsp.h"
 #include "idctdsp.h"
 
+#undef near /* This file uses struct member 'near' which in windows.h is 
defined as empty. */
+
 #define MAX_COMPONENTS 4
 
 typedef struct MJpegDecodeContext {
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 9e7cc466de..3180af1d60 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t 
*buf, int buf_size,
 return 0;
 }
 
-typedef struct Rectangle {
+struct Rectangle {
 int coded, x, y, w, h;
-} Rectangle;
+};
 
 #define MAX_WMV9_RECTANGLES 20
 #define ARITH2_PADDING 2
@@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 
 int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
 
-Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
 int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
 if ((ret = init_get_bits8(, buf, buf_size)) < 0)
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index baf09119fe..70a0764714 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1943,7 +1943,13 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
AVLockOp op))
 
 int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
 {
+#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) && 
!defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
+!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) && 
!defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
 _Bool exp = 0;
+#else
+atomic_bool exp = 0;
+#endif
+
 if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
 return 0;
 
@@ -1969,7 +1975,13 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
AVCodec *codec)
 
 int ff_unlock_avcodec(const AVCodec *codec)
 {
+#if !defined(COMPAT_ATOMICS_DUMMY_STDATOMIC_H) && 
!defined(COMPAT_ATOMICS_PTHREAD_STDATOMIC_H) && \
+!defined(COMPAT_ATOMICS_SUNCC_STDATOMIC_H) && 
!defined(COMPAT_ATOMICS_WIN32_STDATOMIC_H)
 _Bool exp = 1;
+#else
+atomic_bool exp = 1;
+#endif
+
 if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
 return 0;
 
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index ed6ecbf541..407acdcaaa 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -1444,7 +1444,7 @@ static int mxf_write_header_metadata_sets(AVFormatContext 
*s)
 AVStream *st = NULL;
 int i;
 
-MXFPackage packages[2] = {};
+MXFPackage packages[2] = {{NULL}};
 int package_count = 2;
 packages[0].type = MaterialPackage;
 packages[1].type = SourcePackage;
-- 
2.15.1.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-05 Thread Mateusz
W dniu 05.12.2017 o 12:12, Hendrik Leppkes pisze:
> On Tue, Dec 5, 2017 at 12:31 AM, Mateusz <mateu...@poczta.onet.pl> wrote:
>> After some tests:
>> 1) #undef far
>> after #include  is wrong -- in oleauto.h is declaration
>> WINOLEAUTAPI VarUI1FromI8(LONG64 i64In, _Out_ BYTE FAR* pbOut);
>> and 'FAR' is defined as 'far' which is define as empty.
> 
> Yeah generally undefing all of them might result in errors in other
> windows headers, so thats bad.
> 
>>
>> 2) #undef near
>> after #include  works in ffmpeg but is danger -- see 1)
> 
> Doing it rather late,  ie. right before its used in the file should be
> relatively safe. If not, we can always rename the variable.
> But it seems to work so far. And from the nature of this bug, it
> should just error out instead of resulting in other breakage - if it
> happens.
> 
>>
>> 3) after
>> git revert 3701d499f8734ec5f3e7359de7311b15d92070b0
>> git revert 590136e78da3d091ea99ab5432543d47a559a461
>> and patch
>> diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
>> -MXFPackage packages[2] = {};
>> +MXFPackage packages[2] = {{NULL}};
>> VS 2017 can compile ffmpeg and fate stops at 
>> audiomatch-nero-16000-mono-he-m4a
>> (without reverting 590136e hangs at api-flac-test.exe)
>>
>> 4) if for any reasons commits 3701d49 and 590136e shouldn't be reverted,
>> we can apply this patch and
> 
> We can't really revert those, so fixing them is better.
> 
>> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>> index baf09119fe..b34a3803b8 100644
>> --- a/libavcodec/utils.c
>> +++ b/libavcodec/utils.c
>> @@ -1943,7 +1943,7 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
>> AVLockOp op))
>>
>>  int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
>>  {
>> -_Bool exp = 0;
>> +atomic_bool exp = 0;
>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
>>  return 0;
>>
>> @@ -1969,7 +1969,7 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
>> AVCodec *codec)
>>
>>  int ff_unlock_avcodec(const AVCodec *codec)
>>  {
>> -_Bool exp = 1;
>> +atomic_bool exp = 1;
>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
>>  return 0;
>>
>>
> 
> atomic_compare_exchange* usage is a bit iffy because it uses pointers,
> and the compat wrappers don't fully represent all types properly (for
> simplicity, implementing them in a generic way would be madness).
> So this work-around seems fine to me, if it builds with GCC as well.
> 
> - Hendrik

Thanks for review.

I will try to write patches that not break ffmpeg on Linux
and will work with MSVC -- now it looks to me more complicated 
than before but it should be possible.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-05 Thread Mateusz
W dniu 05.12.2017 o 17:44, James Almer pisze:
> On 12/5/2017 1:40 PM, Mateusz wrote:
>> W dniu 05.12.2017 o 15:20, James Almer pisze:
>>> On 12/4/2017 8:31 PM, Mateusz wrote:
>>>> After some tests:
>>>> 1) #undef far
>>>> after #include  is wrong -- in oleauto.h is declaration
>>>> WINOLEAUTAPI VarUI1FromI8(LONG64 i64In, _Out_ BYTE FAR* pbOut);
>>>> and 'FAR' is defined as 'far' which is define as empty.
>>>>
>>>> 2) #undef near
>>>> after #include  works in ffmpeg but is danger -- see 1)
>>>>
>>>> 3) after
>>>> git revert 3701d499f8734ec5f3e7359de7311b15d92070b0
>>>> git revert 590136e78da3d091ea99ab5432543d47a559a461
>>>> and patch
>>>> diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
>>>> -MXFPackage packages[2] = {};
>>>> +MXFPackage packages[2] = {{NULL}};
>>>> VS 2017 can compile ffmpeg and fate stops at 
>>>> audiomatch-nero-16000-mono-he-m4a
>>>> (without reverting 590136e hangs at api-flac-test.exe)
>>>>
>>>> 4) if for any reasons commits 3701d49 and 590136e shouldn't be reverted,
>>>> we can apply this patch and
>>>> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>>>> index baf09119fe..b34a3803b8 100644
>>>> --- a/libavcodec/utils.c
>>>> +++ b/libavcodec/utils.c
>>>> @@ -1943,7 +1943,7 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
>>>> AVLockOp op))
>>>>
>>>>  int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
>>>>  {
>>>> -_Bool exp = 0;
>>>> +atomic_bool exp = 0;
>>>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || 
>>>> !codec->init)
>>>>  return 0;
>>>>
>>>> @@ -1969,7 +1969,7 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
>>>> AVCodec *codec)
>>>>
>>>>  int ff_unlock_avcodec(const AVCodec *codec)
>>>>  {
>>>> -_Bool exp = 1;
>>>> +atomic_bool exp = 1;
>>>
>>> No, these are not meant to be atomic.
>>>
>>> _Bool is c99, so maybe we could replace its usage with int in these lock
>>> functions. The result will be the exact same.
>>
>> In atomic types the main thing is width (sizeof).
>> In atomic operations destination memory MUST BE sufficient
>> to copy all bytes from source, in
>> atomic_compare_exchange_strong(_avcodec_locked, , 1)
>> sizeof(exp) MUST BE >= sizeof(ff_avcodec_locked) == sizeof(atomic_bool)
>>
>> In ffmpeg implementation of atomic code for MSVC sizeof(atomic_bool) == 8
>> and sizeof(_Bool) < 8 -- it is the reason of hangs.
>>
>> And all volatile objects MUST BE explicitly volatile -- instead of
>> static atomic_bool ff_avcodec_locked;
>> should be
>> static volatile atomic_bool ff_avcodec_locked;
> 
> No, the win32 wrapper should add volatile to the atomic_* typedefs if
> anything. No other implementation needs it, so it must not be used here.

Yes, you are right -- it should work not only in Windows. It is a bit tricky...
In Linux stdatomic.h looks quite different than ffmpeg win32/gcc wrappers.

Now I have no opinion how it should be done right.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-05 Thread Mateusz
W dniu 05.12.2017 o 15:20, James Almer pisze:
> On 12/4/2017 8:31 PM, Mateusz wrote:
>> After some tests:
>> 1) #undef far
>> after #include  is wrong -- in oleauto.h is declaration
>> WINOLEAUTAPI VarUI1FromI8(LONG64 i64In, _Out_ BYTE FAR* pbOut);
>> and 'FAR' is defined as 'far' which is define as empty.
>>
>> 2) #undef near
>> after #include  works in ffmpeg but is danger -- see 1)
>>
>> 3) after
>> git revert 3701d499f8734ec5f3e7359de7311b15d92070b0
>> git revert 590136e78da3d091ea99ab5432543d47a559a461
>> and patch
>> diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
>> -MXFPackage packages[2] = {};
>> +MXFPackage packages[2] = {{NULL}};
>> VS 2017 can compile ffmpeg and fate stops at 
>> audiomatch-nero-16000-mono-he-m4a
>> (without reverting 590136e hangs at api-flac-test.exe)
>>
>> 4) if for any reasons commits 3701d49 and 590136e shouldn't be reverted,
>> we can apply this patch and
>> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>> index baf09119fe..b34a3803b8 100644
>> --- a/libavcodec/utils.c
>> +++ b/libavcodec/utils.c
>> @@ -1943,7 +1943,7 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
>> AVLockOp op))
>>
>>  int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
>>  {
>> -_Bool exp = 0;
>> +atomic_bool exp = 0;
>>  if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
>>  return 0;
>>
>> @@ -1969,7 +1969,7 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
>> AVCodec *codec)
>>
>>  int ff_unlock_avcodec(const AVCodec *codec)
>>  {
>> -_Bool exp = 1;
>> +atomic_bool exp = 1;
> 
> No, these are not meant to be atomic.
> 
> _Bool is c99, so maybe we could replace its usage with int in these lock
> functions. The result will be the exact same.

In atomic types the main thing is width (sizeof).
In atomic operations destination memory MUST BE sufficient
to copy all bytes from source, in
atomic_compare_exchange_strong(_avcodec_locked, , 1)
sizeof(exp) MUST BE >= sizeof(ff_avcodec_locked) == sizeof(atomic_bool)

In ffmpeg implementation of atomic code for MSVC sizeof(atomic_bool) == 8
and sizeof(_Bool) < 8 -- it is the reason of hangs.

And all volatile objects MUST BE explicitly volatile -- instead of
static atomic_bool ff_avcodec_locked;
should be
static volatile atomic_bool ff_avcodec_locked;

I will prepare a patch (but not before Friday -- lots of work).

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-04 Thread Mateusz
After some tests:
1) #undef far
after #include  is wrong -- in oleauto.h is declaration
WINOLEAUTAPI VarUI1FromI8(LONG64 i64In, _Out_ BYTE FAR* pbOut);
and 'FAR' is defined as 'far' which is define as empty.

2) #undef near
after #include  works in ffmpeg but is danger -- see 1)

3) after
git revert 3701d499f8734ec5f3e7359de7311b15d92070b0
git revert 590136e78da3d091ea99ab5432543d47a559a461
and patch
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
-MXFPackage packages[2] = {};
+MXFPackage packages[2] = {{NULL}};
VS 2017 can compile ffmpeg and fate stops at audiomatch-nero-16000-mono-he-m4a
(without reverting 590136e hangs at api-flac-test.exe)

4) if for any reasons commits 3701d49 and 590136e shouldn't be reverted,
we can apply this patch and
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index baf09119fe..b34a3803b8 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1943,7 +1943,7 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum 
AVLockOp op))

 int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
 {
-_Bool exp = 0;
+atomic_bool exp = 0;
 if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
 return 0;

@@ -1969,7 +1969,7 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const 
AVCodec *codec)

 int ff_unlock_avcodec(const AVCodec *codec)
 {
-_Bool exp = 1;
+atomic_bool exp = 1;
 if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
     return 0;


Mateusz



W dniu 04.12.2017 o 20:27, Mateusz pisze:
> W dniu 04.12.2017 o 15:02, Derek Buitenhuis pisze:
>> On 12/4/2017 8:03 AM, Mateusz wrote:
>>> After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
>>> we have included windows.h in much more files and we should
>>> avoid conflicts with defines/function declarations.
>>>
>>> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
>>> ---
>>>  libavcodec/jpegls.h  | 4 
>>>  libavcodec/mss2.c| 6 +++---
>>>  libavformat/mxfenc.c | 2 +-
>>>  3 files changed, 8 insertions(+), 4 deletions(-)
>>
>> Sprinkling these weird ifdefs and renames around is pretty ugly. Is there
>> some sort of canonical list on MSDN or something we can use globally-ish?
>>
>> - Derek
> 
> There is a list of "Predefined Macros" in MSVC -- IMO there are OK
> https://msdn.microsoft.com/en-us/library/b0084kay(v=vs.140).aspx
> 
> More danger are macros from windows.h -- there is a list of macros
> to exclude some parts (from MSDN and windows.h):
> Define WIN32_LEAN_AND_MEAN to exclude APIs such as
> Cryptography, DDE, RPC, Shell, and Windows Sockets.
> 
> /*  If defined, the following flags inhibit definition
>  * of the indicated items.
>  *
>  *  NOGDICAPMASKS - CC_*, LC_*, PC_*, CP_*, TC_*, RC_
>  *  NOVIRTUALKEYCODES - VK_*
>  *  NOWINMESSAGES - WM_*, EM_*, LB_*, CB_*
>  *  NOWINSTYLES   - WS_*, CS_*, ES_*, LBS_*, SBS_*, CBS_*
>  *  NOSYSMETRICS  - SM_*
>  *  NOMENUS   - MF_*
>  *  NOICONS   - IDI_*
>  *  NOKEYSTATES   - MK_*
>  *  NOSYSCOMMANDS - SC_*
>  *  NORASTEROPS   - Binary and Tertiary raster ops
>  *  NOSHOWWINDOW  - SW_*
>  *  OEMRESOURCE   - OEM Resource values
>  *  NOATOM- Atom Manager routines
>  *  NOCLIPBOARD   - Clipboard routines
>  *  NOCOLOR   - Screen colors
>  *  NOCTLMGR  - Control and Dialog routines
>  *  NODRAWTEXT- DrawText() and DT_*
>  *  NOGDI - All GDI defines and routines
>  *  NOKERNEL  - All KERNEL defines and routines
>  *  NOUSER- All USER defines and routines
>  *  NONLS - All NLS defines and routines
>  *  NOMB  - MB_* and MessageBox()
>  *  NOMEMMGR  - GMEM_*, LMEM_*, GHND, LHND, associated routines
>  *  NOMETAFILE- typedef METAFILEPICT
>  *  NOMINMAX  - Macros min(a,b) and max(a,b)
>  *  NOMSG - typedef MSG and associated routines
>  *  NOOPENFILE- OpenFile(), OemToAnsi, AnsiToOem, and OF_*
>  *  NOSCROLL  - SB_* and scrolling routines
>  *  NOSERVICE - All Service Controller routines, SERVICE_ equates, 
> etc.
>  *  NOSOUND   - Sound driver routines
>  *  NOTEXTMETRIC  - typedef TEXTMETRIC and associated routines
>  *  NOWH  - SetWindowsHook and WH_*
>  *  NOWINOFFSETS  - GWL_*, GCL_*, associated routines
>  *  NOCOMM- COMM driver routines
>  *  NOKANJI   - Kanji support stuff.
>  *  NOHELP- Help engine interface.
>  *  NOPROFILER- Profiler interface.
>  *  NODEFERWINDOWPOS  - DeferWindowPos routines
>  *  NOMCX - Modem Configuration Extensions

Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-04 Thread Mateusz
W dniu 04.12.2017 o 15:02, Derek Buitenhuis pisze:
> On 12/4/2017 8:03 AM, Mateusz wrote:
>> After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
>> we have included windows.h in much more files and we should
>> avoid conflicts with defines/function declarations.
>>
>> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
>> ---
>>  libavcodec/jpegls.h  | 4 
>>  libavcodec/mss2.c| 6 +++---
>>  libavformat/mxfenc.c | 2 +-
>>  3 files changed, 8 insertions(+), 4 deletions(-)
> 
> Sprinkling these weird ifdefs and renames around is pretty ugly. Is there
> some sort of canonical list on MSDN or something we can use globally-ish?
> 
> - Derek

There is a list of "Predefined Macros" in MSVC -- IMO there are OK
https://msdn.microsoft.com/en-us/library/b0084kay(v=vs.140).aspx

More danger are macros from windows.h -- there is a list of macros
to exclude some parts (from MSDN and windows.h):
Define WIN32_LEAN_AND_MEAN to exclude APIs such as
Cryptography, DDE, RPC, Shell, and Windows Sockets.

/*  If defined, the following flags inhibit definition
 * of the indicated items.
 *
 *  NOGDICAPMASKS - CC_*, LC_*, PC_*, CP_*, TC_*, RC_
 *  NOVIRTUALKEYCODES - VK_*
 *  NOWINMESSAGES - WM_*, EM_*, LB_*, CB_*
 *  NOWINSTYLES   - WS_*, CS_*, ES_*, LBS_*, SBS_*, CBS_*
 *  NOSYSMETRICS  - SM_*
 *  NOMENUS   - MF_*
 *  NOICONS   - IDI_*
 *  NOKEYSTATES   - MK_*
 *  NOSYSCOMMANDS - SC_*
 *  NORASTEROPS   - Binary and Tertiary raster ops
 *  NOSHOWWINDOW  - SW_*
 *  OEMRESOURCE   - OEM Resource values
 *  NOATOM- Atom Manager routines
 *  NOCLIPBOARD   - Clipboard routines
 *  NOCOLOR   - Screen colors
 *  NOCTLMGR  - Control and Dialog routines
 *  NODRAWTEXT- DrawText() and DT_*
 *  NOGDI - All GDI defines and routines
 *  NOKERNEL  - All KERNEL defines and routines
 *  NOUSER- All USER defines and routines
 *  NONLS - All NLS defines and routines
 *  NOMB  - MB_* and MessageBox()
 *  NOMEMMGR  - GMEM_*, LMEM_*, GHND, LHND, associated routines
 *  NOMETAFILE- typedef METAFILEPICT
 *  NOMINMAX  - Macros min(a,b) and max(a,b)
 *  NOMSG - typedef MSG and associated routines
 *  NOOPENFILE- OpenFile(), OemToAnsi, AnsiToOem, and OF_*
 *  NOSCROLL  - SB_* and scrolling routines
 *  NOSERVICE - All Service Controller routines, SERVICE_ equates, etc.
 *  NOSOUND   - Sound driver routines
 *  NOTEXTMETRIC  - typedef TEXTMETRIC and associated routines
 *  NOWH  - SetWindowsHook and WH_*
 *  NOWINOFFSETS  - GWL_*, GCL_*, associated routines
 *  NOCOMM- COMM driver routines
 *  NOKANJI   - Kanji support stuff.
 *  NOHELP- Help engine interface.
 *  NOPROFILER- Profiler interface.
 *  NODEFERWINDOWPOS  - DeferWindowPos routines
 *  NOMCX - Modem Configuration Extensions
 */

The most danger are small caps macros defined as empty in minwindef.h:
far
near
pascal
cdecl

I think it is possible to make in ffmpeg's *.h files which include windows.h
something like this:
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#define NOGDI
#include 
#undef far
#undef near
#undef pascal
#undef cdecl

I will make some test and write back.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-04 Thread Mateusz
After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
we have included windows.h in much more files and we should
avoid conflicts with defines/function declarations.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavcodec/jpegls.h  | 4 
 libavcodec/mss2.c| 6 +++---
 libavformat/mxfenc.c | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index c8997c7861..69a57b9538 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -32,6 +32,10 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#ifdef near
+#undef near
+#endif
+
 typedef struct JpeglsContext {
 AVCodecContext *avctx;
 } JpeglsContext;
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 9e7cc466de..3180af1d60 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t 
*buf, int buf_size,
 return 0;
 }
 
-typedef struct Rectangle {
+struct Rectangle {
 int coded, x, y, w, h;
-} Rectangle;
+};
 
 #define MAX_WMV9_RECTANGLES 20
 #define ARITH2_PADDING 2
@@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 
 int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
 
-Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+struct Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
 int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
 if ((ret = init_get_bits8(, buf, buf_size)) < 0)
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index ed6ecbf541..407acdcaaa 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -1444,7 +1444,7 @@ static int mxf_write_header_metadata_sets(AVFormatContext 
*s)
 AVStream *st = NULL;
 int i;
 
-MXFPackage packages[2] = {};
+MXFPackage packages[2] = {{NULL}};
 int package_count = 2;
 packages[0].type = MaterialPackage;
 packages[1].type = SourcePackage;
-- 
2.15.1.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-03 Thread Mateusz
W dniu 03.12.2017 o 22:26, Michael Niedermayer pisze:
> On Sat, Dec 02, 2017 at 09:50:26PM +0100, Mateusz wrote:
>> After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
>> we have included windows.h in much more files and we should
>> avoid conflicts with defines/function declarations.
>>
>> Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
>> ---
>>  libavcodec/jpegls.h  | 4 
>>  libavcodec/mss2.c| 6 +++---
>>  libavformat/mxfenc.c | 2 +-
>>  3 files changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
>> index c8997c7861..69a57b9538 100644
>> --- a/libavcodec/jpegls.h
>> +++ b/libavcodec/jpegls.h
>> @@ -32,6 +32,10 @@
>>  #include "avcodec.h"
>>  #include "internal.h"
>>  
>> +#ifdef near
>> +#undef near
>> +#endif
>> +
>>  typedef struct JpeglsContext {
>>  AVCodecContext *avctx;
>>  } JpeglsContext;
>> diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
>> index 9e7cc466de..f850349a0a 100644
>> --- a/libavcodec/mss2.c
>> +++ b/libavcodec/mss2.c
>> @@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const 
>> uint8_t *buf, int buf_size,
>>  return 0;
>>  }
>>  
>> -typedef struct Rectangle {
>> +typedef struct ff_Rectangle {
> 
> Does Rect instead of ff_Rectangle work too ?
> 
> The ff_ is a bit confusng as its outside how we name structs
> (struct identifers are capitalized)

Yes, Rect works. I've checked also MSS2Rectangle -- works too and in mss2.c 
there is for example
typedef struct MSS2Context

Should I resent this patch with Rect or MSS2Rectangle?

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] fix MSVC compilation errors

2017-12-02 Thread Mateusz
After commit 3701d49 'error_resilience: remove avpriv_atomic usage'
we have included windows.h in much more files and we should
avoid conflicts with defines/function declarations.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavcodec/jpegls.h  | 4 
 libavcodec/mss2.c| 6 +++---
 libavformat/mxfenc.c | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index c8997c7861..69a57b9538 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -32,6 +32,10 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#ifdef near
+#undef near
+#endif
+
 typedef struct JpeglsContext {
 AVCodecContext *avctx;
 } JpeglsContext;
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 9e7cc466de..f850349a0a 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -464,9 +464,9 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t 
*buf, int buf_size,
 return 0;
 }
 
-typedef struct Rectangle {
+typedef struct ff_Rectangle {
 int coded, x, y, w, h;
-} Rectangle;
+} ff_Rectangle;
 
 #define MAX_WMV9_RECTANGLES 20
 #define ARITH2_PADDING 2
@@ -485,7 +485,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 
 int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
 
-Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+ff_Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
 int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
 if ((ret = init_get_bits8(, buf, buf_size)) < 0)
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index ed6ecbf541..407acdcaaa 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -1444,7 +1444,7 @@ static int mxf_write_header_metadata_sets(AVFormatContext 
*s)
 AVStream *st = NULL;
 int i;
 
-MXFPackage packages[2] = {};
+MXFPackage packages[2] = {{NULL}};
 int package_count = 2;
 packages[0].type = MaterialPackage;
 packages[1].type = SourcePackage;
-- 
2.15.1.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] configure: ERROR: x265 not found using pkg-config

2017-11-12 Thread Mateusz
W dniu 12.11.2017 o 20:35, Helmut K. C. Tessarek pisze:
> I get the following error during configure:
> 
> ERROR: x265 not found using pkg-config
> 
> My last compile run (3 days ago) worked perfectly. Somebody must have
> changed the configure script.

This time it is x265 bug. Please apply patch 
https://patches.videolan.org/patch/18630/
to x265 and recheck.

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] swscale: fix dithers table for DITHER_COPY macro

2017-11-11 Thread Mateusz
W dniu 24.10.2017 o 10:02, Mateusz pisze:
> The Bayer matrix 8x8 used in DITHER_COPY macro is table dithers[5].
> Remaining dithers[] matrixes are generated from this matrix by
> downshift or upshift.
> 
> This patch fixes dithers[6] and dithers[7] matrixes -- they were
> too dark.

ping

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avformat/yuv4mpeg: add gray9/10/12 support

2017-10-26 Thread Mateusz
W dniu 2017-10-07 o 21:41, Paul B Mahol pisze:
> On 10/7/17, Mateusz <mateu...@poczta.onet.pl> wrote:
>> Lately ffmpeg supports gray9/10/12 pixel formats.
>>
>> This patch adds gray9/10/12 pixel formats to y4m.
>>
>> It also moves gray16 to 'strict -1' section.
>>
>> Please review.
>>
>> Mateusz
>>
>>
> 
> lgtm

ping

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] swscale: fix dithers table for DITHER_COPY macro

2017-10-24 Thread Mateusz
The Bayer matrix 8x8 used in DITHER_COPY macro is table dithers[5].
Remaining dithers[] matrixes are generated from this matrix by
downshift or upshift.

This patch fixes dithers[6] and dithers[7] matrixes -- they were
too dark.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libswscale/swscale_unscaled.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 5d81cd5af9..6ffde1ec59 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -90,15 +90,6 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
   {  48,  0, 60, 12, 51,  3, 63, 15,},
   {  24, 40, 20, 36, 27, 43, 23, 39,},
   {  56,  8, 52,  4, 59, 11, 55,  7,},
-},{
-  {  18, 34, 30, 46, 17, 33, 29, 45,},
-  {  50,  2, 62, 14, 49,  1, 61, 13,},
-  {  26, 42, 22, 38, 25, 41, 21, 37,},
-  {  58, 10, 54,  6, 57,  9, 53,  5,},
-  {  16, 32, 28, 44, 19, 35, 31, 47,},
-  {  48,  0, 60, 12, 51,  3, 63, 15,},
-  {  24, 40, 20, 36, 27, 43, 23, 39,},
-  {  56,  8, 52,  4, 59, 11, 55,  7,},
 },{
   {  36, 68, 60, 92, 34, 66, 58, 90,},
   { 100,  4,124, 28, 98,  2,122, 26,},
@@ -108,6 +99,15 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
   {  96,  0,120, 24,102,  6,126, 30,},
   {  48, 80, 40, 72, 54, 86, 46, 78,},
   { 112, 16,104,  8,118, 22,110, 14,},
+},{
+  {  72,136,120,184, 68,132,116,180,},
+  { 200,  8,248, 56,196,  4,244, 52,},
+  { 104,168, 88,152,100,164, 84,148,},
+  { 232, 40,216, 24,228, 36,212, 20,},
+  {  64,128,112,176, 76,140,124,188,},
+  { 192,  0,240, 48,204, 12,252, 60,},
+  {  96,160, 80,144,108,172, 92,156,},
+  { 224, 32,208, 16,236, 44,220, 28,},
 }};
 
 
-- 
2.14.2.windows.3

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] swscale: use dithering in DITHER_COPY only if not set -sws_dither none

2017-10-24 Thread Mateusz
This patch uses dithering in DITHER_COPY macro only if
it was not used option '-sws_dither none'.
With option '-sws_dither none' it uses downshift.

For human eye dithering is OK, for video codecs not necessarily.
If user don't want to use dithering, we should respect that.

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libswscale/swscale_unscaled.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 6ffde1ec59..01d6d653bc 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1485,7 +1485,25 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
 unsigned shift= src_depth-dst_depth, tmp;\
-if (shiftonly) {\
+if (c->dither == SWS_DITHER_NONE) {\
+for (i = 0; i < height; i++) {\
+for (j = 0; j < length-7; j+=8) {\
+dst[j+0] = dbswap(bswap(src[j+0])>>shift);\
+dst[j+1] = dbswap(bswap(src[j+1])>>shift);\
+dst[j+2] = dbswap(bswap(src[j+2])>>shift);\
+dst[j+3] = dbswap(bswap(src[j+3])>>shift);\
+dst[j+4] = dbswap(bswap(src[j+4])>>shift);\
+dst[j+5] = dbswap(bswap(src[j+5])>>shift);\
+dst[j+6] = dbswap(bswap(src[j+6])>>shift);\
+dst[j+7] = dbswap(bswap(src[j+7])>>shift);\
+}\
+for (; j < length; j++) {\
+dst[j] = dbswap(bswap(src[j])>>shift);\
+}\
+dst += dstStride;\
+src += srcStride;\
+}\
+} else if (shiftonly) {\
 for (i = 0; i < height; i++) {\
 const uint8_t *dither= dithers[shift-1][i&7];\
 for (j = 0; j < length-7; j+=8) {\
-- 
2.14.2.windows.3

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] swscale: more accurate DITHER_COPY macro for full and limited range

2017-10-23 Thread Mateusz
W dniu 2017-10-20 o 20:07, Jan Ekstrom pisze:
> On Fri, Oct 20, 2017 at 10:26 AM, Mateusz <mateu...@poczta.onet.pl> wrote:
>> W dniu 2017-10-06 o 17:33, Mateusz pisze:
>>> Fixed DITHER_COPY macro (only C code), updated FATE tests.
>>>
>>> PSNR in tests that needed update goes from 50 to 999.99 -- the quality is 
>>> there.
>>
>> Ping.
> 
> Hi,
> 
> The updated PSNR values look really good (and the max difference going
> from 1 to 0), but unfortunately I lack the capacity to verify that the
> code is doing the same thing as the original thing.
> 
> Can we please have someone's eyes on this? If a patchwork URL makes it
> simpler, it's https://patchwork.ffmpeg.org/patch/5440/ .
> 
> 
> Jan

There was discussion about this code in thread "swscale_unscaled: fix 
DITHER_COPY macro, use it only for dst_depth == 8". Conclusion was that it 
should be the same code for dst_depth from 8 to 15 bit (in all possible cases).

The worse quality scenario for this DITHER_COPY macro is when dst_depth is 8. 
It is hard to see differences from normal 8 bit to dithered 8 bit (too good 
quality).

I've prepared example movies with new DITHER_COPY macro versus downshift only 
with dst_depth == 5 (impossible in normal code, only for testing).

Movie Sintel in resolution 2K with bit depth 5 (original 5 most significant 
bits):
www.msystem.waw.pl/x265/sintel-org5.mkv

The same movie with bit depth 5, but dithered 5 bit instead of original:
www.msystem.waw.pl/x265/sintel-dit5.mkv

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] swscale: more accurate DITHER_COPY macro for full and limited range

2017-10-20 Thread Mateusz
W dniu 2017-10-06 o 17:33, Mateusz pisze:
> Fixed DITHER_COPY macro (only C code), updated FATE tests.
> 
> PSNR in tests that needed update goes from 50 to 999.99 -- the quality is 
> there.

Ping.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avformat/yuv4mpeg: add gray9/10/12 support

2017-10-07 Thread Mateusz
Lately ffmpeg supports gray9/10/12 pixel formats.

This patch adds gray9/10/12 pixel formats to y4m.

It also moves gray16 to 'strict -1' section.

Please review.

Mateusz

From f2b31ef66931e02a355e3140d47b17f0d307dec7 Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Sat, 7 Oct 2017 19:05:53 +0200
Subject: [PATCH] avformat/yuv4mpeg: add gray9/10/12 support

Signed-off-by: Mateusz Brzostek <mateu...@poczta.onet.pl>
---
 libavformat/yuv4mpegdec.c |  6 ++
 libavformat/yuv4mpegenc.c | 23 ---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/libavformat/yuv4mpegdec.c b/libavformat/yuv4mpegdec.c
index 462b823860..ff0125e4cf 100644
--- a/libavformat/yuv4mpegdec.c
+++ b/libavformat/yuv4mpegdec.c
@@ -126,6 +126,12 @@ static int yuv4_read_header(AVFormatContext *s)
 pix_fmt = AV_PIX_FMT_YUV444P;
 } else if (strncmp("mono16", tokstart, 6) == 0) {
 pix_fmt = AV_PIX_FMT_GRAY16;
+} else if (strncmp("mono12", tokstart, 6) == 0) {
+pix_fmt = AV_PIX_FMT_GRAY12;
+} else if (strncmp("mono10", tokstart, 6) == 0) {
+pix_fmt = AV_PIX_FMT_GRAY10;
+} else if (strncmp("mono9", tokstart, 5) == 0) {
+pix_fmt = AV_PIX_FMT_GRAY9;
 } else if (strncmp("mono", tokstart, 4) == 0) {
 pix_fmt = AV_PIX_FMT_GRAY8;
 } else {
diff --git a/libavformat/yuv4mpegenc.c b/libavformat/yuv4mpegenc.c
index b4dc6e9ef6..44f40bbad9 100644
--- a/libavformat/yuv4mpegenc.c
+++ b/libavformat/yuv4mpegenc.c
@@ -69,6 +69,15 @@ static int yuv4_generate_header(AVFormatContext *s, char* 
buf)
 case AV_PIX_FMT_GRAY8:
 colorspace = " Cmono";
 break;
+case AV_PIX_FMT_GRAY9:
+colorspace = " Cmono9";
+break;
+case AV_PIX_FMT_GRAY10:
+colorspace = " Cmono10";
+break;
+case AV_PIX_FMT_GRAY12:
+colorspace = " Cmono12";
+break;
 case AV_PIX_FMT_GRAY16:
 colorspace = " Cmono16";
 break;
@@ -184,6 +193,9 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket 
*pkt)
 case AV_PIX_FMT_YUV422P:
 case AV_PIX_FMT_YUV444P:
 break;
+case AV_PIX_FMT_GRAY9:
+case AV_PIX_FMT_GRAY10:
+case AV_PIX_FMT_GRAY12:
 case AV_PIX_FMT_GRAY16:
 case AV_PIX_FMT_YUV420P9:
 case AV_PIX_FMT_YUV422P9:
@@ -213,7 +225,8 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket 
*pkt)
 ptr += frame->linesize[0];
 }
 
-if (st->codecpar->format != AV_PIX_FMT_GRAY8 &&
+if (st->codecpar->format != AV_PIX_FMT_GRAY8 && st->codecpar->format != 
AV_PIX_FMT_GRAY9 &&
+st->codecpar->format != AV_PIX_FMT_GRAY10 && st->codecpar->format != 
AV_PIX_FMT_GRAY12 &&
 st->codecpar->format != AV_PIX_FMT_GRAY16) {
 // Adjust for smaller Cb and Cr planes
 av_pix_fmt_get_chroma_sub_sample(st->codecpar->format, _chroma_shift,
@@ -255,11 +268,14 @@ static int yuv4_write_header(AVFormatContext *s)
"stream, some mjpegtools might not work.\n");
 break;
 case AV_PIX_FMT_GRAY8:
-case AV_PIX_FMT_GRAY16:
 case AV_PIX_FMT_YUV420P:
 case AV_PIX_FMT_YUV422P:
 case AV_PIX_FMT_YUV444P:
 break;
+case AV_PIX_FMT_GRAY9:
+case AV_PIX_FMT_GRAY10:
+case AV_PIX_FMT_GRAY12:
+case AV_PIX_FMT_GRAY16:
 case AV_PIX_FMT_YUV420P9:
 case AV_PIX_FMT_YUV422P9:
 case AV_PIX_FMT_YUV444P9:
@@ -291,7 +307,8 @@ static int yuv4_write_header(AVFormatContext *s)
"yuv444p10, yuv422p10, yuv420p10, "
"yuv444p12, yuv422p12, yuv420p12, "
"yuv444p14, yuv422p14, yuv420p14, "
-   "yuv444p16, yuv422p16, yuv420p16 "
+   "yuv444p16, yuv422p16, yuv420p16, "
+   "gray9, gray10, gray12 "
"and gray16 pixel formats. "
"Use -pix_fmt to select one.\n");
 return AVERROR(EIO);
-- 
2.14.2.windows.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] swscale: more accurate DITHER_COPY macro for full and limited range

2017-10-06 Thread Mateusz
Fixed DITHER_COPY macro (only C code), updated FATE tests.

PSNR in tests that needed update goes from 50 to 999.99 -- the quality is there.

Please review.

Mateusz

From d870ba10aa4b3520fc30215fbbd57565faa13df4 Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Fri, 6 Oct 2017 16:49:54 +0200
Subject: [PATCH] swscale: more accurate DITHER_COPY macro for full and limited
 range

---
 libswscale/swscale_unscaled.c  | 73 --
 tests/ref/vsynth/vsynth1-ffvhuff420p12 |  4 +-
 tests/ref/vsynth/vsynth1-vc2-420p10|  4 +-
 tests/ref/vsynth/vsynth1-vc2-420p12|  4 +-
 tests/ref/vsynth/vsynth2-ffvhuff420p12 |  4 +-
 tests/ref/vsynth/vsynth2-vc2-420p10|  4 +-
 tests/ref/vsynth/vsynth2-vc2-420p12|  4 +-
 tests/ref/vsynth/vsynth3-ffvhuff420p12 |  4 +-
 tests/ref/vsynth/vsynth_lena-ffvhuff420p12 |  4 +-
 tests/ref/vsynth/vsynth_lena-vc2-420p10|  4 +-
 tests/ref/vsynth/vsynth_lena-vc2-420p12|  4 +-
 11 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..5d81cd5 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1502,24 +1484,45 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
-for (i = 0; i < height; i++) {\
-const uint8_t *dither= dithers[src_depth-9][i&7];\
-for (j = 0; j < length-7; j+=8){\
-dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+unsigned shift= src_depth-dst_depth, tmp;\
+if (shiftonly) {\
+for (i = 0; i < height; i++) {\
+const uint8_t *dither= dithers[shift-1][i&7];\
+for (j = 0; j < length-7; j+=8) {\
+tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = 
dbswap

Re: [FFmpeg-devel] Create .lib static libraries on Windows instead of .a libraries (using MSVC compiler)

2017-09-27 Thread Mateusz
W dniu 2017-09-27 o 17:40, Hendrik Leppkes pisze:
> On Wed, Sep 27, 2017 at 4:43 PM, Anonymous Maarten
> <anonymous.maar...@gmail.com> wrote:
>> Hey,
>>
>> Building the static ffmpeg library using vcpkg is currently broken [1][2].
>> This is mainly due to the fact that the built static libraries are .a files
>> with lib prefixes.
>>
>> The attached patch sets the lib prefix to "" and the lib suffix to ".lib".
>>
>> Tested by building ffmpeg using vcpkg and Visual Studio 2017 Community
>> Edition on Windows 10.
>> The resulted libraries link correctly to my application.
>>
> This would break the workflow for everyone that is currently working
> with the current names, however.

For gcc it breaks nothing (it still makes lib...a files), for msvc it is 
improvement.
Is there any person happy with lib...a names for lib in msvc?

Mateusz


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8

2017-09-27 Thread Mateusz
W dniu 2017-09-26 o 13:31, Carl Eugen Hoyos pisze:
> 2017-09-26 1:33 GMT+02:00 Mateusz <mateu...@poczta.onet.pl>:
> 
>> I've sent C code patch 2017-09-06 (and nothing) so I thought that the
>> problem is with speed. For simplicity I've attached this patch.
> 
> You could (wait a day or two and) either add an option to
> select your dithering code or put it under #ifdef so more
> people can test it.

I've attached patch that do nothing unless you specify
--extra-cflags="-DNEW_DITHER_COPY" or export CFLAGS="-DNEW_DITHER_COPY"

>> In theory it is enough to make only dst = (src + dither)>>shift;
>> -- white in limited range has 0 on bits to remove (235*4 for example)
>> so overflow is impossible. For files with full range not marked as
>> full range overflow is possible (for dither > 0) and white goes
>> to black. tmp - (tmp>>dst_depth) undoing this overflow.
> 
> (Not necessarily related, sorry if I misunderstand:)
> Valid limited-range frames can contain some pixels with peak
> values outside of the defined range.
> 
> Carl Eugen

OK, so this fight with possible overflow is even more needed.

Mateusz
From 2c0adb2d9a0fc0fbbffc643d27860fbb779c08fc Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Tue, 26 Sep 2017 22:20:10 +0200
Subject: [PATCH] swscale: new precise DITHER_COPY macro (if
 "-DNEW_DITHER_COPY" CFLAGS)

---
 libswscale/swscale_unscaled.c | 47 ++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..0d41695 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,6 +110,7 @@ DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
+#ifndef NEW_DITHER_COPY
 static const uint16_t dither_scale[15][16]={
 {2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
 {2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
@@ -127,7 +128,7 @@ static const uint16_t dither_scale[15][16]={
 {3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
 {3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
 };
-
+#endif
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1501,6 +1502,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t 
*src[],
 return srcSliceH;
 }
 
+#ifndef NEW_DITHER_COPY
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
 uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
 int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
@@ -1521,6 +1523,49 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 dst += dstStride;\
 src += srcStride;\
 }
+#else
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+unsigned shift= src_depth-dst_depth, tmp;\
+if (shiftonly) {\
+for (i = 0; i < height; i++) {\
+const uint8_t *dither= dithers[shift-1][i&7];\
+for (j = 0; j < length-7; j+=8){\
+tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = 
dbswap(tmp - (tmp>>dst_depth));\
+}\
+for (; j < length; j++){\
+tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = 
dbswap(tmp - (tmp>>dst_depth));\
+}\
+dst += dstStride;\
+src += srcStride;\
+}\
+} else {\
+for (i = 0; i < height; i++) {\
+const uint8_t *dither= dithers[shift-1][i&7];\
+for (j = 0; j < length-7; j+=8){\
+tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - 
(tmp>>dst_depth) + dither[0])>>shift);\
+tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - 
(tmp&g

Re: [FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8

2017-09-25 Thread Mateusz
W dniu 2017-09-25 o 22:53, Carl Eugen Hoyos pisze:
> 2017-09-23 19:18 GMT+02:00 Mateusz <mateu...@poczta.onet.pl>:
>> W dniu 2017-09-23 o 17:01, Michael Niedermayer pisze:
>>> On Fri, Sep 22, 2017 at 02:10:01AM +0200, Mateusz wrote:
>>>> To reduce bit depth in planar YUV or gray pixel formats
>>>> ffmpeg uses DITHER_COPY macro.
>>>> Now it makes images greener and with visible dither pattern.
>>>>
>>>> In my opinion there is no point to use dither tables for
>>>> destination bit depth >= 9, we can use simple down-shift
>>>> which is neutral in full and limited range -- result images
>>>> are with the same brightness and with the same colors.
>>>
>>> Theres no reason why dither should mess up the average color tone.
>>
>> In theory -- yes, I agree.
>> In reality -- current version of DITHER_COPY mess up the
>> average color tone.
>> It's one of the reasons why I sending these patches.
>>
>>> And if the user asks for >= 9 bit depth and has >= 10 bit
>>> input the user likely wants to get the best quality.
>>> Thats more so in a world where computers get faster
>>> every few years, this isnt 1995 where shaving off a add
>>> or a multiply per pixel was actually making a difference
>>> in being able to play something in realtime
>>> More so coverting between bit depths might be memory
>>> speed limited and not limited by arithmetic computations
>>> once its done in SIMD
>>
>> Yes, I agree. Now I can't write patches in NASM syntax, but
>> I started reading and learning.
>> I hope I'll back in a few months...
> 
> I strongly suspect there should be agreement over the C code
> first, asm optimizations can be done once C code is agreed
> upon.
> 
> Thank you for the samples, Carl Eugen

I've sent C code patch 2017-09-06 (and nothing) so I thought that the
problem is with speed. For simplicity I've attached this patch.

This code full-fill all rules (for limited and for full range):
white -> white
black -> black
white-1 -> gray (it means white-1 -> white usually but sometimes white-1 -> 
white-1)
black+1 -> gray (it means black+1 -> black usually but sometimes black+1 -> 
black+1)
increase bitdepth & decrease bitdepth = identity

About code:
limited range (shiftonly == 1)
tmp = (src + dither)>>shift; dst = tmp - (tmp>>dst_depth);

In theory it is enough to make only dst = (src + dither)>>shift;
-- white in limited range has 0 on bits to remove (235*4 for example)
so overflow is impossible. For files with full range not marked as
full range overflow is possible (for dither > 0) and white goes
to black. tmp - (tmp>>dst_depth) undoing this overflow.

full range (shiftonly == 0)
dst = (src - (src>>dst_depth) + dither)>>shift;

If we want to remove 2 bits from source (for example 10-bit -> 8-bit)
white in full range: ...111|11
white-1 in full range: ...111|10
so we should subtract dither for rule 'white-1 -> gray'
black in full range: ...000|00
black+1 in full range: ...000|01
so we should add dither for rule 'black+1 -> gray'
(src>>dst_depth) is 0 for small values (close to black) and max possible
dither for big values (close to white)

For rule 'increase bitdepth & decrease bitdepth = identity' in full range,
we have increase bitdepth: (v<<(dst_depth-src_depth)) | 
(v>>(2*src_depth-dst_depth))
so (src - (src>>dst_depth))>>shift is exactly inverse operation when we
decreasing bitdepth.
For full range when 'src' is from increased 'v'
(src - (src>>dst_depth)) is equal 'v' shifted only, so we are as in limited 
range.
And overflow is impossible in operation (src - (src>>dst_depth) + dither).

Mateusz
From fd9e271ea531d25bc5a708d0dfeb1be5415b90d0 Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Wed, 6 Sep 2017 09:05:02 +0200
Subject: [PATCH] fix DITHER_COPY macro according to shiftonly state

---
 libswscale/swscale_unscaled.c | 73 ++-
 1 file changed, 38 insertions(+), 35 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..e3e375a 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},

Re: [FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8

2017-09-25 Thread Mateusz
W dniu 2017-09-25 o 01:42, Carl Eugen Hoyos pisze:
> 2017-09-23 19:18 GMT+02:00 Mateusz <mateu...@poczta.onet.pl>:
> 
>> In reality -- current version of DITHER_COPY mess
>> up the average color tone.
> 
> You could explain how we can reproduce this.

Please take any video with colors that you can see change to green color --
it could be with human faces, for example free
http://media.xiph.org/video/derf/y4m/KristenAndSara_1280x720_60.y4m

Copy the video to o.y4m and make some iteration of 8-bit -> 10-bit -> 8-bit 
conversions:
ffmpeg -i o.y4m -y -strict -1 -pix_fmt yuv420p10 t10.y4m
ffmpeg -i t10.y4m -y -strict -1 -pix_fmt yuv420p o.y4m

Please watch result video -- it should be more green, with pattern and darker.

You can watch 'KristenAndSara' video after 100 iteration
http://www.msystem.waw.pl/x265/limited.mkv

And 100th iteration of 'KristenAndSara' with '-color_range 2' option
http://www.msystem.waw.pl/x265/full.mkv

Now DITHER_COPY works wrong in both cases (full and limited range).

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8

2017-09-23 Thread Mateusz
W dniu 2017-09-23 o 17:01, Michael Niedermayer pisze:
> On Fri, Sep 22, 2017 at 02:10:01AM +0200, Mateusz wrote:
>> To reduce bit depth in planar YUV or gray pixel formats ffmpeg uses 
>> DITHER_COPY macro.
>> Now it makes images greener and with visible dither pattern.
>>
>> In my opinion there is no point to use dither tables for destination bit 
>> depth >= 9,
>> we can use simple down-shift which is neutral in full and limited range -- 
>> result images
>> are with the same brightness and with the same colors.
> 
> Theres no reason why dither should mess up the average color tone.

In theory -- yes, I agree.
In reality -- current version of DITHER_COPY mess up the average color tone.
It's one of the reasons why I sending these patches.

> And if the user asks for >= 9 bit depth and has >= 10 bit input the
> user likely wants to get the best quality.
> Thats more so in a world where computers get faster every few years,
> this isnt 1995 where shaving off a add or a multiply per pixel was
> actually making a difference in being able to play something in
> realtime
> More so coverting between bit depths might be memory speed limited and
> not limited by arithmetic computations once its done in SIMD

Yes, I agree. Now I can't write patches in NASM syntax, but I started reading 
and learning.
I hope I'll back in a few months...

Mateusz

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] swscale_unscaled: fix and speed up DITHER_COPY macro for x86 with SSE2

2017-09-22 Thread Mateusz
W dniu 2017-09-22 o 17:47, James Almer pisze:
> On 9/22/2017 12:23 PM, Mateusz wrote:
>> New version of the patch -- now it uses the same logic independent of the 
>> target bitdepth.
>>
>> For x86_64 it is much faster than current code (with perfect quality), for 
>> x86_32 it is fast
>> if you add to configure: --extra-cflags="-msse2"
>> (for x86_32 with default configure options it is slower than current code 
>> but with better quality)
>>
>> Please review/test.
>>
>> Mateusz
> 
> We don't accept intrinsics, or new arch specific code outside of arch
> specific folders.
> 
> Either write this in NASM syntax, or if it *really* needs to be inlined,
> use __asm__() inline blocks. But whichever you use, it needs to go in
> the x86/ folder.

Thank you for the information! I'm starting learning NASM syntax (it could last 
for months).

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] swscale_unscaled: fix and speed up DITHER_COPY macro for x86 with SSE2

2017-09-22 Thread Mateusz
New version of the patch -- now it uses the same logic independent of the 
target bitdepth.

For x86_64 it is much faster than current code (with perfect quality), for 
x86_32 it is fast
if you add to configure: --extra-cflags="-msse2"
(for x86_32 with default configure options it is slower than current code but 
with better quality)

Please review/test.

Mateusz

From 8eaa76fc82550f62f1a22e9388a51dc61c031a2c Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Fri, 22 Sep 2017 14:54:53 +0200
Subject: [PATCH] swscale_unscaled: fix and speed up DITHER_COPY macro for x86
 with SSE2

---
 libswscale/swscale_unscaled.c | 220 +++---
 1 file changed, 185 insertions(+), 35 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..cd3e917 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -35,6 +35,10 @@
 #include "libavutil/avassert.h"
 #include "libavutil/avconfig.h"
 
+#if ARCH_X86_64 || (ARCH_X86_32 && defined(__SSE2__))
+#include 
+#endif
+
 DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
 {
   {   0,  1,  0,  1,  0,  1,  0,  1,},
@@ -110,24 +114,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1502,24 +1488,164 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
-for (i = 0; i < height; i++) {\
-const uint8_t *dither= dithers[src_depth-9][i&7];\
-for (j = 0; j < length-7; j+=8){\
-dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+unsigned shift= src_depth-dst_depth, tmp;\
+if (shiftonly) {\
+for (i = 0; i < height; i++) {\
+const uint8_t *dither= dithers[shift-1][i&7];\
+for (j = 0; j < length-7; j+=8) {\
+tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = 
dbswap(tmp - (tmp>>dst_dep

[FFmpeg-devel] [PATCH] swscale_unscaled: fix DITHER_COPY macro, use it only for dst_depth == 8

2017-09-21 Thread Mateusz
To reduce bit depth in planar YUV or gray pixel formats ffmpeg uses DITHER_COPY 
macro.
Now it makes images greener and with visible dither pattern.

In my opinion there is no point to use dither tables for destination bit depth 
>= 9,
we can use simple down-shift which is neutral in full and limited range -- 
result images
are with the same brightness and with the same colors.

For destination bit depth == 8 we could use new bit exact precise DITHER_COPY 
macro
(which is slower).

If the problem is with speed only, I've attached second patch with Intel 
Intrinsics for x86_64
that makes code faster (I don't see any Intel Intrinsics in ffmpeg so it's 
probably for testing only).

Please review.

Mateusz

From a52417a3817ac774eb364bbef20c954a3d278d45 Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Fri, 22 Sep 2017 01:22:59 +0200
Subject: [PATCH] swscale_unscaled: fix and speed up DITHER_COPY macro for
 x86_64, use it only for dst_depth == 8

---
 libswscale/swscale_unscaled.c | 185 ++
 1 file changed, 150 insertions(+), 35 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..7d1cbed 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -35,6 +35,10 @@
 #include "libavutil/avassert.h"
 #include "libavutil/avconfig.h"
 
+#if ARCH_X86_64
+#include 
+#endif
+
 DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
 {
   {   0,  1,  0,  1,  0,  1,  0,  1,},
@@ -110,24 +114,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1502,22 +1488,127 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+unsigned shift= src_depth-dst_depth, tmp;\
+if (shiftonly) {\
+for (i = 0; i < height; i++) {\
+const uint8_t *dither= dithers[shift-1][i&7];\
+for (j = 0; j < length-7; j+=8) {\
+tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = 
dbswap(tmp - (tmp>>dst_depth));\
+tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = 
dbswap(tmp - (tmp>>dst_depth));\
+}\
+for (; j < length; j++) {\
+tmp = (bswap(src[j]) + dither[j&7])>>shift

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-09-11 Thread Mateusz
W dniu 2017-09-06 o 09:27, Mateusz pisze:
> W dniu 2017-09-06 o 02:07, Michael Niedermayer pisze:
>> On Wed, Sep 06, 2017 at 01:25:45AM +0200, Mateusz wrote:
>>> W dniu 2017-09-05 o 23:37, Michael Niedermayer pisze:
>>>> On Tue, Sep 05, 2017 at 04:42:06PM +0200, Mateusz wrote:
>>>>> W dniu 2017-09-05 o 15:40, Michael Niedermayer pisze:
>>>>>> On Mon, Sep 04, 2017 at 09:33:34AM +0200, Mateusz wrote:
>>>>>>> If ffmpeg reduces bit-depth to 8-bit or more, it uses DITHER_COPY macro.
>>>>>>> The problem is DITHER_COPY macro make images darker (on all planes).
>>>>>>>
>>>>>>> In x265 project there is issue #365 which is caused by this DITHER_COPY 
>>>>>>> bug.
>>>>>>> I think it is time to fix -- there are more and more high bit-depth 
>>>>>>> sources.
>>>>>>>
>>>>>>> Please review.
>>>>>>>
>>>>>>>  libswscale/swscale_unscaled.c | 44 
>>>>>>> +--
>>>>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>>>>
>>>>>> please provide a git compatible patch with with commit message as 
>>>>>> produced
>>>>>> by git format-patch or git send-email
>>>>>>
>>>>>> this mail is not accepted as is by git
>>>>>> Applying: libswscale/swscale_unscaled: fix DITHER_COPY macro
>>>>>> error: corrupt patch at line 6
>>>>>> error: could not build fake ancestor
>>>>>> Patch failed at 0001 libswscale/swscale_unscaled: fix DITHER_COPY macro
>>>>>>
>>>>>> [...]
>>>>>
>>>>> I've attached the result of git format-patch command.
>>>>>
>>>>> Sorry for 1 private e-mail (I clicked wrong button).
>>>>>
>>>>> Mateusz
>>>>
>>>>>  swscale_unscaled.c |   44 +---
>>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>>> 9973b13b3f74319abe9c97302ee87b2b3468b3b6  
>>>>> 0001-fix-DITHER_COPY-macro-to-avoid-make-images-darker.patch
>>>>> From 9b96d612fef46ccec7e148cd7f8e8666b4e7a4cd Mon Sep 17 00:00:00 2001
>>>>> From: Mateusz <mateu...@poczta.onet.pl>
>>>>> Date: Tue, 5 Sep 2017 16:09:28 +0200
>>>>> Subject: [PATCH] fix DITHER_COPY macro to avoid make images darker
>>>>>
>>>>> ---
>>>>>  libswscale/swscale_unscaled.c | 44 
>>>>> +--
>>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>>>
>>>>> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
>>>>> index ef36aec..3b7a5a9 100644
>>>>> --- a/libswscale/swscale_unscaled.c
>>>>> +++ b/libswscale/swscale_unscaled.c
>>>>> @@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
>>>>> dithers)[8][8][8]={
>>>>>{ 112, 16,104,  8,118, 22,110, 14,},
>>>>>  }};
>>>>>  
>>>>> -static const uint16_t dither_scale[15][16]={
>>>>> -{2,3,3,5,5,5,5,5,5,5,5,
>>>>> 5,5,5,5,5,},
>>>>> -{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   
>>>>> 25,   25,   25,   25,   25,},
>>>>> -{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  
>>>>> 113,  113,  113,  113,  113,},
>>>>> -{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  
>>>>> 241,  481,  481,  481,  481,},
>>>>> -{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  
>>>>> 993,  993,  993,  993, 1985,},
>>>>> -{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 
>>>>> 2017, 4033, 4033, 4033, 4033,},
>>>>> -{3,5,6,7,7,7,8,  255,  255,  509, 1017, 
>>>>> 2033, 4065, 8129,16257,16257,},
>>>>> -{3,5,6,8,8,8,8,9,  511,  511, 1021, 
>>>>> 2041, 4081, 8161,16321,32641,},
>>>>> -{3,5,7,8,9,9,9,9,   10, 1023, 1023, 
>>>>> 2045, 4089, 8177,16353,32705,},
>>>>> -{3,5,7,8,   10,

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-09-06 Thread Mateusz
W dniu 2017-09-06 o 02:07, Michael Niedermayer pisze:
> On Wed, Sep 06, 2017 at 01:25:45AM +0200, Mateusz wrote:
>> W dniu 2017-09-05 o 23:37, Michael Niedermayer pisze:
>>> On Tue, Sep 05, 2017 at 04:42:06PM +0200, Mateusz wrote:
>>>> W dniu 2017-09-05 o 15:40, Michael Niedermayer pisze:
>>>>> On Mon, Sep 04, 2017 at 09:33:34AM +0200, Mateusz wrote:
>>>>>> If ffmpeg reduces bit-depth to 8-bit or more, it uses DITHER_COPY macro.
>>>>>> The problem is DITHER_COPY macro make images darker (on all planes).
>>>>>>
>>>>>> In x265 project there is issue #365 which is caused by this DITHER_COPY 
>>>>>> bug.
>>>>>> I think it is time to fix -- there are more and more high bit-depth 
>>>>>> sources.
>>>>>>
>>>>>> Please review.
>>>>>>
>>>>>>  libswscale/swscale_unscaled.c | 44 
>>>>>> +--
>>>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>>>
>>>>> please provide a git compatible patch with with commit message as produced
>>>>> by git format-patch or git send-email
>>>>>
>>>>> this mail is not accepted as is by git
>>>>> Applying: libswscale/swscale_unscaled: fix DITHER_COPY macro
>>>>> error: corrupt patch at line 6
>>>>> error: could not build fake ancestor
>>>>> Patch failed at 0001 libswscale/swscale_unscaled: fix DITHER_COPY macro
>>>>>
>>>>> [...]
>>>>
>>>> I've attached the result of git format-patch command.
>>>>
>>>> Sorry for 1 private e-mail (I clicked wrong button).
>>>>
>>>> Mateusz
>>>
>>>>  swscale_unscaled.c |   44 +---
>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>> 9973b13b3f74319abe9c97302ee87b2b3468b3b6  
>>>> 0001-fix-DITHER_COPY-macro-to-avoid-make-images-darker.patch
>>>> From 9b96d612fef46ccec7e148cd7f8e8666b4e7a4cd Mon Sep 17 00:00:00 2001
>>>> From: Mateusz <mateu...@poczta.onet.pl>
>>>> Date: Tue, 5 Sep 2017 16:09:28 +0200
>>>> Subject: [PATCH] fix DITHER_COPY macro to avoid make images darker
>>>>
>>>> ---
>>>>  libswscale/swscale_unscaled.c | 44 
>>>> +--
>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
>>>> index ef36aec..3b7a5a9 100644
>>>> --- a/libswscale/swscale_unscaled.c
>>>> +++ b/libswscale/swscale_unscaled.c
>>>> @@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
>>>> dithers)[8][8][8]={
>>>>{ 112, 16,104,  8,118, 22,110, 14,},
>>>>  }};
>>>>  
>>>> -static const uint16_t dither_scale[15][16]={
>>>> -{2,3,3,5,5,5,5,5,5,5,5,5, 
>>>>5,5,5,5,},
>>>> -{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25, 
>>>>   25,   25,   25,   25,},
>>>> -{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113, 
>>>>  113,  113,  113,  113,},
>>>> -{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241, 
>>>>  481,  481,  481,  481,},
>>>> -{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993, 
>>>>  993,  993,  993, 1985,},
>>>> -{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
>>>> 4033, 4033, 4033, 4033,},
>>>> -{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
>>>> 4065, 8129,16257,16257,},
>>>> -{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
>>>> 4081, 8161,16321,32641,},
>>>> -{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
>>>> 4089, 8177,16353,32705,},
>>>> -{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
>>>> 4093, 8185,16369,32737,},
>>>> -{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
>>>> 4095, 8189,16377,32753,},
>>>> -{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
>>>> 8191, 8191,16381,32761,},
>>>> -{3, 

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-09-05 Thread Mateusz
W dniu 2017-09-05 o 23:37, Michael Niedermayer pisze:
> On Tue, Sep 05, 2017 at 04:42:06PM +0200, Mateusz wrote:
>> W dniu 2017-09-05 o 15:40, Michael Niedermayer pisze:
>>> On Mon, Sep 04, 2017 at 09:33:34AM +0200, Mateusz wrote:
>>>> If ffmpeg reduces bit-depth to 8-bit or more, it uses DITHER_COPY macro.
>>>> The problem is DITHER_COPY macro make images darker (on all planes).
>>>>
>>>> In x265 project there is issue #365 which is caused by this DITHER_COPY 
>>>> bug.
>>>> I think it is time to fix -- there are more and more high bit-depth 
>>>> sources.
>>>>
>>>> Please review.
>>>>
>>>>  libswscale/swscale_unscaled.c | 44 
>>>> +--
>>>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>>
>>> please provide a git compatible patch with with commit message as produced
>>> by git format-patch or git send-email
>>>
>>> this mail is not accepted as is by git
>>> Applying: libswscale/swscale_unscaled: fix DITHER_COPY macro
>>> error: corrupt patch at line 6
>>> error: could not build fake ancestor
>>> Patch failed at 0001 libswscale/swscale_unscaled: fix DITHER_COPY macro
>>>
>>> [...]
>>
>> I've attached the result of git format-patch command.
>>
>> Sorry for 1 private e-mail (I clicked wrong button).
>>
>> Mateusz
> 
>>  swscale_unscaled.c |   44 +---
>>  1 file changed, 13 insertions(+), 31 deletions(-)
>> 9973b13b3f74319abe9c97302ee87b2b3468b3b6  
>> 0001-fix-DITHER_COPY-macro-to-avoid-make-images-darker.patch
>> From 9b96d612fef46ccec7e148cd7f8e8666b4e7a4cd Mon Sep 17 00:00:00 2001
>> From: Mateusz <mateu...@poczta.onet.pl>
>> Date: Tue, 5 Sep 2017 16:09:28 +0200
>> Subject: [PATCH] fix DITHER_COPY macro to avoid make images darker
>>
>> ---
>>  libswscale/swscale_unscaled.c | 44 
>> +--
>>  1 file changed, 13 insertions(+), 31 deletions(-)
>>
>> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
>> index ef36aec..3b7a5a9 100644
>> --- a/libswscale/swscale_unscaled.c
>> +++ b/libswscale/swscale_unscaled.c
>> @@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
>> dithers)[8][8][8]={
>>{ 112, 16,104,  8,118, 22,110, 14,},
>>  }};
>>  
>> -static const uint16_t dither_scale[15][16]={
>> -{2,3,3,5,5,5,5,5,5,5,5,5,   
>>  5,5,5,5,},
>> -{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
>> 25,   25,   25,   25,},
>> -{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
>> 113,  113,  113,  113,},
>> -{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
>> 481,  481,  481,  481,},
>> -{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
>> 993,  993,  993, 1985,},
>> -{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
>> 4033, 4033, 4033, 4033,},
>> -{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
>> 4065, 8129,16257,16257,},
>> -{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
>> 4081, 8161,16321,32641,},
>> -{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
>> 4089, 8177,16353,32705,},
>> -{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
>> 4093, 8185,16369,32737,},
>> -{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
>> 4095, 8189,16377,32753,},
>> -{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
>> 8191, 8191,16381,32761,},
>> -{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
>> 14,16383,16383,32765,},
>> -{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
>> 14,   15,32767,32767,},
>> -{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
>> 15,   15,   16,65535,},
>> -};
>> -
>>  
>>  static void fillPlane(uint8_t *plane, int stride, int width, int height, 
>> int y,
>>uint8_t val)
>> @@ -1502,22 +1484,22 @@ static int packedCopyWrapper(SwsContext *c, const 
>> uint8_t *src[],
>>  }
>>  
>>  #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
>> -uint16_t scale= dither_scale[dst_depth-1][src_de

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-09-05 Thread Mateusz
W dniu 2017-09-05 o 15:40, Michael Niedermayer pisze:
> On Mon, Sep 04, 2017 at 09:33:34AM +0200, Mateusz wrote:
>> If ffmpeg reduces bit-depth to 8-bit or more, it uses DITHER_COPY macro.
>> The problem is DITHER_COPY macro make images darker (on all planes).
>>
>> In x265 project there is issue #365 which is caused by this DITHER_COPY bug.
>> I think it is time to fix -- there are more and more high bit-depth sources.
>>
>> Please review.
>>
>>  libswscale/swscale_unscaled.c | 44 
>> +--
>>  1 file changed, 13 insertions(+), 31 deletions(-)
> 
> please provide a git compatible patch with with commit message as produced
> by git format-patch or git send-email
> 
> this mail is not accepted as is by git
> Applying: libswscale/swscale_unscaled: fix DITHER_COPY macro
> error: corrupt patch at line 6
> error: could not build fake ancestor
> Patch failed at 0001 libswscale/swscale_unscaled: fix DITHER_COPY macro
> 
> [...]

I've attached the result of git format-patch command.

Sorry for 1 private e-mail (I clicked wrong button).

Mateusz
From 9b96d612fef46ccec7e148cd7f8e8666b4e7a4cd Mon Sep 17 00:00:00 2001
From: Mateusz <mateu...@poczta.onet.pl>
Date: Tue, 5 Sep 2017 16:09:28 +0200
Subject: [PATCH] fix DITHER_COPY macro to avoid make images darker

---
 libswscale/swscale_unscaled.c | 44 +--
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..3b7a5a9 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1502,22 +1484,22 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+unsigned shift= src_depth-dst_depth, tmp;\
 for (i = 0; i < height; i++) {\
-const uint8_t *dither= dithers[src_depth-9][i&7];\
+const uint8_t *dither= dithers[shift-1][i&7];\
 for (j = 0; j < length-7; j+=8){\
-dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp 
- (tmp>>dst_depth));\
+tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp 
- (tmp>

[FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-09-04 Thread Mateusz
If ffmpeg reduces bit-depth to 8-bit or more, it uses DITHER_COPY macro.
The problem is DITHER_COPY macro make images darker (on all planes).

In x265 project there is issue #365 which is caused by this DITHER_COPY bug.
I think it is time to fix -- there are more and more high bit-depth sources.

Please review.

 libswscale/swscale_unscaled.c | 44 +--
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..3b7a5a9 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    
5,    5,    5,    5,},
-{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1502,22 +1484,22 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+    unsigned shift= src_depth-dst_depth, tmp;\
 for (i = 0; i < height; i++) {\
-    const uint8_t *dither= dithers[src_depth-9][i&7];\
+    const uint8_t *dither= dithers[shift-1][i&7];\
 for (j = 0; j < length-7; j+=8){\
-    dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-    dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-    dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-    dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-    dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-    dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-    dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-    dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+    tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp 
- (tmp>>dst_depth));\
+    tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp 
- (tmp>>dst_depth));\
+    }\
+    for (; j < length; j++){\
+    tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - 
(tmp>>dst_depth));\
 }\
-    for (; j < length; j++)\
-    dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
 dst += dstStride;\
 src += srcStride;\
 }

 libswscale/swscale_unscaled.c | 44 +--
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ef36aec..3b7a5a9 

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-03-20 Thread Mateusz Brzostek
W dniu 2017-03-20 o 10:00, Carl Eugen Hoyos pisze:
> 2017-03-15 22:52 GMT+01:00 Mateusz Brzostek <mate...@msystem.waw.pl>:
>> Hello!
>>
>> There are 3 problems with DITHER_COPY macro in libswscale/swscale_unscaled.c:
>> 1) there is overflow in dithering from 12-bit to 10-bit (output value > 
>> 1023);
>> 2) for limit range the lower limit is not respected, for example from 10-bit 
>> to 8-bit value 64 is converted to 15;
>> 3) for many iteration of downscale/upscale of the same image the 200th 
>> iteration is significantly darker.
>>
>> The first bug is due to wrong dithers table (now it is OK only for 8-bit 
>> destination), fix is:
>> -const uint8_t *dither= dithers[src_depth-9][i&7];\
>> +const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
> I believe this implies that you could split your patch.
>
> Carl Eugen

I've attached dither_tiny.patch -- it solve the biggest problem when dst_depth 
> 8 and white goes to black.
If you don't want to loose time for DITHER_COPY problem, please apply only this 
dither_tiny.patch -- for one iteration it is acceptable quality.

movie after 200 iterations 12bit -> 10bit -> 8bit ->12bit with dither_tiny.patch
msystem.waw.pl/x265/w200.mkv

and the same but with dither_sub.patch
msystem.waw.pl/x265/n200.mkv
 libswscale/swscale_unscaled.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ba3d688..bd4e4f2 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1488,7 +1488,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t 
*src[],
 uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
 int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
 for (i = 0; i < height; i++) {\
-const uint8_t *dither= dithers[src_depth-9][i&7];\
+const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
 for (j = 0; j < length-7; j+=8){\
 dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
 dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-03-20 Thread Mateusz Brzostek
In previous patch there was missing braces {} in for (; j < length; j++) loop. 
Please ignore that patch.

New patch with braces and without FFMIN macro.

Mateusz


W dniu 2017-03-18 o 21:28, Mateusz Brzostek pisze:
> W dniu 2017-03-16 o 19:17, Michael Niedermayer pisze:
>> On Wed, Mar 15, 2017 at 10:52:29PM +0100, Mateusz Brzostek wrote:
>>> Hello!
>>>
>>> There are 3 problems with DITHER_COPY macro in 
>>> libswscale/swscale_unscaled.c:
>>> 1) there is overflow in dithering from 12-bit to 10-bit (output value > 
>>> 1023);
>>> 2) for limit range the lower limit is not respected, for example from 
>>> 10-bit to 8-bit value 64 is converted to 15;
>>> 3) for many iteration of downscale/upscale of the same image the 200th 
>>> iteration is significantly darker.
>>>
>>> The first bug is due to wrong dithers table (now it is OK only for 8-bit 
>>> destination), fix is:
>>> -const uint8_t *dither= dithers[src_depth-9][i&7];\
>>> +const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
>>>
>>> For bugs 2) and 3) it is needed formula that do not make images darker (in 
>>> attachment). So please review.
>> does your code maintain white and black levels ?
>> with 4 bits white is 15, with 7 bits white is 127 for example
>> white should stay white
>> black should stay black
>> in both directions
>>
>> [...]
> After speed tests I've prepared faster version of this patch (do exactly the 
> same but faster).
>
> ffmpeg0 - clean 64-bit gcc 6.3 build for Windows from snapshot 2017-03-18,
> ffmpeg1 - the same + old patch,
> ffmpeg2 - the same + new patch.
>
> Average speed was (from 10 laps):
> ffmpeg0 -- 2.625
> ffmpeg1 -- 2.540
> ffmpeg2 -- 2.628
>
> New patch is faster than old and is not slower than current (buggy) code.
>
> My speed test looks like this (copy from 324 frames movie 2400x1350, 
> yuv444p12):
> f:\speed\ff2>for /L %n in (1 1 10) do (for /L %i in (0 1 2) do (ffmpeg%i -y 
> -stats -i ../original_vc3.mxf -v error -strict -1 -p
> ix_fmt yuv444p10 -f yuv4mpegpipe NUL ) )
>
> f:\speed\ff2>(for /L %i in (0 1 2) do (ffmpeg%i -y -stats -i 
> ../original_vc3.mxf -v error -strict -1 -pix_fmt yuv444p10 -f yuv4m
> pegpipe NUL ) )
>
> f:\speed\ff2>(ffmpeg0 -y -stats -i ../original_vc3.mxf -v error -strict -1 
> -pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
> frame=  324 fps= 63 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
> bitrate=3732481.2kbits/s speed=2.64x
>
> f:\speed\ff2>(ffmpeg1 -y -stats -i ../original_vc3.mxf -v error -strict -1 
> -pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
> frame=  324 fps= 61 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
> bitrate=3732481.2kbits/s speed=2.54x
>
> f:\speed\ff2>(ffmpeg2 -y -stats -i ../original_vc3.mxf -v error -strict -1 
> -pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
> frame=  324 fps= 63 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
> bitrate=3732481.2kbits/s speed=2.62x
>
> New patch inline (+ in attachment for easy apply):
>  libswscale/swscale_unscaled.c | 41 +++--
>  1 file changed, 11 insertions(+), 30 deletions(-)
>
> diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
> index ba3d688..a1d0a8d 100644
> --- a/libswscale/swscale_unscaled.c
> +++ b/libswscale/swscale_unscaled.c
> @@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
> dithers)[8][8][8]={
>{ 112, 16,104,  8,118, 22,110, 14,},
>  }};
>  
> -static const uint16_t dither_scale[15][16]={
> -{2,3,3,5,5,5,5,5,5,5,5,5,
> 5,5,5,5,},
> -{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
> 25,   25,   25,   25,},
> -{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
> 113,  113,  113,  113,},
> -{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
> 481,  481,  481,  481,},
> -{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
> 993,  993,  993, 1985,},
> -{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
> 4033, 4033, 4033, 4033,},
> -{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
> 4065, 8129,16257,16257,},
> -{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
> 4081, 8161,16321,32641,},
> -{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
> 4089, 8177,16353,32705,},
> -{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
> 4093, 8185,16369,32737,},
> -{3,5,7,8,   10,   11,   11,   1

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-03-18 Thread Mateusz Brzostek
W dniu 2017-03-16 o 19:17, Michael Niedermayer pisze:
> On Wed, Mar 15, 2017 at 10:52:29PM +0100, Mateusz Brzostek wrote:
>> Hello!
>>
>> There are 3 problems with DITHER_COPY macro in libswscale/swscale_unscaled.c:
>> 1) there is overflow in dithering from 12-bit to 10-bit (output value > 
>> 1023);
>> 2) for limit range the lower limit is not respected, for example from 10-bit 
>> to 8-bit value 64 is converted to 15;
>> 3) for many iteration of downscale/upscale of the same image the 200th 
>> iteration is significantly darker.
>>
>> The first bug is due to wrong dithers table (now it is OK only for 8-bit 
>> destination), fix is:
>> -const uint8_t *dither= dithers[src_depth-9][i&7];\
>> +const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
>>
>> For bugs 2) and 3) it is needed formula that do not make images darker (in 
>> attachment). So please review.
> does your code maintain white and black levels ?
> with 4 bits white is 15, with 7 bits white is 127 for example
> white should stay white
> black should stay black
> in both directions
>
> [...]
After speed tests I've prepared faster version of this patch (do exactly the 
same but faster).

ffmpeg0 - clean 64-bit gcc 6.3 build for Windows from snapshot 2017-03-18,
ffmpeg1 - the same + old patch,
ffmpeg2 - the same + new patch.

Average speed was (from 10 laps):
ffmpeg0 -- 2.625
ffmpeg1 -- 2.540
ffmpeg2 -- 2.628

New patch is faster than old and is not slower than current (buggy) code.

My speed test looks like this (copy from 324 frames movie 2400x1350, yuv444p12):
f:\speed\ff2>for /L %n in (1 1 10) do (for /L %i in (0 1 2) do (ffmpeg%i -y 
-stats -i ../original_vc3.mxf -v error -strict -1 -p
ix_fmt yuv444p10 -f yuv4mpegpipe NUL ) )

f:\speed\ff2>(for /L %i in (0 1 2) do (ffmpeg%i -y -stats -i 
../original_vc3.mxf -v error -strict -1 -pix_fmt yuv444p10 -f yuv4m
pegpipe NUL ) )

f:\speed\ff2>(ffmpeg0 -y -stats -i ../original_vc3.mxf -v error -strict -1 
-pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
frame=  324 fps= 63 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
bitrate=3732481.2kbits/s speed=2.64x

f:\speed\ff2>(ffmpeg1 -y -stats -i ../original_vc3.mxf -v error -strict -1 
-pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
frame=  324 fps= 61 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
bitrate=3732481.2kbits/s speed=2.54x

f:\speed\ff2>(ffmpeg2 -y -stats -i ../original_vc3.mxf -v error -strict -1 
-pix_fmt yuv444p10 -f yuv4mpegpipe NUL )
frame=  324 fps= 63 q=-0.0 Lsize= 6150939kB time=00:00:13.50 
bitrate=3732481.2kbits/s speed=2.62x

New patch inline (+ in attachment for easy apply):
 libswscale/swscale_unscaled.c | 41 +++--
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ba3d688..a1d0a8d 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1485,22 +1467,21 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHE

Re: [FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-03-16 Thread Mateusz Brzostek
W dniu 2017-03-16 o 19:17, Michael Niedermayer pisze:
> On Wed, Mar 15, 2017 at 10:52:29PM +0100, Mateusz Brzostek wrote:
>> Hello!
>>
>> There are 3 problems with DITHER_COPY macro in libswscale/swscale_unscaled.c:
>> 1) there is overflow in dithering from 12-bit to 10-bit (output value > 
>> 1023);
>> 2) for limit range the lower limit is not respected, for example from 10-bit 
>> to 8-bit value 64 is converted to 15;
>> 3) for many iteration of downscale/upscale of the same image the 200th 
>> iteration is significantly darker.
>>
>> The first bug is due to wrong dithers table (now it is OK only for 8-bit 
>> destination), fix is:
>> -const uint8_t *dither= dithers[src_depth-9][i&7];\
>> +const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
>>
>> For bugs 2) and 3) it is needed formula that do not make images darker (in 
>> attachment). So please review.
> does your code maintain white and black levels ?
> with 4 bits white is 15, with 7 bits white is 127 for example
> white should stay white
> black should stay black
> in both directions
=== short version ===

Yes, white goes to white, black goes to black.

=== long version ===

This DITHER_COPY macro is for case when destination bit-depth >= 8 and
source bit-depth > destination bit-depth and the picture type is the same
(for example yuv444p12 to yuv444p10). It is replacement for down-shift.
For destination bit-depth < 8 it is needed real dither (not simplified like 
this DITHER_COPY).

The formula before the patch was:
dst = (src + dither) * scale >> shift

where: dst - destination, src - source, dither - constant values from table 8x8,
scale and shift are chosen to fulfill formula (in mathematics/floating point 
arithmetic):
(max_value_src + max_value_dither) * scale / (2^shift) < max_value_dst + 1
This is due to lack of clip function (for speed reason).

If you consider (scale / (2^shift)) number for example for conversion 10-bit to 
8-bit,
we have
(scale / (2^shift)) < 1/4
because (1023 + 3) * 1/4 = 256.5 and 256.5 >= 255 + 1
(with 1/4 there will be overflow: 1023 go to 256 for dither > 0).

So for src > 0 with 2 least significant bits = 0 and dither = 0 we have
dst <= (src >> 2) - 1
which is the darker destination effect (it is unavoidable in this model).

In my patch the number
(scale / (2^shift)) == 1/4
for 10-bit to 8-bit example. This fix the darker destination effect but create 
possible overflow problem.

In my patch the possible overflow go to sign bit of int32_t variable and is 
clipped to 0x7FFF.
In my tests the new DITHER_COPY is a little bit slower than old but is much 
faster than real dither in x265.

Mateusz
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] libswscale/swscale_unscaled: fix DITHER_COPY macro

2017-03-15 Thread Mateusz Brzostek
Hello!

There are 3 problems with DITHER_COPY macro in libswscale/swscale_unscaled.c:
1) there is overflow in dithering from 12-bit to 10-bit (output value > 1023);
2) for limit range the lower limit is not respected, for example from 10-bit to 
8-bit value 64 is converted to 15;
3) for many iteration of downscale/upscale of the same image the 200th 
iteration is significantly darker.

The first bug is due to wrong dithers table (now it is OK only for 8-bit 
destination), fix is:
-const uint8_t *dither= dithers[src_depth-9][i&7];\
+const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\

For bugs 2) and 3) it is needed formula that do not make images darker (in 
attachment). So please review.

Mateusz
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ba3d688..429c98c 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -110,24 +110,6 @@ DECLARE_ALIGNED(8, static const uint8_t, 
dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
-static const uint16_t dither_scale[15][16]={
-{2,3,3,5,5,5,5,5,5,5,5,5,
5,5,5,5,},
-{2,3,7,7,   13,   13,   25,   25,   25,   25,   25,   25,   
25,   25,   25,   25,},
-{3,3,4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  
113,  113,  113,  113,},
-{3,4,4,5,   31,   31,   61,  121,  241,  241,  241,  241,  
481,  481,  481,  481,},
-{3,4,5,5,6,   63,   63,  125,  249,  497,  993,  993,  
993,  993,  993, 1985,},
-{3,5,6,6,6,7,  127,  127,  253,  505, 1009, 2017, 
4033, 4033, 4033, 4033,},
-{3,5,6,7,7,7,8,  255,  255,  509, 1017, 2033, 
4065, 8129,16257,16257,},
-{3,5,6,8,8,8,8,9,  511,  511, 1021, 2041, 
4081, 8161,16321,32641,},
-{3,5,7,8,9,9,9,9,   10, 1023, 1023, 2045, 
4089, 8177,16353,32705,},
-{3,5,7,8,   10,   10,   10,   10,   10,   11, 2047, 2047, 
4093, 8185,16369,32737,},
-{3,5,7,8,   10,   11,   11,   11,   11,   11,   12, 4095, 
4095, 8189,16377,32753,},
-{3,5,7,9,   10,   12,   12,   12,   12,   12,   12,   13, 
8191, 8191,16381,32761,},
-{3,5,7,9,   10,   12,   13,   13,   13,   13,   13,   13,   
14,16383,16383,32765,},
-{3,5,7,9,   10,   12,   14,   14,   14,   14,   14,   14,   
14,   15,32767,32767,},
-{3,5,7,9,   11,   12,   14,   15,   15,   15,   15,   15,   
15,   15,   16,65535,},
-};
-
 
 static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
   uint8_t val)
@@ -1485,22 +1467,22 @@ static int packedCopyWrapper(SwsContext *c, const 
uint8_t *src[],
 }
 
 #define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
-uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
-int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+int scale= 31-src_depth, shift= 31-dst_depth;\
 for (i = 0; i < height; i++) {\
-const uint8_t *dither= dithers[src_depth-9][i&7];\
+const uint8_t *dither= dithers[src_depth-dst_depth-1][i&7];\
 for (j = 0; j < length-7; j+=8){\
-dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
-dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
-dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
-dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
-dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
-dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
-dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
-dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+int32_t tmp = (bswap(src[j+0]) + (unsigned)dither[0])<<scale; tmp 
= (tmp|(tmp>>31)) & 0x7FFF; dst[j+0] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+1]) + (unsigned)dither[1])<<scale; tmp = 
(tmp|(tmp>>31)) & 0x7FFF; dst[j+1] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+2]) + (unsigned)dither[2])<<scale; tmp = 
(tmp|(tmp>>31)) & 0x7FFF; dst[j+2] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+3]) + (unsigned)dither[3])<<scale; tmp = 
(tmp|(tmp>>31)) & 0x7FFF; dst[j+3] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+4]) + (unsigned)dither[4])<<scale; tmp = 
(tmp|(tmp>>31)) & 0x7FFF; dst[j+4] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+5]) + (unsigned)dither[5])<<scale; tmp = 
(tmp|(tmp>>31)) & 0x7FFF; dst[j+5] = dbswap(tmp>>shift);\
+tmp = (bswap(src[j+6]) + (uns