[FFmpeg-devel] [PATCH] avcodec/libaomenc: Get number of operating points

2022-06-16 Thread Wan-Teh Chang
Use the new codec control AV1E_GET_NUM_OPERATING_POINTS to get the
number of operating points. This is the size of the output arrays of
AV1E_GET_SEQ_LEVEL_IDX and AV1E_GET_TARGET_SEQ_LEVEL_IDX.

Signed-off-by: Wan-Teh Chang 
---
 libavcodec/libaomenc.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
index 7865ae161f..6b7e426bfd 100644
--- a/libavcodec/libaomenc.c
+++ b/libavcodec/libaomenc.c
@@ -199,6 +199,9 @@ static const char *const ctlidstr[] = {
 [AV1E_SET_ENABLE_SMOOTH_INTERINTRA] = "AV1E_SET_ENABLE_SMOOTH_INTERINTRA",
 [AV1E_SET_ENABLE_REF_FRAME_MVS] = "AV1E_SET_ENABLE_REF_FRAME_MVS",
 #endif
+#ifdef AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS
+[AV1E_GET_NUM_OPERATING_POINTS] = "AV1E_GET_NUM_OPERATING_POINTS",
+#endif
 #ifdef AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX
 [AV1E_GET_SEQ_LEVEL_IDX]= "AV1E_GET_SEQ_LEVEL_IDX",
 #endif
@@ -330,7 +333,8 @@ static av_cold int codecctl_int(AVCodecContext *avctx,
 return 0;
 }
 
-#if defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
+#if defined(AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS) && \
+defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
 defined(AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX)
 static av_cold int codecctl_intp(AVCodecContext *avctx,
 #ifdef UENUM1BYTE
@@ -364,16 +368,20 @@ static av_cold int aom_free(AVCodecContext *avctx)
 {
 AOMContext *ctx = avctx->priv_data;
 
-#if defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
+#if defined(AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS) && \
+defined(AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX) && \
 defined(AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX)
 if (!(avctx->flags & AV_CODEC_FLAG_PASS1)) {
-int levels[32] = { 0 };
-int target_levels[32] = { 0 };
+int num_operating_points;
+int levels[32];
+int target_levels[32];
 
-if (!codecctl_intp(avctx, AV1E_GET_SEQ_LEVEL_IDX, levels) &&
+if (!codecctl_intp(avctx, AV1E_GET_NUM_OPERATING_POINTS,
+   _operating_points) &&
+!codecctl_intp(avctx, AV1E_GET_SEQ_LEVEL_IDX, levels) &&
 !codecctl_intp(avctx, AV1E_GET_TARGET_SEQ_LEVEL_IDX,
target_levels)) {
-for (int i = 0; i < 32; i++) {
+for (int i = 0; i < num_operating_points; i++) {
 if (levels[i] > target_levels[i]) {
 // Warn when the target level was not met
 av_log(avctx, AV_LOG_WARNING,
-- 
2.36.1.476.g0c4daa206d-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/hevcdec: hevc_await_progress: declare |y| only if used.

2017-07-20 Thread Wan-Teh Chang
hevc_await_progress() uses the variable |y| only inside the "if" block.
So |y| only needs to be declared and initialized in that block.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/hevcdec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 55f51211c3..e084d75767 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -1684,10 +1684,11 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, 
ptrdiff_t dststride, AVF
 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
 const Mv *mv, int y0, int height)
 {
-int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
+if (s->threads_type == FF_THREAD_FRAME ) {
+int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
 
-if (s->threads_type == FF_THREAD_FRAME )
 ff_thread_await_progress(>tf, y, 0);
+}
 }
 
 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
-- 
2.14.0.rc0.284.gd933b75aa4-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/x86/cavsdsp: Delete #include "libavcodec/x86/idctdsp.h".

2017-07-20 Thread Wan-Teh Chang
This file already has #include "idctdsp.h", which is resolved to the
idctdsp.h header in the directory where this file resides by compilers.
Two other files in this directory, libavcodec/x86/idctdsp_init.c and
libavcodec/x86/xvididct_init.c, also rely on #include "idctdsp.h"
working this way.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/x86/cavsdsp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index a8a198b46d..becb3a4808 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -29,7 +29,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/cavsdsp.h"
 #include "libavcodec/idctdsp.h"
-#include "libavcodec/x86/idctdsp.h"
 #include "constants.h"
 #include "fpel.h"
 #include "idctdsp.h"
-- 
2.14.0.rc0.284.gd933b75aa4-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/h264: Declare the local variable decode_chroma as const.

2017-07-20 Thread Wan-Teh Chang
ff_h264_decode_mb_cabac() and ff_h264_decode_mb_cavlc() are very long
functions. Declaring decode_chroma as const makes it clear the variable
doesn't change after initialization.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/h264_cabac.c | 4 ++--
 libavcodec/h264_cavlc.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 0973e30be9..345834645c 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1916,8 +1916,8 @@ int ff_h264_decode_mb_cabac(const H264Context *h, 
H264SliceContext *sl)
 const SPS *sps = h->ps.sps;
 int mb_xy;
 int mb_type, partition_count, cbp = 0;
-int dct8x8_allowed= h->ps.pps->transform_8x8_mode;
-int decode_chroma = sps->chroma_format_idc == 1 || sps->chroma_format_idc 
== 2;
+int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
+const int decode_chroma = sps->chroma_format_idc == 1 || 
sps->chroma_format_idc == 2;
 const int pixel_shift = h->pixel_shift;
 
 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index f01e76070c..187b1c64e2 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -704,8 +704,8 @@ int ff_h264_decode_mb_cavlc(const H264Context *h, 
H264SliceContext *sl)
 int mb_xy;
 int partition_count;
 unsigned int mb_type, cbp;
-int dct8x8_allowed= h->ps.pps->transform_8x8_mode;
-int decode_chroma = h->ps.sps->chroma_format_idc == 1 || 
h->ps.sps->chroma_format_idc == 2;
+int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
+const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || 
h->ps.sps->chroma_format_idc == 2;
 const int pixel_shift = h->pixel_shift;
 
 mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
-- 
2.14.0.rc0.284.gd933b75aa4-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] tsan warning about a data race in libavcodec/h264_direct.c

2017-07-20 Thread Wan-Teh Chang
Hi Ronald,

On Wed, Jul 19, 2017 at 11:50 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi Wan-Teh,
>
> On Wed, Jul 19, 2017 at 2:31 PM, Wan-Teh Chang <wtc-at-google@ffmpeg.org
>> wrote:
>
>> In libavcodec/h264_direct.c, there is already an
>> await_reference_mb_row() call before the read of
>> sl->ref_list[1][0].parent->mb_type[mb_xy] at line 505:
>>
>> 487 static void pred_temp_direct_motion(const H264Context *const h,
>> H264SliceContext *sl,
>> 488 int *mb_type)
>> 489 {
>> ...
>> 501
>> 502 await_reference_mb_row(h, >ref_list[1][0],
>> 503sl->mb_y + !!IS_INTERLACED(*mb_type));
>> 504
>> 505 if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy]))
>> { // AFL/AFR/FR/FL -> AFL/FL
>>
>> This seems like the wait you suggested, but I guess it is not?
>
> Yes, but clearly it's not doing the correct thing. :-). The ""fun"" in
> these type of issues is to figure out why not. ;-).

I debugged this for fun for three hours last night. I studied other
ff_thread_await_progress() calls, especially the ones in the h264
decoder. But I couldn't figure out the meaning of the third argument
(int field) of ff_thread_await_progress(). It seems to be only used
for h264, and seems important for this tsan warning. By playing with
the third argument, I was able to come up with a patch (pasted below)
that eliminates the tsan warning and makes "make fate-h264 THREADS=4"
pass under tsan. I also played with the second argument (int
progress), but had no success.

Description of the patch:

1. The new function await_reference_mb_row_both_fields() is a variant
of await_reference_mb_row(). pred_temp_direct_motion() is changed to
call await_reference_mb_row_both_fields() instead of
await_reference_mb_row() before it reads
sl->ref_list[1][0].parent->mb_type[mb_xy].

2. If ref_field_picture is true, await_reference_mb_row_both_fields()
calls ff_thread_await_progress() with both field=0 and field=1.
(await_reference_mb_row() calls ff_thread_await_progress() with only
field=ref_field in this case.)

3. If ref_field_picture is false, await_reference_mb_row_both_fields()
calls ff_thread_await_progress() with only field=0, the same as
await_reference_mb_row().

I doubt this patch is correct, but I am publishing it to solicit
ideas. I will try to debug this more this weekend.

Thanks,
Wan-Teh Chang

diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index a7a107c8c2..e8d3811c67 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -197,6 +197,25 @@ static void await_reference_mb_row(const
H264Context *const h, H264Ref *ref,
  ref_field_picture && ref_field);
 }

+/* Waits until it is also safe to access ref->parent->mb_type[mb_xy]. */
+static void await_reference_mb_row_both_fields(const H264Context *const h,
+   H264Ref *ref, int mb_y)
+{
+int ref_field_picture = ref->parent->field_picture;
+int ref_height= 16 * h->mb_height >> ref_field_picture;
+int row   = 16 * mb_y >> ref_field_picture;
+
+if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
+return;
+
+/* FIXME: This is an educated guess. Is this right? */
+ff_thread_await_progress(>parent->tf, FFMIN(row, ref_height - 1), 0);
+if (ref_field_picture) {
+ff_thread_await_progress(>parent->tf, FFMIN(row, ref_height - 1),
+ 1);
+}
+}
+
 static void pred_spatial_direct_motion(const H264Context *const h,
H264SliceContext *sl,
int *mb_type)
 {
@@ -499,7 +518,7 @@ static void pred_temp_direct_motion(const
H264Context *const h, H264SliceContext

 assert(sl->ref_list[1][0].reference & 3);

-await_reference_mb_row(h, >ref_list[1][0],
+await_reference_mb_row_both_fields(h, >ref_list[1][0],
sl->mb_y + !!IS_INTERLACED(*mb_type));

 if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) {
// AFL/AFR/FR/FL -> AFL/FL
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] tsan warning about a data race in libavcodec/h264_direct.c

2017-07-19 Thread Wan-Teh Chang
Hi Ronald,

Thank you for the reply.

On Wed, Jul 19, 2017 at 8:56 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi,
>
> On Wed, Jul 19, 2017 at 12:26 AM, Wan-Teh Chang <
> wtc-at-google@ffmpeg.org> wrote:
>
>> WARNING: ThreadSanitizer: data race (pid=116081)
>>   Read of size 4 at 0x7b720118 by thread T3 (mutexes: write M2239):
>> #0 pred_temp_direct_motion ffmpeg/libavcodec/h264_direct.c:505:9
>> (ffmpeg+0x1159c60)
>>
> [..]
>
>>   Previous write of size 4 at 0x7b720118 by thread T1 (mutexes:
>> write M2234):
>> #0 ff_h264_decode_mb_cabac ffmpeg/libavcodec/h264_cabac.c:2386:31
>> (ffmpeg+0x1475e0a)
>
> I believe this is temporal motion vector prediction (so MV from frame 1 is
> used as a MV predictor in frame 2). The easy solution here is to wait for
> the frame to have completed reconstruction of that block (which fills in
> that mb_type) before accessing the predictor.

In libavcodec/h264_direct.c, there is already an
await_reference_mb_row() call before the read of
sl->ref_list[1][0].parent->mb_type[mb_xy] at line 505:

487 static void pred_temp_direct_motion(const H264Context *const h,
H264SliceContext *sl,
488 int *mb_type)
489 {
...
501
502 await_reference_mb_row(h, >ref_list[1][0],
503sl->mb_y + !!IS_INTERLACED(*mb_type));
504
505 if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy]))
{ // AFL/AFR/FR/FL -> AFL/FL

This seems like the wait you suggested, but I guess it is not?

I also found that there is a similar tsan warning about a data race
between libavcodec/h264_direct.c and libavcodec/h264_cavlc.c:

WARNING: ThreadSanitizer: data race (pid=115630)
  Read of size 4 at 0x7b730118 by thread T2 (mutexes: write M2283):
#0 pred_temp_direct_motion ffmpeg/libavcodec/h264_direct.c:505:9
(ffmpeg+0x1159c60)
#1 ff_h264_pred_direct_motion ffmpeg/libavcodec/h264_direct.c:727
(ffmpeg+0x1159c60)
#2 ff_h264_decode_mb_cavlc ffmpeg/libavcodec/h264_cavlc.c:856:17
(ffmpeg+0x11520b2)
#3 decode_slice ffmpeg/libavcodec/h264_slice.c:2641:19 (ffmpeg+0x11936ea)
#4 ff_h264_execute_decode_slices
ffmpeg/libavcodec/h264_slice.c:2748:15 (ffmpeg+0x1192377)
#5 decode_nal_units ffmpeg/libavcodec/h264dec.c:716:27 (ffmpeg+0x792022)
#6 h264_decode_frame ffmpeg/libavcodec/h264dec.c:1006 (ffmpeg+0x792022)
#7 frame_worker_thread ffmpeg/libavcodec/pthread_frame.c:201:21
(ffmpeg+0xae56cc)

  Previous write of size 4 at 0x7b730118 by thread T4 (mutexes:
write M2289):
#0 ff_h264_decode_mb_cavlc ffmpeg/libavcodec/h264_cavlc.c:1095:31
(ffmpeg+0x114f2be)
#1 decode_slice ffmpeg/libavcodec/h264_slice.c:2641:19 (ffmpeg+0x11936ea)
#2 ff_h264_execute_decode_slices
ffmpeg/libavcodec/h264_slice.c:2748:15 (ffmpeg+0x1192377)
#3 decode_nal_units ffmpeg/libavcodec/h264dec.c:716:27 (ffmpeg+0x792022)
#4 h264_decode_frame ffmpeg/libavcodec/h264dec.c:1006 (ffmpeg+0x792022)
#5 frame_worker_thread ffmpeg/libavcodec/pthread_frame.c:201:21
(ffmpeg+0xae56cc)

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] tsan warning about a data race in libavcodec/h264_direct.c

2017-07-18 Thread Wan-Teh Chang
Hi,

I'd like to report a tsan warning about a data race in libavcodec/h264_direct.c.

1. Steps to reproduce:

./configure --samples=~/fate-suite/ --toolchain=clang-tsan --disable-stripping

make fate-h264 THREADS=4

2. Here is an excerpt of the tsan warning in
tests/data/fate/h264-conformance-frext-hpcafl_bcrm_c.err:

WARNING: ThreadSanitizer: data race (pid=116081)
  Read of size 4 at 0x7b720118 by thread T3 (mutexes: write M2239):
#0 pred_temp_direct_motion ffmpeg/libavcodec/h264_direct.c:505:9
(ffmpeg+0x1159c60)
#1 ff_h264_pred_direct_motion ffmpeg/libavcodec/h264_direct.c:727
(ffmpeg+0x1159c60)
#2 ff_h264_decode_mb_cabac ffmpeg/libavcodec/h264_cabac.c:2118:17
(ffmpeg+0x1471f03)
#3 decode_slice ffmpeg/libavcodec/h264_slice.c:2569:19 (ffmpeg+0x11930ba)
#4 ff_h264_execute_decode_slices
ffmpeg/libavcodec/h264_slice.c:2748:15 (ffmpeg+0x1192377)
#5 decode_nal_units ffmpeg/libavcodec/h264dec.c:716:27 (ffmpeg+0x792022)
#6 h264_decode_frame ffmpeg/libavcodec/h264dec.c:1006 (ffmpeg+0x792022)
#7 frame_worker_thread ffmpeg/libavcodec/pthread_frame.c:201:21
(ffmpeg+0xae56cc)

  Previous write of size 4 at 0x7b720118 by thread T1 (mutexes:
write M2234):
#0 ff_h264_decode_mb_cabac ffmpeg/libavcodec/h264_cabac.c:2386:31
(ffmpeg+0x1475e0a)
#1 decode_slice ffmpeg/libavcodec/h264_slice.c:2569:19 (ffmpeg+0x11930ba)
#2 ff_h264_execute_decode_slices
ffmpeg/libavcodec/h264_slice.c:2748:15 (ffmpeg+0x1192377)
#3 decode_nal_units ffmpeg/libavcodec/h264dec.c:716:27 (ffmpeg+0x792022)
#4 h264_decode_frame ffmpeg/libavcodec/h264dec.c:1006 (ffmpeg+0x792022)
#5 frame_worker_thread ffmpeg/libavcodec/pthread_frame.c:201:21
(ffmpeg+0xae56cc)

3. The relevant source code is:

libavcodec/h264_direct.c:

487 static void pred_temp_direct_motion(const H264Context *const h,
H264SliceContext *sl,
488 int *mb_type)
489 {
490 int b8_stride = 2;
491 int b4_stride = h->b_stride;
492 int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
...
502 await_reference_mb_row(h, >ref_list[1][0],
503sl->mb_y + !!IS_INTERLACED(*mb_type));
504
505 if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy]))
{ // AFL/AFR/FR/FL -> AFL/FL

Note: tsan warns about the read of mb_type[mb_xy], not the read of
sl->ref_list[1][0].parent.

libavcodec/h264_slice.c:

1914 int ff_h264_decode_mb_cabac(const H264Context *h, H264SliceContext *sl)
1915 {
...
2386 h->cur_pic.mb_type[mb_xy] = mb_type;

4. I've investigated this tsan warning for several hours in my spare
time, but I can't figure out how to fix it. I hope someone familiar
with the h264 decoder will see what's wrong or suggest fixes for me to
try.

Here are two facts that may be helpful.

- This tsan warning does not occur when I run "make fate-h264" with
THREADS=2. It starts to occur with THREADS=3.

- If I annotate libavcodec/h264_direct.c so that tsan ignores the read
of sl->ref_list[1][0].parent->mb_type[mb_xy] at line 505, then "make
fate-h264 THREADS=4" runs to completion with no tsan warning (assuming
the fix in http://ffmpeg.org/pipermail/ffmpeg-devel/2017-July/213777.html
is applied). So this seems to be the last tsan warning in fate-h264.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/h264_slice: don't sync default_ref[] between threads.

2017-07-18 Thread Wan-Teh Chang
I researched the history of the line in h264_slice.c that this patch deleted.

That line was implicitly added in commit
4da2ac5c7a491b20be62ad19d77526e62aa57c69:

http://git.videolan.org/gitweb.cgi/ffmpeg.git/?p=ffmpeg.git;a=patch;h=4da2ac5c7a491b20be62ad19d77526e62aa57c69

Although commit 4da2ac5c7a491b20be62ad19d77526e62aa57c69 didn't modify
h264_slice.c, the copy_fields macro call in
ff_h264_update_thread_context() would copy the new default_ref[] field
of H264Context. The copying of default_ref[] was made explicit in
commit 98456d4d69e0fdcc328bb9e684ae776f5bc824e1, which replaced the
copy_fields macro with field-by-field copies.

http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=98456d4d69e0fdcc328bb9e684ae776f5bc824e1

Commit 4da2ac5c7a491b20be62ad19d77526e62aa57c69 makes it clear that
the default_ref[] field of H264Context is supposed to be initialized
(to values from the associated H264SliceContext) in
h264_initialise_ref_list(), so it should not be propagated from one
decoding thread to the next.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] tsan warning about data race in h264 decoder

2017-07-18 Thread Wan-Teh Chang
Hi Ronald,

On Tue, Jul 18, 2017 at 11:41 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> Reminds me a little of
> http://git.videolan.org/?p=ffmpeg.git;a=commit;h=e72690b18da064f6c0f04f09ccde72b6636e3159.
> From a quick look, it appears that default_ref[] is unconditionally
> initialized in initialise_ref_list() (called from ff_h264_build_ref_list(),
> called from h264_slice_init()), so I think your suggested patch is correct.

Thanks a lot for taking a look and pointing me at the relevant code. I
reviewed those functions, and I think you are right.

I am curious: why do we need to sync short_ref[], long_ref[],
short_ref_count, and long_ref_count between threads?

> Can you send it as a git-formatted patch?

Done. Please see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-July/213777.html.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec/h264_slice: don't sync default_ref[] between threads.

2017-07-18 Thread Wan-Teh Chang
default_ref[] is unconditionally initialized in h264_initialise_ref_list()
(called from ff_h264_build_ref_list(), called from h264_slice_init()).

This fixes the following tsan warning when running fate-h264:

WARNING: ThreadSanitizer: data race (pid=31070)
  Write of size 8 at 0x7bbc82a8 by thread T1 (mutexes: write M1628):
#0 memcpy 
/work/release-test/final/llvm.src/projects/compiler-rt/lib/tsan/../sanitizer_common/sanitizer_common_interceptors.inc:655:5
(ffmpeg+0x10de9d)
#1 h264_initialise_ref_list ffmpeg/libavcodec/h264_refs.c:214:29 
(ffmpeg+0x1186b3f)
#2 ff_h264_build_ref_list ffmpeg/libavcodec/h264_refs.c:306 
(ffmpeg+0x1186b3f)
#3 h264_slice_init ffmpeg/libavcodec/h264_slice.c:1900:11 (ffmpeg+0x1191149)
[..]
  Previous read of size 8 at 0x7bbc82a8 by main thread (mutexes:
write M1630):
#0 memcpy 
/work/release-test/final/llvm.src/projects/compiler-rt/lib/tsan/../sanitizer_common/sanitizer_common_interceptors.inc:655:5
(ffmpeg+0x10de9d)
#1 ff_h264_update_thread_context ffmpeg/libavcodec/h264_slice.c:411:5 
(ffmpeg+0x118b7dc)

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/h264_slice.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 6deb08fe6d..2fb89b189d 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -408,7 +408,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
 memcpy(>poc,>poc,sizeof(h->poc));
 
-memcpy(h->default_ref, h1->default_ref, sizeof(h->default_ref));
 memcpy(h->short_ref,   h1->short_ref,   sizeof(h->short_ref));
 memcpy(h->long_ref,h1->long_ref,sizeof(h->long_ref));
 memcpy(h->delayed_pic, h1->delayed_pic, sizeof(h->delayed_pic));
-- 
2.13.2.932.g7449e964c-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] tsan warning about data race in h264 decoder

2017-07-18 Thread Wan-Teh Chang
Hi,

I'd like to report a tsan warning about a data race in the h264 decoder.

1. Steps to reproduce:

./configure --samples=~/fate-suite/ --toolchain=clang-tsan --disable-stripping

make fate-h264 THREADS=4

2. Here is an excerpt of the tsan warning in
tests/data/fate/h264-conformance-ba1_ft_c.err:

WARNING: ThreadSanitizer: data race (pid=31070)
  Write of size 8 at 0x7bbc82a8 by thread T1 (mutexes: write M1628):
#0 memcpy 
/work/release-test/final/llvm.src/projects/compiler-rt/lib/tsan/../sanitizer_common/sanitizer_common_interceptors.inc:655:5
(ffmpeg+0x10de9d)
#1 h264_initialise_ref_list
home/wtc/tmp/ffmpeg/libavcodec/h264_refs.c:214:29 (ffmpeg+0x1186b3f)
#2 ff_h264_build_ref_list
home/wtc/tmp/ffmpeg/libavcodec/h264_refs.c:306 (ffmpeg+0x1186b3f)
#3 h264_slice_init
home/wtc/tmp/ffmpeg/libavcodec/h264_slice.c:1900:11 (ffmpeg+0x1191149)
#4 ff_h264_queue_decode_slice
home/wtc/tmp/ffmpeg/libavcodec/h264_slice.c:2129 (ffmpeg+0x1191149)
#5 decode_nal_units
home/wtc/tmp/ffmpeg/libavcodec/h264dec.c:675:24 (ffmpeg+0x7924ec)
#6 h264_decode_frame home/wtc/tmp/ffmpeg/libavcodec/h264dec.c:1006
(ffmpeg+0x7924ec)
#7 frame_worker_thread
home/wtc/tmp/ffmpeg/libavcodec/pthread_frame.c:201:21
(ffmpeg+0xae56dc)

  Previous read of size 8 at 0x7bbc82a8 by main thread (mutexes:
write M1630):
#0 memcpy 
/work/release-test/final/llvm.src/projects/compiler-rt/lib/tsan/../sanitizer_common/sanitizer_common_interceptors.inc:655:5
(ffmpeg+0x10de9d)
#1 ff_h264_update_thread_context
home/wtc/tmp/ffmpeg/libavcodec/h264_slice.c:411:5 (ffmpeg+0x118b7dc)
#2 update_context_from_thread
home/wtc/tmp/ffmpeg/libavcodec/pthread_frame.c:315:19
(ffmpeg+0xae4302)
#3 submit_packet
home/wtc/tmp/ffmpeg/libavcodec/pthread_frame.c:418:15
(ffmpeg+0xae3783)
#4 ff_thread_decode_frame
home/wtc/tmp/ffmpeg/libavcodec/pthread_frame.c:495 (ffmpeg+0xae3783)
#5 decode_simple_internal
home/wtc/tmp/ffmpeg/libavcodec/decode.c:415:15 (ffmpeg+0x665ae4)
#6 decode_simple_receive_frame
home/wtc/tmp/ffmpeg/libavcodec/decode.c:620 (ffmpeg+0x665ae4)
#7 decode_receive_frame_internal
home/wtc/tmp/ffmpeg/libavcodec/decode.c:638 (ffmpeg+0x665ae4)
#8 avcodec_send_packet
home/wtc/tmp/ffmpeg/libavcodec/decode.c:678:15 (ffmpeg+0x665017)
#9 decode home/wtc/tmp/ffmpeg/ffmpeg.c:2265:15 (ffmpeg+0x1a748e)
#10 decode_video home/wtc/tmp/ffmpeg/ffmpeg.c:2409 (ffmpeg+0x1a748e)
#11 process_input_packet home/wtc/tmp/ffmpeg/ffmpeg.c:2644
(ffmpeg+0x1a748e)#12 process_input
home/wtc/tmp/ffmpeg/ffmpeg.c:4432:5 (ffmpeg+0x1a2e69)
#13 transcode_step home/wtc/tmp/ffmpeg/ffmpeg.c:4543 (ffmpeg+0x1a2e69)
#14 transcode home/wtc/tmp/ffmpeg/ffmpeg.c:4597 (ffmpeg+0x1a2e69)
#15 main home/wtc/tmp/ffmpeg/ffmpeg.c:4803:9 (ffmpeg+0x19daef)

3. The relevant source code is:

libavcodec/h264_refs.c:

135 static void h264_initialise_ref_list(H264Context *h, H264SliceContext *sl)
136 {
...
213 for (i = 0; i < sl->list_count; i++)
214 h->default_ref[i] = sl->ref_list[i][0];
215 }

libavcodec/h264_slice.c:

288 int ff_h264_update_thread_context(AVCodecContext *dst,
289   const AVCodecContext *src)
290 {
...
409 memcpy(>poc,>poc,sizeof(h->poc));
410
411 memcpy(h->default_ref, h1->default_ref, sizeof(h->default_ref));
412 memcpy(h->short_ref,   h1->short_ref,   sizeof(h->short_ref));
413 memcpy(h->long_ref,h1->long_ref,sizeof(h->long_ref));
414 memcpy(h->delayed_pic, h1->delayed_pic, sizeof(h->delayed_pic));
415 memcpy(h->last_pocs,   h1->last_pocs,   sizeof(h->last_pocs));

4. I can eliminate the tsan warning by deleting line 411 in
libavcodec/h264_slice.c, but I don't know if that is the correct fix.

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 6deb08fe6d..2fb89b189d 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -408,7 +408,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,

 memcpy(>poc,>poc,sizeof(h->poc));

-memcpy(h->default_ref, h1->default_ref, sizeof(h->default_ref));
 memcpy(h->short_ref,   h1->short_ref,   sizeof(h->short_ref));
 memcpy(h->long_ref,h1->long_ref,    sizeof(h->long_ref));
 memcpy(h->delayed_pic, h1->delayed_pic, sizeof(h->delayed_pic));

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] pthread_frame: revert 2e664b9c1e73c80aab91070c1eb7676f04bdd12d.

2017-07-18 Thread Wan-Teh Chang
The patch does not fix the tsan warning it was intended to fix.
Reverting the patch moves the av_log() back to the outside of the lock.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/pthread_frame.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 2cea232a36..08e182c3d1 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -570,11 +570,12 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
-pthread_mutex_lock(>progress_mutex);
 if (atomic_load_explicit(>debug_threads, memory_order_relaxed))
 av_log(f->owner[field], AV_LOG_DEBUG,
"%p finished %d field %d\n", progress, n, field);
 
+pthread_mutex_lock(>progress_mutex);
+
 atomic_store_explicit([field], n, memory_order_release);
 
 pthread_cond_broadcast(>progress_cond);
@@ -592,10 +593,11 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
-pthread_mutex_lock(>progress_mutex);
 if (atomic_load_explicit(>debug_threads, memory_order_relaxed))
 av_log(f->owner[field], AV_LOG_DEBUG,
"thread awaiting %d field %d from %p\n", n, field, progress);
+
+pthread_mutex_lock(>progress_mutex);
 while (atomic_load_explicit([field], memory_order_relaxed) < n)
 pthread_cond_wait(>progress_cond, >progress_mutex);
 pthread_mutex_unlock(>progress_mutex);
-- 
2.13.2.932.g7449e964c-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: allow per-field ThreadFrame owners.

2017-07-11 Thread Wan-Teh Chang
Hi Ronald,

Thank you for the quick reply.

On Tue, Jul 11, 2017 at 2:09 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi Wan-Teh,
>
> On Tue, Jul 11, 2017 at 4:53 PM, Wan-Teh Chang <w...@google.com> wrote:
>>
>> Hi Ronald,
>>
>> I have a question about this patch.
>>
>> On Mon, Apr 3, 2017 at 7:24 AM, Ronald S. Bultje <rsbul...@gmail.com>
>> wrote:
[...]
>> > diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>> > index 3e8677d..0c68836 100644
>> > --- a/libavcodec/utils.c
>> > +++ b/libavcodec/utils.c
>> > @@ -3971,7 +3971,8 @@ int ff_thread_ref_frame(ThreadFrame *dst,
>> > ThreadFrame *src)
>> >  {
>> >  int ret;
>> >
>> > -dst->owner = src->owner;
>> > +dst->owner[0] = src->owner[0];
>> > +dst->owner[1] = src->owner[1];
>> >
>> >  ret = av_frame_ref(dst->f, src->f);
>> >  if (ret < 0)
>> > @@ -3981,7 +3982,7 @@ int ff_thread_ref_frame(ThreadFrame *dst,
>> > ThreadFrame *src)
>> >
>> >  if (src->progress &&
>> >  !(dst->progress = av_buffer_ref(src->progress))) {
>> > -ff_thread_release_buffer(dst->owner, dst);
>> > +ff_thread_release_buffer(dst->owner[0], dst);
>> >  return AVERROR(ENOMEM);
>> >  }
>> >
>> [...]
>>
>> I don't understand why we pass dst->owner[0], not dst->owner[1], to
>> the ff_thread_release_buffer() call. Does this assume dst->owner[0] ==
>> dst->owner[1]?
>
> Neither is perfect, ideally (from an API programming PoV) you'd want to use
> both. But that's not possible...

Can ff_thread_release_buffer() use f->owner[0] and f->owner[1] and
ignore the avctx argument?

This should work for the ff_thread_release_buffer() call in
ff_thread_ref_frame(), but I don't know if this is correct in general.

> In practice, I don't think it matters much,
> since all it does is decide which release_buffer-queue it'll be appended to
> (if we do delayed free()'ing). Are there problems with this?

No, there aren't. With your explanation, I roughly understand the
ff_thread_release_buffer() code now. Thanks.

Note: I am cherry-picking tsan warning fixes to our copy of ffmpeg,
which is several months behind the tip. As due diligence, I review the
fixes before I merge them.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: allow per-field ThreadFrame owners.

2017-07-11 Thread Wan-Teh Chang
Hi Ronald,

I have a question about this patch.

On Mon, Apr 3, 2017 at 7:24 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> This tries to handle cases where separate invocations of decode_frame()
> (each running in separate threads) write to respective fields in the
> same AVFrame->data[]. Having per-field owners makes interaction between
> readers (the referencing thread) and writers (the decoding thread)
> slightly more optimal if both accesses are field-based, since they will
> use the respective producer's thread objects (mutex/cond) instead of
> sharing the thread objects of the first field's producer.
>
> In practice, this fixes the following tsan-warning in fate-h264:
>
> WARNING: ThreadSanitizer: data race (pid=21615)
>   Read of size 4 at 0x7d64d9fc by thread T2 (mutexes: write M1006):
> #0 ff_thread_report_progress pthread_frame.c:569 
> (ffmpeg:x86_64+0x100f7cf54)
> [..]
>   Previous write of size 4 at 0x7d64d9fc by main thread (mutexes: write 
> M1004):
> #0 update_context_from_user pthread_frame.c:335 
> (ffmpeg:x86_64+0x100f81abb)
> ---
>  libavcodec/h264_slice.c|  8 +---
>  libavcodec/pthread_frame.c | 18 ++
>  libavcodec/thread.h|  2 +-
>  libavcodec/utils.c |  7 ---
>  4 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
> index fa1e9ae..d4d31cc 100644
> --- a/libavcodec/h264_slice.c
> +++ b/libavcodec/h264_slice.c
> @@ -1423,14 +1423,14 @@ static int h264_field_start(H264Context *h, const 
> H264SliceContext *sl,
>   * We have to do that before the "dummy" in-between frame allocation,
>   * since that can modify h->cur_pic_ptr. */
>  if (h->first_field) {
> +int last_field = last_pic_structure == PICT_BOTTOM_FIELD;
>  av_assert0(h->cur_pic_ptr);
>  av_assert0(h->cur_pic_ptr->f->buf[0]);
>  assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
>
>  /* Mark old field/frame as completed */
> -if (h->cur_pic_ptr->tf.owner == h->avctx) {
> -ff_thread_report_progress(>cur_pic_ptr->tf, INT_MAX,
> -  last_pic_structure == 
> PICT_BOTTOM_FIELD);
> +if (h->cur_pic_ptr->tf.owner[last_field] == h->avctx) {
> +ff_thread_report_progress(>cur_pic_ptr->tf, INT_MAX, 
> last_field);
>  }
>
>  /* figure out if we have a complementary field pair */
> @@ -1568,7 +1568,9 @@ static int h264_field_start(H264Context *h, const 
> H264SliceContext *sl,
>  return AVERROR_INVALIDDATA;
>  }
>  } else {
> +int field = h->picture_structure == PICT_BOTTOM_FIELD;
>  release_unused_pictures(h, 0);
> +h->cur_pic_ptr->tf.owner[field] = h->avctx;
>  }
>  /* Some macroblocks can be accessed before they're available in case
>  * of lost slices, MBAFF or threading. */

Note: I have to admit I don't understand the changes to
libavcodec/h264_slice.c. The changes to the other files are
straightforward, except for the one issue I ask about below.

> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index 3e8677d..0c68836 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -3971,7 +3971,8 @@ int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame 
> *src)
>  {
>  int ret;
>
> -dst->owner = src->owner;
> +dst->owner[0] = src->owner[0];
> +dst->owner[1] = src->owner[1];
>
>  ret = av_frame_ref(dst->f, src->f);
>  if (ret < 0)
> @@ -3981,7 +3982,7 @@ int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame 
> *src)
>
>  if (src->progress &&
>  !(dst->progress = av_buffer_ref(src->progress))) {
> -ff_thread_release_buffer(dst->owner, dst);
> +ff_thread_release_buffer(dst->owner[0], dst);
>  return AVERROR(ENOMEM);
>  }
>
[...]

I don't understand why we pass dst->owner[0], not dst->owner[1], to
the ff_thread_release_buffer() call. Does this assume dst->owner[0] ==
dst->owner[1]?

Although dst->owner[0] and dst->owner[1] are initialized to the same
value, the changes to libavcodec/h264_slice.c seem to imply
dst->owner[0] and dst->owner[1] may become different.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in FrameThreadContext.

2017-07-10 Thread Wan-Teh Chang
On Sat, Jul 8, 2017 at 3:49 PM, Michael Niedermayer
<mich...@niedermayer.cc> wrote:
> On Sat, Jul 08, 2017 at 03:38:11PM -0700, Wan-Teh Chang wrote:
>> Hi Ronald,
>>
>> On Sat, Jul 8, 2017 at 2:33 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>> >
>> > I can see the design from the patch.
>> >
>> > What's missing is a justification for the downside of the design, which is
>> > that updates to this variable by the user are no longer propagated to the
>> > worker threads.
>>
>> My justification is the YAGNI principle.
>>
>> Although the current code allows the FF_DEBUG_THREADS option to be
>> toggled dynamically, I believe that was not intended, and I believe
>> nobody actually does that. In my (admittedly limited) code search, I
>> only see the FF_DEBUG_THREADS option set via the -debug thread_ops
>> command-line option.
>
> ffmpeg (the command line tool) allows changing the AVCodecContext->debug
> value at runtime

Thank you. I abandoned this patch and wrote a new patch. Please see
http://ffmpeg.org/pipermail/ffmpeg-devel/2017-July/213501.html.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in PerThreadContext.

2017-07-10 Thread Wan-Teh Chang
Add the debug_threads boolean field to PerThreadContext. For
PerThreadContext *p, p->debug_threads records whether the
FF_DEBUG_THREADS bit is set in p->avctx->debug, and p->debug_threads and
p->avctx->debug are kept in sync. The debug_threads field is defined as
an atomic_int to allow atomic read by another thread in
ff_thread_await_progress().

This fixes the tsan warning that
2e664b9c1e73c80aab91070c1eb7676f04bdd12d attempted to fix:

WARNING: ThreadSanitizer: data race (pid=452658)
  Write of size 4 at 0x7b640003f4fc by main thread (mutexes: write M248499):
#0 update_context_from_user [..]/libavcodec/pthread_frame.c:335:19 
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe73859)
[..]
  Previous read of size 4 at 0x7b640003f4fc by thread T130 (mutexes: write 
M248502, write M248500):
#0 ff_thread_await_progress [..]/libavcodec/pthread_frame.c:591:26 
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe749a1)

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/pthread_frame.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 363b139f71..2cea232a36 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -107,6 +107,8 @@ typedef struct PerThreadContext {
 
 int hwaccel_serializing;
 int async_serializing;
+
+atomic_int debug_threads;   ///< Set if the FF_DEBUG_THREADS option is 
set.
 } PerThreadContext;
 
 /**
@@ -398,6 +400,9 @@ static int submit_packet(PerThreadContext *p, 
AVCodecContext *user_avctx,
 pthread_mutex_unlock(>mutex);
 return ret;
 }
+atomic_store_explicit(>debug_threads,
+  (p->avctx->debug & FF_DEBUG_THREADS) != 0,
+  memory_order_relaxed);
 
 release_delayed_buffers(p);
 
@@ -566,7 +571,7 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 p = f->owner[field]->internal->thread_ctx;
 
 pthread_mutex_lock(>progress_mutex);
-if (f->owner[field]->debug_DEBUG_THREADS)
+if (atomic_load_explicit(>debug_threads, memory_order_relaxed))
 av_log(f->owner[field], AV_LOG_DEBUG,
"%p finished %d field %d\n", progress, n, field);
 
@@ -588,7 +593,7 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 p = f->owner[field]->internal->thread_ctx;
 
 pthread_mutex_lock(>progress_mutex);
-if (f->owner[field]->debug_DEBUG_THREADS)
+if (atomic_load_explicit(>debug_threads, memory_order_relaxed))
 av_log(f->owner[field], AV_LOG_DEBUG,
"thread awaiting %d field %d from %p\n", n, field, progress);
 while (atomic_load_explicit([field], memory_order_relaxed) < n)
@@ -823,6 +828,8 @@ int ff_frame_thread_init(AVCodecContext *avctx)
 
 if (err) goto error;
 
+atomic_init(>debug_threads, (copy->debug & FF_DEBUG_THREADS) != 0);
+
 err = AVERROR(pthread_create(>thread, NULL, frame_worker_thread, 
p));
 p->thread_init= !err;
 if(!p->thread_init)
-- 
2.13.2.725.g09c95d1e9-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in FrameThreadContext.

2017-07-08 Thread Wan-Teh Chang
Hi Ronald,

On Sat, Jul 8, 2017 at 2:33 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> I can see the design from the patch.
>
> What's missing is a justification for the downside of the design, which is
> that updates to this variable by the user are no longer propagated to the
> worker threads.

My justification is the YAGNI principle.

Although the current code allows the FF_DEBUG_THREADS option to be
toggled dynamically, I believe that was not intended, and I believe
nobody actually does that. In my (admittedly limited) code search, I
only see the FF_DEBUG_THREADS option set via the -debug thread_ops
command-line option.

> The syncing is extremely trivial (simply by moving the
> assignment from init to update_from_user) and has no threading implications
> (since it's a boolean, so you can make it atomic) so this really shouldn't
> be a big deal to amend.

Yes, the syncing is trivial to add. When someone actually needs to
toggle the FF_DEBUG_THREADS option dynamically, we can easily add the
syncing at that time.

This is your call. Please let me know your decision. Thanks!

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in FrameThreadContext.

2017-07-08 Thread Wan-Teh Chang
Hi Ronald,

On Sat, Jul 8, 2017 at 6:19 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi,
>
> On Fri, Jul 7, 2017 at 5:02 PM, Wan-Teh Chang <wtc-at-google@ffmpeg.org>
> wrote:
>
>> @@ -763,6 +764,7 @@ int ff_frame_thread_init(AVCodecContext *avctx)
>>
>>  fctx->async_lock = 1;
>>  fctx->delaying = 1;
>> +fctx->debug_threads = (avctx->debug & FF_DEBUG_THREADS) != 0;
>
> Shouldn't this be done in update_context_from_user()?

This patch differs from the approach you outlined at the end of
http://ffmpeg.org/pipermail/ffmpeg-devel/2017-July/213259.html
as follows:

* The debug_threads field is added to FrameThreadContext and applies to
  all the decoding threads owned by the FrameThreadContext.
* The debug_threads field is set when avcodec_open2() is called, and
  never changes thereafter.

In this design, we just need to set fctx->debug_threads in
ff_frame_thread_init() (which is called by avcodec_open2()).

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in FrameThreadContext.

2017-07-07 Thread Wan-Teh Chang
Note: I suspect we can simply delete the following line from
update_context_from_user() in libavcodec/pthread_frame.c:

dst->debug= src->debug;

That also fixes the tsan warning, but it'll take more time to
investigate whether it is necessary to update the |debug| field from
the user's AVCodecContext (src).

That line in update_context_from_user() was added in the initial
commit of libavcodec/pthread.c:

http://git.videolan.org/?p=ffmpeg.git;a=commit;h=37b00b47cbeecd66bb34c5c7c534d016d6e8da24

Does any user actually modify avctx->debug after the avcodec_open2() call?

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: make accesses to debug field be protected by owner lock.

2017-07-07 Thread Wan-Teh Chang
Hi Ronald,

On Wed, Jul 5, 2017 at 7:24 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi Wan-Teh,
>
> On Wed, Jul 5, 2017 at 8:08 PM, Wan-Teh Chang <w...@google.com> wrote:
>>
>> Hi Ronald,
>>
>> On Wed, Jul 5, 2017 at 3:31 PM, Ronald S. Bultje <rsbul...@gmail.com>
>> wrote:
>> > Hi Wan-Teh,
>> >
>> > On Wed, Jul 5, 2017 at 6:12 PM, Wan-Teh Chang <w...@google.com> wrote:
>> >>
>> >> Thank you for all the tsan warning fixes. In the meantime, it would be
>> >> good to revert 2e664b9c1e73c80aab91070c1eb7676f04bdd12d to avoid
>> >> confusion.
>> >
>> >
>> > Why? I believe it fixes a subset of the issue.
>>
>> Could you explain why it fixes a subset of the issue?
>
> From what I remember, I was (before this patch) semi-reliably able to
> reproduce the issue locally, and it went away after. I've observed the same
> thing in the relevant fate station, where before the patch, this warning
> occurred semi-reliably in 3-4 tests per run, and after the patch it
> sometimes occurs in 1 test per run. I understand this doesn't satisfy you
> but I currently don't want to dig through the code to figure out why I
> thought this was a good idea, I have other things that take priority.
> Reverting the patch will require me to dig through this code also, and I
> really don't see the gain.

In addition to avoiding confusion, reverting the patch will move the
av_log() statements outside the lock. Since I don't use those two
av_log() statements, I won't insist on reverting the patch.

> Some more thoughts:
> - I don't think we want to grab a second lock for something trivial like
> this (e.g. grabbing progress_mutex when copying the debug field in
> update_context_from_user);
> - making the debug flags field atomic will be difficult because it's public
> API, and I don't think we're ready to expose C11 types in our public API;
> - I suppose you could make a private (inside PerThreadContext, which allows
> it to be atomic) copy of debug intended for cross-threading purposes and use
> that here.

After studying this problem for two more days, I implemented a variant
of your last suggestion:

http://ffmpeg.org/pipermail/ffmpeg-devel/2017-July/213384.html

Let's continue the discussion in that email thread.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] pthread_frame: save the FF_DEBUG_THREADS option in FrameThreadContext.

2017-07-07 Thread Wan-Teh Chang
Add the debug_threads boolean field to FrameThreadContext. The
debug_threads field records whether the FF_DEBUG_THREADS bit is set in
the debug field of the avctx passed to ff_frame_thread_init(). The
debug_threads field is set when avcodec_open2() is called, never changes
thereafter, and applies to all the decoding threads owned by a
FrameThreadContext. The current code allows different decoding threads
to have different FF_DEBUG_THREADS options, but that does not seem
useful.

This fixes the tsan warning that
2e664b9c1e73c80aab91070c1eb7676f04bdd12d attempted to fix:

WARNING: ThreadSanitizer: data race (pid=452658)
  Write of size 4 at 0x7b640003f4fc by main thread (mutexes: write M248499):
#0 update_context_from_user [..]/libavcodec/pthread_frame.c:335:19 
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe73859)
[..]
  Previous read of size 4 at 0x7b640003f4fc by thread T130 (mutexes: write 
M248502, write M248500):
#0 ff_thread_await_progress [..]/libavcodec/pthread_frame.c:591:26 
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe749a1)

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/pthread_frame.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 363b139f71..eb4d2d9458 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -133,6 +133,7 @@ typedef struct FrameThreadContext {
 * Set for the first N packets, where N is 
the number of threads.
 * While it is set, 
ff_thread_en/decode_frame won't return any results.
 */
+int debug_threads; ///< Set if the FF_DEBUG_THREADS option is 
set.
 } FrameThreadContext;
 
 #define THREAD_SAFE_CALLBACKS(avctx) \
@@ -566,7 +567,7 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 p = f->owner[field]->internal->thread_ctx;
 
 pthread_mutex_lock(>progress_mutex);
-if (f->owner[field]->debug_DEBUG_THREADS)
+if (p->parent->debug_threads)
 av_log(f->owner[field], AV_LOG_DEBUG,
"%p finished %d field %d\n", progress, n, field);
 
@@ -588,7 +589,7 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 p = f->owner[field]->internal->thread_ctx;
 
 pthread_mutex_lock(>progress_mutex);
-if (f->owner[field]->debug_DEBUG_THREADS)
+if (p->parent->debug_threads)
 av_log(f->owner[field], AV_LOG_DEBUG,
"thread awaiting %d field %d from %p\n", n, field, progress);
 while (atomic_load_explicit([field], memory_order_relaxed) < n)
@@ -763,6 +764,7 @@ int ff_frame_thread_init(AVCodecContext *avctx)
 
 fctx->async_lock = 1;
 fctx->delaying = 1;
+fctx->debug_threads = (avctx->debug & FF_DEBUG_THREADS) != 0;
 
 for (i = 0; i < thread_count; i++) {
 AVCodecContext *copy = av_malloc(sizeof(AVCodecContext));
-- 
2.13.2.725.g09c95d1e9-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avformat/avio: Remove no-op code in url_find_protocol().

2017-07-06 Thread Wan-Teh Chang
Hi Muhammad,

On Thu, Jul 6, 2017 at 7:56 PM, Muhammad Faiz <mfc...@gmail.com> wrote:
> On Fri, Jul 7, 2017 at 9:18 AM, Wan-Teh Chang
> <wtc-at-google@ffmpeg.org> wrote:
>> In url_find_protocol(), proto_str is either "file" or a string
>> consisting of only the characters in URL_SCHEME_CHARS, which does not
>> include ','. Therefore the strchr(proto_str, ',') call always returns
>> NULL.
>>
>> Note: The code was added in commit
>> 6161c41817f6e53abb3021d67ca0f19def682718.
>>
>> Signed-off-by: Wan-Teh Chang <w...@google.com>
>> ---
>>  libavformat/avio.c | 2 --
>>  1 file changed, 2 deletions(-)
>>
>> diff --git a/libavformat/avio.c b/libavformat/avio.c
>> index 1e79c9dd5c..64248e098b 100644
>> --- a/libavformat/avio.c
>> +++ b/libavformat/avio.c
>> @@ -263,8 +263,6 @@ static const struct URLProtocol *url_find_protocol(const 
>> char *filename)
>>  av_strlcpy(proto_str, filename,
>> FFMIN(proto_len + 1, sizeof(proto_str)));
>>
>> -if ((ptr = strchr(proto_str, ',')))
>> -*ptr = '\0';
>
> What about handling "subfile," ?

I don't know what url_find_protocol() is intended to do, but I can
show you what it actually does.

Here is the relevant code in libavformat/avio.c:

==
#define URL_SCHEME_CHARS\
"abcdefghijklmnopqrstuvwxyz"\
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"\
"0123456789+-."

static const struct URLProtocol *url_find_protocol(const char *filename)
{
const URLProtocol **protocols;
char proto_str[128], proto_nested[128], *ptr;
size_t proto_len = strspn(filename, URL_SCHEME_CHARS);
int i;

if (filename[proto_len] != ':' &&
(strncmp(filename, "subfile,", 8) || !strchr(filename +
proto_len + 1, ':')) ||
is_dos_path(filename))
strcpy(proto_str, "file");
else
av_strlcpy(proto_str, filename,
   FFMIN(proto_len + 1, sizeof(proto_str)));

if ((ptr = strchr(proto_str, ',')))
*ptr = '\0';
==

Since I don't know how "subfile," should be handled by
url_find_protocol(), I ran the following three test inputs in the
debugger:

If |filename| is "subfile,", then proto_len is 7 and the if branch
copies "file" into proto_str.

If |filename| is "subfile,abcdefg", then proto_len is 7 and the if
branch copies "file" into proto_str.

If |filename| is "subfile,abcdefg:hijk", then proto_len is 7 and the
else branch copies "subfile" into proto_str.

Is this the expected behavior?

Note: The code snippet shows proto_str cannot possibly contain ','.
This is why the strchr(proto_str, ',') call always returns NULL.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avformat/avio: Remove no-op code in url_find_protocol().

2017-07-06 Thread Wan-Teh Chang
In url_find_protocol(), proto_str is either "file" or a string
consisting of only the characters in URL_SCHEME_CHARS, which does not
include ','. Therefore the strchr(proto_str, ',') call always returns
NULL.

Note: The code was added in commit
6161c41817f6e53abb3021d67ca0f19def682718.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavformat/avio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavformat/avio.c b/libavformat/avio.c
index 1e79c9dd5c..64248e098b 100644
--- a/libavformat/avio.c
+++ b/libavformat/avio.c
@@ -263,8 +263,6 @@ static const struct URLProtocol *url_find_protocol(const 
char *filename)
 av_strlcpy(proto_str, filename,
FFMIN(proto_len + 1, sizeof(proto_str)));
 
-if ((ptr = strchr(proto_str, ',')))
-*ptr = '\0';
 av_strlcpy(proto_nested, proto_str, sizeof(proto_nested));
 if ((ptr = strchr(proto_nested, '+')))
 *ptr = '\0';
-- 
2.13.2.725.g09c95d1e9-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] ffmpeg: Fix typos in the comment for decode() ("." vs. "->")

2017-07-06 Thread Wan-Teh Chang
pkt is a pointer, so it should be dereferenced with the -> operator.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 ffmpeg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 6dae6e9078..888d19a647 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2253,8 +2253,8 @@ static int ifilter_send_eof(InputFilter *ifilter)
 
 // This does not quite work like avcodec_decode_audio4/avcodec_decode_video2.
 // There is the following difference: if you got a frame, you must call
-// it again with pkt=NULL. pkt==NULL is treated differently from pkt.size==0
-// (pkt==NULL means get more output, pkt.size==0 is a flush/drain packet)
+// it again with pkt=NULL. pkt==NULL is treated differently from pkt->size==0
+// (pkt==NULL means get more output, pkt->size==0 is a flush/drain packet)
 static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, 
AVPacket *pkt)
 {
 int ret;
-- 
2.13.2.725.g09c95d1e9-goog

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: make accesses to debug field be protected by owner lock.

2017-07-05 Thread Wan-Teh Chang
Hi Ronald,

On Wed, Jul 5, 2017 at 3:31 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi Wan-Teh,
>
> On Wed, Jul 5, 2017 at 6:12 PM, Wan-Teh Chang <w...@google.com> wrote:
>>
>> Thank you for all the tsan warning fixes. In the meantime, it would be
>> good to revert 2e664b9c1e73c80aab91070c1eb7676f04bdd12d to avoid
>> confusion.
>
>
> Why? I believe it fixes a subset of the issue.

Could you explain why it fixes a subset of the issue?

The data race that tsan warned about was between a write and a read:

WARNING: ThreadSanitizer: data race (pid=10916)
  Write of size 4 at 0x7d64000174fc by main thread (mutexes: write M2313):
#0 update_context_from_user src/libavcodec/pthread_frame.c:335
(ffmpeg+0x00df7b06)
[..]
  Previous read of size 4 at 0x7d64000174fc by thread T1 (mutexes: write M2311):
#0 ff_thread_await_progress src/libavcodec/pthread_frame.c:592
(ffmpeg+0x00df8b3e)

But the write is not protected by the mutex that
2e664b9c1e73c80aab91070c1eb7676f04bdd12d uses to protect the reads.

I can't reproduce the tsan warning by running fate-h264 (with and
without 2e664b9c1e73c80aab91070c1eb7676f04bdd12d), but I can still
reproduce the tsan warning by running a test program in my project
with a recent (one or two weeks old) snapshot of ffmpeg:

WARNING: ThreadSanitizer: data race (pid=86821)
  Read of size 4 at 0x7b640003b8fc by thread T19 (mutexes: write
M525086672791228232, write M524523722837806536):
#0 ff_thread_await_progress
[...]/libavcodec/pthread_frame.c:591:26
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe749a1)
#1 await_references [...]/libavcodec/h264_mb.c
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x9530c4)
#2 hl_motion_420_simple_8 [...]/libavcodec/h264_mc_template.c:80:9
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x95106b)
#3 hl_decode_mb_simple_8 [...]/libavcodec/h264_mb_template.c:180
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x95106b)
#4 ff_h264_hl_decode_mb [...]/libavcodec/h264_mb.c:816:9
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x940953)
#5 decode_slice [...]/libavcodec/h264_slice.c:2572:17
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x982ce7)
#6 ff_h264_execute_decode_slices
[...]/libavcodec/h264_slice.c:2747:15
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x981fb9)
#7 decode_nal_units [...]/libavcodec/h264dec.c:718:27
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x99257b)
#8 h264_decode_frame [...]/libavcodec/h264dec.c:1008
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x99257b)
#9 frame_worker_thread [...]/libavcodec/pthread_frame.c:199:21
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe75b4c)

  Previous write of size 4 at 0x7b640003b8fc by main thread (mutexes:
write M524242247861095840):
#0 update_context_from_user
[...]/libavcodec/pthread_frame.c:335:19
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe73859)
#1 submit_packet [...]/libavcodec/pthread_frame.c:396
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe73859)
#2 ff_thread_decode_frame [...]/libavcodec/pthread_frame.c:490
(5ab42bb1a6f4b068d7863dabe9b2bacc+0xe73859)
#3 decode_simple_internal [...]/libavcodec/decode.c:415:15
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d49b9)
#4 decode_simple_receive_frame [...]/libavcodec/decode.c:620
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d49b9)
#5 decode_receive_frame_internal [...]/libavcodec/decode.c:638
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d49b9)
#6 avcodec_send_packet [...]/libavcodec/decode.c:678:15
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d3ea7)
#7 compat_decode [...]/libavcodec/decode.c:853:15
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d627d)
#8 avcodec_decode_video2 [...]/libavcodec/decode.c:914:12
(5ab42bb1a6f4b068d7863dabe9b2bacc+0x7d617e)
...

The tsan warning shows the read and the write are still protected by
different mutexes. The read is now protected by two mutexes because of
2e664b9c1e73c80aab91070c1eb7676f04bdd12d.

I hope this convinces you that
2e664b9c1e73c80aab91070c1eb7676f04bdd12d is not working as intended
and should be reverted to avoid confusion.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: make accesses to debug field be protected by owner lock.

2017-07-05 Thread Wan-Teh Chang
Hi Ronald,

Thank you for the quick reply!

On Wed, Jul 5, 2017 at 2:49 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi Wan-Teh,
>
> On Wed, Jul 5, 2017 at 5:30 PM, Wan-Teh Chang <w...@google.com> wrote:
>>
>> Hi Ronald,
>>
>> A variant of this patch is committed as
>> 2e664b9c1e73c80aab91070c1eb7676f04bdd12d:
>>
>> http://ffmpeg.org/pipermail/ffmpeg-cvslog/2017-April/106577.html
>>
>> I believe it does not fix the tsan warning.
>
> I had noticed that too.
>
> Unfortunately, I have spent a significant amount of (unpaid, personal) time
> on fixing most of these tsan warnings already (with help of ubitux -
> thanks!!), and given the incredibly mega-low impact of this particular
> outstanding issue, I'm currently not actively working on fixing this. I may
> get back to this issue later but I have some other things that I'd rather
> work on and that I believe are more important.

Thank you for all the tsan warning fixes. In the meantime, it would be
good to revert 2e664b9c1e73c80aab91070c1eb7676f04bdd12d to avoid
confusion.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] pthread_frame: make accesses to debug field be protected by owner lock.

2017-07-05 Thread Wan-Teh Chang
Hi Ronald,

A variant of this patch is committed as
2e664b9c1e73c80aab91070c1eb7676f04bdd12d:

http://ffmpeg.org/pipermail/ffmpeg-cvslog/2017-April/106577.html

I believe it does not fix the tsan warning.

On Thu, Apr 6, 2017 at 10:48 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> ..., but this way the accesses to the
> field (reads and writes) are always protected by a mutex.

The patch protects the reads of the |debug| field with
p->progress_mutex, where |p| points to a PerThreadContext. But the
write of the |debug| field in update_context_from_user() is not
protected by progress_mutex of any PerThreadContext.

The tsan warning this patch tried to fix is:

> WARNING: ThreadSanitizer: data race (pid=10916)
>   Write of size 4 at 0x7d64000174fc by main thread (mutexes: write M2313):
> #0 update_context_from_user src/libavcodec/pthread_frame.c:335 
> (ffmpeg+0x00df7b06)
> [..]
>   Previous read of size 4 at 0x7d64000174fc by thread T1 (mutexes: write 
> M2311):
> #0 ff_thread_await_progress src/libavcodec/pthread_frame.c:592 
> (ffmpeg+0x00df8b3e)

The code in libavcodec/pthread_frame.c related to the write of the
|debug| field shown in the tsan warning is the following:

 326 static int update_context_from_user(AVCodecContext *dst,
AVCodecContext *sr c)
 327 {
 ,,,
 333
 334 dst->opaque   = src->opaque;
 335 dst->debug= src->debug;
 336 dst->debug_mv = src->debug_mv;

...

 383 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 384  AVPacket *avpkt)
 385 {
 ...
 393
 394 pthread_mutex_lock(>mutex);
 395
 396 ret = update_context_from_user(p->avctx, user_avctx);

...

 472 int ff_thread_decode_frame(AVCodecContext *avctx,
 473AVFrame *picture, int *got_picture_ptr,
 474AVPacket *avpkt)
 475 {
 ...
 480
 481 /* release the async lock, permitting blocked hwaccel threads to
 482  * go forward while we are in this function */
 483 async_unlock(fctx);
 484
 485 /*
 486  * Submit a packet to the next decoding thread.
 487  */
 488
 489 p = >threads[fctx->next_decoding];
 490 err = submit_packet(p, avctx, avpkt);

The code snippets show the write of dst->debug is only protected by
p->mutex (acquired at line 394) for some |p|. progress_mutex (of any
AVCodecContext) is not involved at all.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] configure: add -fPIE instead of -pie to C flags for ThreadSanitizer

2016-12-12 Thread Wan-Teh Chang
On Mon, Dec 12, 2016 at 6:42 AM, Carl Eugen Hoyos <ceffm...@gmail.com> wrote:
> 2016-12-02 23:04 GMT+01:00 Wan-Teh Chang <wtc-at-google@ffmpeg.org>:
>> -pie was added to C flags for ThreadSanitizer in commit
>> 19f251a2882a8d0779b432e63bf282e4d9c443bb. Under clang 3.8.0, the -pie
>> flag causes a compiler warning and a linker error when running configure
>> --toolchain=clang-tsan.
>
> Does the patch have any effect when using gcc?

Hi Carl,

Yes, the patch modifies the code shared by --toolchain=clang-tsan and
--toolchain=gcc-tsan.

I am using Ubuntu 14.04 LTS, which comes with gcc 4.8.4. gcc 4.8.4
does NOT work with and without my patch. I got an error in config.log
like this:

==
check_cc
BEGIN /tmp/ffconf.jceENASz.c
1   int main(void){ return 0; }
END /tmp/ffconf.jceENASz.c
gcc -fsanitize=thread -fPIE -fPIC -c -o /tmp/ffconf.LuPWPOJB.o
/tmp/ffconf.jceENASz.c
gcc -fsanitize=thread -pie -fPIC -o /tmp/ffconf.03RtebJv /tmp/ffconf.LuPWPOJB.o
/tmp/ffconf.LuPWPOJB.o: In function `_GLOBAL__sub_I_00099_0_ffconf.jceENASz.c':
ffconf.jceENASz.c:(.text+0x10): undefined reference to `__tsan_init'
collect2: error: ld returned 1 exit status
C compiler test failed.
==

This looks like the gcc 4.9.2 bug described in
https://bugs.launchpad.net/ubuntu/+source/gcc-5/+bug/1413474.
Unfortunately, I can't fix the undefined reference to __tsan_init even
if I add -ltsan to the linker flags.

I then built gcc 6.2.0 from sources and tested it. gcc 6.2.0 works
with and without my patch. So I consider my patch safe for gcc. If
there is any other gcc version I should test my patch with, please let
me know.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags()

2016-12-06 Thread Wan-Teh Chang
Make the one-time initialization in av_get_cpu_flags() thread-safe. The
static variable |cpu_flags| in libavutil/cpu.c is read and written using
normal load and store operations. These are considered as data races.
The fix is to use atomic load and store operations.

The fix can be verified by running the libavutil/tests/cpu_init.c test
program under ThreadSanitizer:
./configure --toolchain=clang-tsan
make libavutil/tests/cpu_init
libavutil/tests/cpu_init

There should be no warnings from ThreadSanitizer.

Co-author: Dmitry Vyukov of Google, who suggested the data race fix.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/cpu.c | 12 +++-
 libavutil/cpu.h |  2 --
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 73317c4..16e0c92 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -17,6 +17,7 @@
  */
 
 #include 
+#include 
 
 #include "cpu.h"
 #include "cpu_internal.h"
@@ -44,7 +45,7 @@
 #include 
 #endif
 
-static int cpu_flags = -1;
+static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
 
 static int get_cpu_flags(void)
 {
@@ -82,22 +83,23 @@ void av_force_cpu_flags(int arg){
 arg |= AV_CPU_FLAG_MMX;
 }
 
-cpu_flags = arg;
+atomic_store_explicit(_flags, arg, memory_order_relaxed);
 }
 
 int av_get_cpu_flags(void)
 {
-int flags = cpu_flags;
+int flags = atomic_load_explicit(_flags, memory_order_relaxed);
 if (flags == -1) {
 flags = get_cpu_flags();
-cpu_flags = flags;
+atomic_store_explicit(_flags, flags, memory_order_relaxed);
 }
 return flags;
 }
 
 void av_set_cpu_flags_mask(int mask)
 {
-cpu_flags = get_cpu_flags() & mask;
+atomic_store_explicit(_flags, get_cpu_flags() & mask,
+  memory_order_relaxed);
 }
 
 int av_parse_cpu_flags(const char *s)
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 4bff167..8499f0e 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -85,8 +85,6 @@ void av_force_cpu_flags(int flags);
  * Set a mask on flags returned by av_get_cpu_flags().
  * This function is mainly useful for testing.
  * Please use av_force_cpu_flags() and av_get_cpu_flags() instead which are 
more flexible
- *
- * @warning this function is not thread safe.
  */
 attribute_deprecated void av_set_cpu_flags_mask(int mask);
 
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags()

2016-12-06 Thread Wan-Teh Chang
On Tue, Dec 6, 2016 at 10:41 AM, Wan-Teh Chang <w...@google.com> wrote:
> Make the one-time initialization in av_get_cpu_flags() thread-safe. The
> static variable |cpu_flags| in libavutil/cpu.c is read and written using
> normal load and store operations. These are considered as data races.
> The fix is to use atomic load and store operations.
>
> The fix can be verified by running the libavutil/tests/cpu_init.c test
> program under ThreadSanitizer:
> ./configure --toolchain=clang-tsan
> make libavutil/tests/cpu_init
> libavutil/tests/cpu_init
>
> There should be no warnings from ThreadSanitizer.
>
> Co-author: Dmitry Vyukov of Google, which suggested the data race fix.

Please ignore this patch. I just noticed that the last line of the
commit message has a grammatical error ("which" should be changed to
"who").

I will correct that and send a new patch.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-12-06 Thread Wan-Teh Chang
On Thu, Nov 24, 2016 at 1:56 AM, wm4 <nfx...@googlemail.com> wrote:
> On Wed, 23 Nov 2016 11:40:25 -0800
> Wan-Teh Chang <wtc-at-google@ffmpeg.org> wrote:
>
>> On Tue, Nov 22, 2016 at 3:30 PM, wm4 <nfx...@googlemail.com> wrote:
>> > On Tue, 22 Nov 2016 23:57:15 +0100
>> > Michael Niedermayer <mich...@niedermayer.cc> wrote:
>> >
>> >> For example the progress code in the frame threading.
>> >
>> > Which was recently fixed in Libav AFAIR...
>>
>> You're right. libav/libavcodec/pthread_frame.c has code similar to my
>> ffmpeg patch http://ffmpeg.org/pipermail/ffmpeg-devel/2016-March/190454.html,
>> and much more.
>>
>> Note: libav/libavcodec/pthread_frame.c uses unnecessary (too strong)
>> memory barriers in ff_thread_report_progress(). We can fix those when
>> the code is merged to ffmpeg.
>
> You could also just send a patch to them.

Done. I sent a patch to libav-devel last week:
https://lists.libav.org/pipermail/libav-devel/2016-November/080903.html

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil/tests: run the cpu_init.c test conditionally on HAVE_THREADS

2016-12-06 Thread Wan-Teh Chang
Also declare the main() function with void arguments because argc and
argv are unused.

These changes are suggested by Diego Biurrun and James Almer.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/Makefile | 2 +-
 libavutil/tests/cpu_init.c | 9 +
 tests/fate/libavutil.mak   | 2 +-
 3 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/libavutil/Makefile b/libavutil/Makefile
index c61ca9c..607fe6a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -188,7 +188,6 @@ TESTPROGS = adler32 
\
 camellia\
 color_utils \
 cpu \
-cpu_init\
 crc \
 des \
 dict\
@@ -223,6 +222,7 @@ TESTPROGS = adler32 
\
 tea \
 
 TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
+TESTPROGS-$(HAVE_THREADS) += cpu_init
 
 TOOLS = crypto_bench ffhash ffeval ffescape
 
diff --git a/libavutil/tests/cpu_init.c b/libavutil/tests/cpu_init.c
index 5a5ecda..a379e47 100644
--- a/libavutil/tests/cpu_init.c
+++ b/libavutil/tests/cpu_init.c
@@ -24,12 +24,9 @@
 #include 
 #include 
 
-#include "config.h"
-
 #include "libavutil/cpu.h"
 #include "libavutil/thread.h"
 
-#if HAVE_PTHREADS
 static void *thread_main(void *arg)
 {
 int *flags = arg;
@@ -37,12 +34,9 @@ static void *thread_main(void *arg)
 *flags = av_get_cpu_flags();
 return NULL;
 }
-#endif
-
 
-int main(int argc, char **argv)
+int main(void)
 {
-#if HAVE_PTHREADS
 int cpu_flags1;
 int cpu_flags2;
 int ret;
@@ -66,7 +60,6 @@ int main(int argc, char **argv)
 return 2;
 if (cpu_flags1 != cpu_flags2)
 return 3;
-#endif
 
 return 0;
 }
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 0ab6fc5..4ecc741 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -44,7 +44,7 @@ fate-cpu: libavutil/tests/cpu$(EXESUF)
 fate-cpu: CMD = runecho libavutil/tests/cpu $(CPUFLAGS:%=-c%) $(THREADS:%=-t%)
 fate-cpu: REF = /dev/null
 
-FATE_LIBAVUTIL += fate-cpu_init
+FATE_LIBAVUTIL-$(HAVE_THREADS) += fate-cpu_init
 fate-cpu_init: libavutil/tests/cpu_init$(EXESUF)
 fate-cpu_init: CMD = run libavutil/tests/cpu_init
 fate-cpu_init: REF = /dev/null
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags()

2016-12-06 Thread Wan-Teh Chang
Make the one-time initialization in av_get_cpu_flags() thread-safe. The
static variable |cpu_flags| in libavutil/cpu.c is read and written using
normal load and store operations. These are considered as data races.
The fix is to use atomic load and store operations.

The fix can be verified by running the libavutil/tests/cpu_init.c test
program under ThreadSanitizer:
./configure --toolchain=clang-tsan
make libavutil/tests/cpu_init
libavutil/tests/cpu_init

There should be no warnings from ThreadSanitizer.

Co-author: Dmitry Vyukov of Google, which suggested the data race fix.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/cpu.c | 12 +++-
 libavutil/cpu.h |  2 --
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 73317c4..16e0c92 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -17,6 +17,7 @@
  */
 
 #include 
+#include 
 
 #include "cpu.h"
 #include "cpu_internal.h"
@@ -44,7 +45,7 @@
 #include 
 #endif
 
-static int cpu_flags = -1;
+static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1);
 
 static int get_cpu_flags(void)
 {
@@ -82,22 +83,23 @@ void av_force_cpu_flags(int arg){
 arg |= AV_CPU_FLAG_MMX;
 }
 
-cpu_flags = arg;
+atomic_store_explicit(_flags, arg, memory_order_relaxed);
 }
 
 int av_get_cpu_flags(void)
 {
-int flags = cpu_flags;
+int flags = atomic_load_explicit(_flags, memory_order_relaxed);
 if (flags == -1) {
 flags = get_cpu_flags();
-cpu_flags = flags;
+atomic_store_explicit(_flags, flags, memory_order_relaxed);
 }
 return flags;
 }
 
 void av_set_cpu_flags_mask(int mask)
 {
-cpu_flags = get_cpu_flags() & mask;
+atomic_store_explicit(_flags, get_cpu_flags() & mask,
+  memory_order_relaxed);
 }
 
 int av_parse_cpu_flags(const char *s)
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 4bff167..8499f0e 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -85,8 +85,6 @@ void av_force_cpu_flags(int flags);
  * Set a mask on flags returned by av_get_cpu_flags().
  * This function is mainly useful for testing.
  * Please use av_force_cpu_flags() and av_get_cpu_flags() instead which are 
more flexible
- *
- * @warning this function is not thread safe.
  */
 attribute_deprecated void av_set_cpu_flags_mask(int mask);
 
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] configure: add -fPIE instead of -pie to C flags for ThreadSanitizer

2016-12-02 Thread Wan-Teh Chang
-pie was added to C flags for ThreadSanitizer in commit
19f251a2882a8d0779b432e63bf282e4d9c443bb. Under clang 3.8.0, the -pie
flag causes a compiler warning and a linker error when running configure
--toolchain=clang-tsan. Here is an excerpt from config.log:

clang ... -fsanitize=thread -pie -std=c11 -fomit-frame-pointer -pthread -c -o 
/tmp/ffconf.hL61stP9.o /tmp/ffconf.YO6ZaSFG.c
clang: warning: argument unused during compilation: '-pie'
clang -fsanitize=thread -pie -Wl,--as-needed -Wl,-z,noexecstack -o 
/tmp/ffconf.W5c2e41l /tmp/ffconf.hL61stP9.o -lbz2 -pthread
/usr/bin/ld: /tmp/ffconf.hL61stP9.o: relocation R_X86_64_PC32 against undefined 
symbol `atan2f@@GLIBC_2.2.5' can not be used when making a shared object; 
recompile with -fPIC
/usr/bin/ld: final link failed: Bad value
clang: error: linker command failed with exit code 1 (use -v to see invocation)

To be conservative, I changed -pie to -fPIE. But the documentation seems
to imply just -fsanitize=thread is enough:

http://clang.llvm.org/docs/ThreadSanitizer.html
https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index ee473b9..0e1ae61 100755
--- a/configure
+++ b/configure
@@ -3523,7 +3523,7 @@ case "$toolchain" in
 ;;
 *-tsan)
 cc_default="${toolchain%-tsan}"
-add_cflags  -fsanitize=thread -pie
+add_cflags  -fsanitize=thread -fPIE
 add_ldflags -fsanitize=thread -pie
 case "$toolchain" in
 gcc-tsan)
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/4] Add a compat stdatomic.h implementation based on windows atomics

2016-11-29 Thread Wan-Teh Chang
I studied the history of vlc/include/vlc_atomic.h and compared it wth
libav/compat/atomics/win32/stdatomic.h.

1. vlc/include/vlc_atomic.h was initially written by Rémi
Denis-Courmont. This is the most substantial commit by Rémi
Denis-Courmont:

http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=ad3586579f956b6856d2f7c0c4cbd860fd8241af

2. The Windows port was written by Felix Abecassis. Note that he
didn't add his copyright notice to vlc/include/vlc_atomic.h. This is
the most substantial commit by Felix Abecassis:

http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=9c077c0d28df2ad7387b4db8702bd023b2556c86

3. libav/compat/atomics/win32/stdatomic.h contains code that is
apparently derived from the code in vlc/include/vlc_atomic.h written
by both Rémi Denis-Courmont and Felix Abecassis, but doesn't have a
copyright notice.

Similarly, libav/compat/atomics/suncc/stdatomic.h contains code that
is apparently derived from the code in vlc/include/vlc_atomic.h
written by Rémi Denis-Courmont, but doesn't have a copyright notice.

4. Given this information, should I add a copyright notice to
ffmpeg/compat/atomics/win32/stdatomic.h and
ffmpeg/compat/atomics/suncc/stdatomic.h?

5. A related question: libav/compat/atomics/pthread/stdatomic.c does
NOT contain any code that is apparently derived from the code in
vlc/include/vlc_atomic.h written by Rémi Denis-Courmont, but
libav/compat/atomics/pthread/stdatomic.c contains a copyright notice
of Rémi Denis-Courmont. Should I remove that copyright notice from
ffmpeg/compat/atomics/pthread/stdatomic.c?

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/4] Add a compat stdatomic.h implementation based on windows atomics

2016-11-29 Thread Wan-Teh Chang
On Tue, Nov 29, 2016 at 6:46 AM, Carl Eugen Hoyos <ceffm...@gmail.com> wrote:
> 2016-11-29 0:29 GMT+01:00 Wan-Teh Chang <wtc-at-google@ffmpeg.org>:
>
>> Adapted from the code by Rémi Denis-Courmont from VLC
>
> Missing copyright statement, please do not commit as-is!

Hi Carl,

I made only the following changes to the Libav commits:

1. I changed Libav's LGPL license header to FFmpeg's LGPL license header.

2. I changed the header inclusion guard macros (from
LIBAV_COMPAT_ATOMICS_..._STDATOMIC_H to
COMPAT_ATOMICS_..._STDATOMIC_H).

Rémi Denis-Courmont's copyright notice is not in all the headers I
copied from Libav. I can do some source code archaeology in the VLC
source code repository.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/4] Add a compat stdatomic.h implementation based on suncc atomics

2016-11-28 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Adapted from the code by Rémi Denis-Courmont from VLC

This merges libav commit bb81ed476569b912a37ed553e756e123b6b13b14.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/suncc/stdatomic.h | 186 +++
 configure|   2 +
 2 files changed, 188 insertions(+)
 create mode 100644 compat/atomics/suncc/stdatomic.h

diff --git a/compat/atomics/suncc/stdatomic.h b/compat/atomics/suncc/stdatomic.h
new file mode 100644
index 000..119c2ba
--- /dev/null
+++ b/compat/atomics/suncc/stdatomic.h
@@ -0,0 +1,186 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef COMPAT_ATOMICS_SUNCC_STDATOMIC_H
+#define COMPAT_ATOMICS_SUNCC_STDATOMIC_H
+
+#include 
+#include 
+#include 
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_thread_fence(order) \
+__machine_rw_barrier();
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef intptr_t atomic_flag;
+typedef intptr_t atomic_bool;
+typedef intptr_t atomic_char;
+typedef intptr_t atomic_schar;
+typedef intptr_t atomic_uchar;
+typedef intptr_t atomic_short;
+typedef intptr_t atomic_ushort;
+typedef intptr_t atomic_int;
+typedef intptr_t atomic_uint;
+typedef intptr_t atomic_long;
+typedef intptr_t atomic_ulong;
+typedef intptr_t atomic_llong;
+typedef intptr_t atomic_ullong;
+typedef intptr_t atomic_wchar_t;
+typedef intptr_t atomic_int_least8_t;
+typedef intptr_t atomic_uint_least8_t;
+typedef intptr_t atomic_int_least16_t;
+typedef intptr_t atomic_uint_least16_t;
+typedef intptr_t atomic_int_least32_t;
+typedef intptr_t atomic_uint_least32_t;
+typedef intptr_t atomic_int_least64_t;
+typedef intptr_t atomic_uint_least64_t;
+typedef intptr_t atomic_int_fast8_t;
+typedef intptr_t atomic_uint_fast8_t;
+typedef intptr_t atomic_int_fast16_t;
+typedef intptr_t atomic_uint_fast16_t;
+typedef intptr_t atomic_int_fast32_t;
+typedef intptr_t atomic_uint_fast32_t;
+typedef intptr_t atomic_int_fast64_t;
+typedef intptr_t atomic_uint_fast64_t;
+typedef intptr_t atomic_intptr_t;
+typedef intptr_t atomic_uintptr_t;
+typedef intptr_t atomic_size_t;
+typedef intptr_t atomic_ptrdiff_t;
+typedef intptr_t atomic_intmax_t;
+typedef intptr_t atomic_uintmax_t;
+
+static inline void atomic_store(intptr_t *object, intptr_t desired)
+{
+*object = desired;
+__machine_rw_barrier();
+}
+
+#define atomic_store_explicit(object, desired, order) \
+atomic_store(object, desired)
+
+static inline intptr_t atomic_load(intptr_t *object)
+{
+__machine_rw_barrier();
+return *object;
+}
+
+#define atomic_load_explicit(object, order) \
+atomic_load(object)
+
+#define atomic_exchange(object, desired) \
+atomic_swap_ptr(object, desired)
+
+#define atomic_exchange_explicit(object, desired, order) \
+atomic_exchange(object, desired)
+
+static inline int atomic_compare_exchange_strong(intptr_t *object, intptr_t 
*expected,
+ intptr_t desired)
+{
+intptr_t  old = *expected;
+*expected = atomic_cas_ptr(object, old, desired);
+return *expected == old;
+}
+
+#define atomic_compare_exchange_strong_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak(object, expected, desired) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_weak(object, expected, desired)
+
+static inline intptr_t atomic_fetch_add(intptr_t *object, intptr_t operand)
+{
+return atomic_add_ptr_nv(object, operand) - operand;
+}
+
+#define atomic_fetch_sub(object, operand) \
+atomic_fetch_add(object, -(operand))
+
+static inline intptr_t atomic_fetch_or(intptr_t *object, intptr_t operand)
+{
+intptr_t old;
+do {
+old = atomic_load(object);
+} while (!atomic_compare_exchange_strong(obje

[FFmpeg-devel] [PATCH 3/4] Add a compat stdatomic.h implementation based on pthreads

2016-11-28 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Adapted from the code by Rémi Denis-Courmont from VLC

This merges libav commit f9a6a80e065cdb95b233978f1d96ec9bc863daa1.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/pthread/stdatomic.c |  39 
 compat/atomics/pthread/stdatomic.h | 197 +
 configure  |   3 +
 3 files changed, 239 insertions(+)
 create mode 100644 compat/atomics/pthread/stdatomic.c
 create mode 100644 compat/atomics/pthread/stdatomic.h

diff --git a/compat/atomics/pthread/stdatomic.c 
b/compat/atomics/pthread/stdatomic.c
new file mode 100644
index 000..9fca989
--- /dev/null
+++ b/compat/atomics/pthread/stdatomic.c
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * based on vlc_atomic.h from VLC
+ * Copyright (C) 2010 Rémi Denis-Courmont
+ */
+
+#include 
+#include 
+
+#include "stdatomic.h"
+
+static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void avpriv_atomic_lock(void)
+{
+pthread_mutex_lock(_lock);
+}
+
+void avpriv_atomic_unlock(void)
+{
+pthread_mutex_unlock(_lock);
+}
diff --git a/compat/atomics/pthread/stdatomic.h 
b/compat/atomics/pthread/stdatomic.h
new file mode 100644
index 000..1b7278e
--- /dev/null
+++ b/compat/atomics/pthread/stdatomic.h
@@ -0,0 +1,197 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * based on vlc_atomic.h from VLC
+ * Copyright (C) 2010 Rémi Denis-Courmont
+ */
+
+#ifndef COMPAT_ATOMICS_PTHREAD_STDATOMIC_H
+#define COMPAT_ATOMICS_PTHREAD_STDATOMIC_H
+
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef intptr_t atomic_flag;
+typedef intptr_t atomic_bool;
+typedef intptr_t atomic_char;
+typedef intptr_t atomic_schar;
+typedef intptr_t atomic_uchar;
+typedef intptr_t atomic_short;
+typedef intptr_t atomic_ushort;
+typedef intptr_t atomic_int;
+typedef intptr_t atomic_uint;
+typedef intptr_t atomic_long;
+typedef intptr_t atomic_ulong;
+typedef intptr_t atomic_llong;
+typedef intptr_t atomic_ullong;
+typedef intptr_t atomic_wchar_t;
+typedef intptr_t atomic_int_least8_t;
+typedef intptr_t atomic_uint_least8_t;
+typedef intptr_t atomic_int_least16_t;
+typedef intptr_t atomic_uint_least16_t;
+typedef intptr_t atomic_int_least32_t;
+typedef intptr_t atomic_uint_least32_t;
+typedef intptr_t atomic_int_least64_t;
+typedef intptr_t atomic_uint_least64_t;
+typedef intptr_t atomic_int_fast8_t;
+typedef intptr_t atomic_uint_fast8_t;
+typedef intptr_t atomic_int_fast16_t;
+typedef intptr_t atomic_uint_fast16_t;
+typedef intptr_t atomic_int_fast32_t;
+typedef intptr_t atomic_uint_fast32_t;
+typedef intptr_t atomic_int_fast64_t;
+typedef intptr_t atomic_uint_fast64_t;
+typedef intptr_t atomic_intptr_t;
+typedef intptr_t atomic_uintptr_t;
+typedef intptr_t atomic_size_t;
+typedef intptr_t atomic_ptrdiff_t;
+typedef intptr_t atomic_intmax_t;
+typedef intptr_t atomic_uintmax_t;
+
+void avpriv_atomic_lock(void);
+void avpriv_atomic_unlock(void);
+
+static inline void atomic_thread_fence(int order)
+{
+avpriv_atomic_lock();
+avpriv_atomic_unlock();
+}
+
+static inline void atomic_store(intptr_t *object, intptr_t desired)
+{
+avpriv_atomic_lock();
+*object = desired;
+  

[FFmpeg-devel] [PATCH 4/4] Add a compat dummy stdatomic.h used when threading is disabled

2016-11-28 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Adapted from the code by Rémi Denis-Courmont from VLC

This merges libav commit eb34d40354e2474517c9b9bd787e0dadc89c2a81.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/dummy/stdatomic.h | 176 +++
 configure|   3 +
 2 files changed, 179 insertions(+)
 create mode 100644 compat/atomics/dummy/stdatomic.h

diff --git a/compat/atomics/dummy/stdatomic.h b/compat/atomics/dummy/stdatomic.h
new file mode 100644
index 000..c26f629
--- /dev/null
+++ b/compat/atomics/dummy/stdatomic.h
@@ -0,0 +1,176 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * based on vlc_atomic.h from VLC
+ * Copyright (C) 2010 Rémi Denis-Courmont
+ */
+
+#ifndef COMPAT_ATOMICS_DUMMY_STDATOMIC_H
+#define COMPAT_ATOMICS_DUMMY_STDATOMIC_H
+
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_thread_fence(order) \
+((void)0)
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef intptr_t atomic_flag;
+typedef intptr_t atomic_bool;
+typedef intptr_t atomic_char;
+typedef intptr_t atomic_schar;
+typedef intptr_t atomic_uchar;
+typedef intptr_t atomic_short;
+typedef intptr_t atomic_ushort;
+typedef intptr_t atomic_int;
+typedef intptr_t atomic_uint;
+typedef intptr_t atomic_long;
+typedef intptr_t atomic_ulong;
+typedef intptr_t atomic_llong;
+typedef intptr_t atomic_ullong;
+typedef intptr_t atomic_wchar_t;
+typedef intptr_t atomic_int_least8_t;
+typedef intptr_t atomic_uint_least8_t;
+typedef intptr_t atomic_int_least16_t;
+typedef intptr_t atomic_uint_least16_t;
+typedef intptr_t atomic_int_least32_t;
+typedef intptr_t atomic_uint_least32_t;
+typedef intptr_t atomic_int_least64_t;
+typedef intptr_t atomic_uint_least64_t;
+typedef intptr_t atomic_int_fast8_t;
+typedef intptr_t atomic_uint_fast8_t;
+typedef intptr_t atomic_int_fast16_t;
+typedef intptr_t atomic_uint_fast16_t;
+typedef intptr_t atomic_int_fast32_t;
+typedef intptr_t atomic_uint_fast32_t;
+typedef intptr_t atomic_int_fast64_t;
+typedef intptr_t atomic_uint_fast64_t;
+typedef intptr_t atomic_intptr_t;
+typedef intptr_t atomic_uintptr_t;
+typedef intptr_t atomic_size_t;
+typedef intptr_t atomic_ptrdiff_t;
+typedef intptr_t atomic_intmax_t;
+typedef intptr_t atomic_uintmax_t;
+
+#define atomic_store(object, desired)   \
+do {\
+*(object) = (desired);  \
+} while (0)
+
+#define atomic_store_explicit(object, desired, order) \
+atomic_store(object, desired)
+
+#define atomic_load(object) \
+(*(object))
+
+#define atomic_load_explicit(object, order) \
+atomic_load(object)
+
+static inline intptr_t atomic_exchange(intptr_t *object, intptr_t desired)
+{
+intptr_t ret = *object;
+*object = desired;
+return ret;
+}
+
+#define atomic_exchange_explicit(object, desired, order) \
+atomic_exchange(object, desired)
+
+static inline int atomic_compare_exchange_strong(intptr_t *object, intptr_t 
*expected,
+ intptr_t desired)
+{
+int ret;
+if (*object == *expected) {
+*object = desired;
+ret = 1;
+} else {
+*expected = *object;
+ret = 0;
+}
+return ret;
+}
+
+#define atomic_compare_exchange_strong_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak(object, expected, desired) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_weak(object, expected, desired)
+
+#define FETCH_MODIFY(opname, op)\
+static inline intptr_t atomic_fetch_ ## opname(intptr_t *object, intptr_t 
operand) \
+{\
+intptr_t ret;  

[FFmpeg-devel] [PATCH 1/4] Add a compat stdatomic.h implementation based on windows atomics

2016-11-28 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Adapted from the code by Rémi Denis-Courmont from VLC

This merges libav commit c2755864afadfbaa349e8d583665c86fe99fa90b.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/win32/stdatomic.h | 179 +++
 configure|   2 +
 2 files changed, 181 insertions(+)
 create mode 100644 compat/atomics/win32/stdatomic.h

diff --git a/compat/atomics/win32/stdatomic.h b/compat/atomics/win32/stdatomic.h
new file mode 100644
index 000..4cbba9c
--- /dev/null
+++ b/compat/atomics/win32/stdatomic.h
@@ -0,0 +1,179 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef COMPAT_ATOMICS_WIN32_STDATOMIC_H
+#define COMPAT_ATOMICS_WIN32_STDATOMIC_H
+
+#include 
+#include 
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_thread_fence(order) \
+MemoryBarrier();
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef intptr_t atomic_flag;
+typedef intptr_t atomic_bool;
+typedef intptr_t atomic_char;
+typedef intptr_t atomic_schar;
+typedef intptr_t atomic_uchar;
+typedef intptr_t atomic_short;
+typedef intptr_t atomic_ushort;
+typedef intptr_t atomic_int;
+typedef intptr_t atomic_uint;
+typedef intptr_t atomic_long;
+typedef intptr_t atomic_ulong;
+typedef intptr_t atomic_llong;
+typedef intptr_t atomic_ullong;
+typedef intptr_t atomic_wchar_t;
+typedef intptr_t atomic_int_least8_t;
+typedef intptr_t atomic_uint_least8_t;
+typedef intptr_t atomic_int_least16_t;
+typedef intptr_t atomic_uint_least16_t;
+typedef intptr_t atomic_int_least32_t;
+typedef intptr_t atomic_uint_least32_t;
+typedef intptr_t atomic_int_least64_t;
+typedef intptr_t atomic_uint_least64_t;
+typedef intptr_t atomic_int_fast8_t;
+typedef intptr_t atomic_uint_fast8_t;
+typedef intptr_t atomic_int_fast16_t;
+typedef intptr_t atomic_uint_fast16_t;
+typedef intptr_t atomic_int_fast32_t;
+typedef intptr_t atomic_uint_fast32_t;
+typedef intptr_t atomic_int_fast64_t;
+typedef intptr_t atomic_uint_fast64_t;
+typedef intptr_t atomic_intptr_t;
+typedef intptr_t atomic_uintptr_t;
+typedef intptr_t atomic_size_t;
+typedef intptr_t atomic_ptrdiff_t;
+typedef intptr_t atomic_intmax_t;
+typedef intptr_t atomic_uintmax_t;
+
+#define atomic_store(object, desired)   \
+do {\
+*(object) = (desired);  \
+MemoryBarrier();\
+} while (0)
+
+#define atomic_store_explicit(object, desired, order) \
+atomic_store(object, desired)
+
+#define atomic_load(object) \
+(MemoryBarrier(), *(object))
+
+#define atomic_load_explicit(object, order) \
+atomic_load(object)
+
+#define atomic_exchange(object, desired) \
+InterlockedExchangePointer(object, desired);
+
+#define atomic_exchange_explicit(object, desired, order) \
+atomic_exchange(object, desired)
+
+static inline int atomic_compare_exchange_strong(intptr_t *object, intptr_t 
*expected,
+ intptr_t desired)
+{
+intptr_t old = *expected;
+*expected = InterlockedCompareExchangePointer(object, desired, old);
+return *expected == old;
+}
+
+#define atomic_compare_exchange_strong_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak(object, expected, desired) \
+atomic_compare_exchange_strong(object, expected, desired)
+
+#define atomic_compare_exchange_weak_explicit(object, expected, desired, 
success, failure) \
+atomic_compare_exchange_weak(object, expected, desired)
+
+#ifdef _WIN64
+#define atomic_fetch_add(object, operand) \
+InterlockedExchangeAdd64(object, operand)
+
+#define atomic_fetch_sub(object, operand) \
+InterlockedExchangeAdd64(object, -(operand))
+
+#define atomic_fetch_or(object, operand) \
+InterlockedOr64(object, operand)
+
+#define atomic_fetch_xor(object, operand) \
+InterlockedXor64(object, operand)
+
+#define atomic_fetch_and(obj

[FFmpeg-devel] [PATCH] Add a compat stdatomic.h implementation based on GCC atomics

2016-11-28 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Adapted from the code by Rémi Denis-Courmont from VLC

This merges libav commit 4e928ef340ac20325f529d92fcbc51e768085358.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/gcc/stdatomic.h | 173 +
 configure  |   6 ++
 2 files changed, 179 insertions(+)
 create mode 100644 compat/atomics/gcc/stdatomic.h

diff --git a/compat/atomics/gcc/stdatomic.h b/compat/atomics/gcc/stdatomic.h
new file mode 100644
index 000..41cadde
--- /dev/null
+++ b/compat/atomics/gcc/stdatomic.h
@@ -0,0 +1,173 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * based on vlc_atomic.h from VLC
+ * Copyright (C) 2010 Rémi Denis-Courmont
+ */
+
+#ifndef COMPAT_ATOMICS_GCC_STDATOMIC_H
+#define COMPAT_ATOMICS_GCC_STDATOMIC_H
+
+#include 
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_thread_fence(order) \
+__sync_synchronize()
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef _Bool  atomic_flag;
+typedef _Bool  atomic_bool;
+typedef  char  atomic_char;
+typedef   signed char  atomic_schar;
+typedef unsigned char  atomic_uchar;
+typedef  short atomic_short;
+typedef unsigned short atomic_ushort;
+typedef  int   atomic_int;
+typedef unsigned int   atomic_uint;
+typedef  long  atomic_long;
+typedef unsigned long  atomic_ulong;
+typedef  long long atomic_llong;
+typedef unsigned long long atomic_ullong;
+typedef  wchar_t   atomic_wchar_t;
+typedef   int_least8_t atomic_int_least8_t;
+typedef  uint_least8_t atomic_uint_least8_t;
+typedef  int_least16_t atomic_int_least16_t;
+typedef uint_least16_t atomic_uint_least16_t;
+typedef  int_least32_t atomic_int_least32_t;
+typedef uint_least32_t atomic_uint_least32_t;
+typedef  int_least64_t atomic_int_least64_t;
+typedef uint_least64_t atomic_uint_least64_t;
+typedef   int_fast8_t atomic_int_fast8_t;
+typedef  uint_fast8_t atomic_uint_fast8_t;
+typedef  int_fast16_t atomic_int_fast16_t;
+typedef uint_fast16_t atomic_uint_fast16_t;
+typedef  int_fast32_t atomic_int_fast32_t;
+typedef uint_fast32_t atomic_uint_fast32_t;
+typedef  int_fast64_t atomic_int_fast64_t;
+typedef uint_fast64_t atomic_uint_fast64_t;
+typedef  intptr_t atomic_intptr_t;
+typedef uintptr_t atomic_uintptr_t;
+typedefsize_t atomic_size_t;
+typedef ptrdiff_t atomic_ptrdiff_t;
+typedef  intmax_t atomic_intmax_t;
+typedef uintmax_t atomic_uintmax_t;
+
+#define atomic_store(object, desired)   \
+do {\
+*(object) = (desired);  \
+__sync_synchronize();   \
+} while (0)
+
+#define atomic_store_explicit(object, desired, order) \
+atomic_store(object, desired)
+
+#define atomic_load(object) \
+(__sync_synchronize(), *(object))
+
+#define atomic_load_explicit(object, order) \
+atomic_load(object)
+
+#define atomic_exchange(object, desired)\
+({  \
+typeof(object) _obj = (object); \
+typeof(*object) _old;   \
+do  \
+_old = atomic_load(_obj);   \
+while (!__sync_bool_compare_and_swap(_obj, _old, (desired)));   \
+_old;   \
+})
+
+#define atomic_exchange_explicit(object, desired, order) \
+atomic_exchange(object, desired)
+
+#define atomic_compare_exchange_strong(object, expected, desired)   \
+({  \
+typeof(object) _exp = (expected);   \
+typeof(*object) 

[FFmpeg-devel] [PATCH] configure: check for stdatomic.h

2016-11-23 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Since this is a C11 feature, it requires -std=c11.

Not actually used for anything yet, that will be added in the following
commits.

This merges libav commit 13f5d2bf75b95a0bfdb9940a5e359a719e242bed.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 configure | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 875fbf3..0176f9c 100755
--- a/configure
+++ b/configure
@@ -1189,6 +1189,19 @@ check_cpp_condition(){
 EOF
 }
 
+test_cflags_cpp(){
+log test_cflags_cpp "$@"
+flags=$1
+condition=$2
+shift 2
+set -- $($cflags_filter "$flags")
+check_cpp "$@" <= 201112L"; then
+add_cflags -std=c11
+else
+check_cflags -std=c99
+fi
+
 check_cc -D_FILE_OFFSET_BITS=64 <
 EOF
@@ -5568,6 +5590,11 @@ check_header windows.h
 check_header X11/extensions/XvMClib.h
 check_header asm/types.h
 
+# it seems there are versions of clang in some distros that try to use the
+# gcc headers, which explodes for stdatomic
+# so we also check that atomics actually work here
+check_builtin stdatomic_h stdatomic.h "atomic_int foo; atomic_store(, 0)"
+
 check_lib2 "windows.h shellapi.h" CommandLineToArgvW -lshell32
 check_lib2 "windows.h wincrypt.h" CryptGenRandom -ladvapi32
 check_lib2 "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] compat/atomics: merge the C11 atomics from Libav

2016-11-23 Thread Wan-Teh Chang
From: Anton Khirnov <an...@khirnov.net>

Merge the following libav commits:
13f5d2bf75b95a0bfdb9940a5e359a719e242bed
4e928ef340ac20325f529d92fcbc51e768085358
c2755864afadfbaa349e8d583665c86fe99fa90b
bb81ed476569b912a37ed553e756e123b6b13b14
f9a6a80e065cdb95b233978f1d96ec9bc863daa1
eb34d40354e2474517c9b9bd787e0dadc89c2a81

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 compat/atomics/dummy/stdatomic.h   | 176 +
 compat/atomics/gcc/stdatomic.h | 173 
 compat/atomics/pthread/stdatomic.c |  39 
 compat/atomics/pthread/stdatomic.h | 197 +
 compat/atomics/suncc/stdatomic.h   | 186 ++
 compat/atomics/win32/stdatomic.h   | 179 +
 configure  |  45 -
 7 files changed, 994 insertions(+), 1 deletion(-)
 create mode 100644 compat/atomics/dummy/stdatomic.h
 create mode 100644 compat/atomics/gcc/stdatomic.h
 create mode 100644 compat/atomics/pthread/stdatomic.c
 create mode 100644 compat/atomics/pthread/stdatomic.h
 create mode 100644 compat/atomics/suncc/stdatomic.h
 create mode 100644 compat/atomics/win32/stdatomic.h

diff --git a/compat/atomics/dummy/stdatomic.h b/compat/atomics/dummy/stdatomic.h
new file mode 100644
index 000..374e1e5
--- /dev/null
+++ b/compat/atomics/dummy/stdatomic.h
@@ -0,0 +1,176 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * based on vlc_atomic.h from VLC
+ * Copyright (C) 2010 Rémi Denis-Courmont
+ */
+
+#ifndef LIBAV_COMPAT_ATOMICS_DUMMY_STDATOMIC_H
+#define LIBAV_COMPAT_ATOMICS_DUMMY_STDATOMIC_H
+
+#include 
+
+#define ATOMIC_FLAG_INIT 0
+
+#define ATOMIC_VAR_INIT(value) (value)
+
+#define atomic_init(obj, value) \
+do {\
+*(obj) = (value);   \
+} while(0)
+
+#define kill_dependency(y) ((void)0)
+
+#define atomic_thread_fence(order) \
+((void)0)
+
+#define atomic_signal_fence(order) \
+((void)0)
+
+#define atomic_is_lock_free(obj) 0
+
+typedef intptr_t atomic_flag;
+typedef intptr_t atomic_bool;
+typedef intptr_t atomic_char;
+typedef intptr_t atomic_schar;
+typedef intptr_t atomic_uchar;
+typedef intptr_t atomic_short;
+typedef intptr_t atomic_ushort;
+typedef intptr_t atomic_int;
+typedef intptr_t atomic_uint;
+typedef intptr_t atomic_long;
+typedef intptr_t atomic_ulong;
+typedef intptr_t atomic_llong;
+typedef intptr_t atomic_ullong;
+typedef intptr_t atomic_wchar_t;
+typedef intptr_t atomic_int_least8_t;
+typedef intptr_t atomic_uint_least8_t;
+typedef intptr_t atomic_int_least16_t;
+typedef intptr_t atomic_uint_least16_t;
+typedef intptr_t atomic_int_least32_t;
+typedef intptr_t atomic_uint_least32_t;
+typedef intptr_t atomic_int_least64_t;
+typedef intptr_t atomic_uint_least64_t;
+typedef intptr_t atomic_int_fast8_t;
+typedef intptr_t atomic_uint_fast8_t;
+typedef intptr_t atomic_int_fast16_t;
+typedef intptr_t atomic_uint_fast16_t;
+typedef intptr_t atomic_int_fast32_t;
+typedef intptr_t atomic_uint_fast32_t;
+typedef intptr_t atomic_int_fast64_t;
+typedef intptr_t atomic_uint_fast64_t;
+typedef intptr_t atomic_intptr_t;
+typedef intptr_t atomic_uintptr_t;
+typedef intptr_t atomic_size_t;
+typedef intptr_t atomic_ptrdiff_t;
+typedef intptr_t atomic_intmax_t;
+typedef intptr_t atomic_uintmax_t;
+
+#define atomic_store(object, desired)   \
+do {\
+*(object) = (desired);  \
+} while (0)
+
+#define atomic_store_explicit(object, desired, order) \
+atomic_store(object, desired)
+
+#define atomic_load(object) \
+(*(object))
+
+#define atomic_load_explicit(object, order) \
+atomic_load(object)
+
+static inline intptr_t atomic_exchange(intptr_t *object, intptr_t desired)
+{
+intptr_t ret = *object;
+*object = desired;
+return ret;
+}
+
+#define atomic_exchange_explicit(object, desired, order) \
+atomic_exchange(object, desired)
+
+static inline int atomic_compare_exchange_strong(intptr_t *object, intptr_t 
*expected,
+ intptr_t desired)
+{
+int ret;
+if (*object == *expected) {
+*object = desired;
+ret = 1;
+} else {
+*expect

[FFmpeg-devel] [PATCH] compat/atomics: merge the C11 atomics from Libav

2016-11-23 Thread Wan-Teh Chang
I found the commits to merge by inspecting the "git log configure"
output and the list of Anton Khirnov's commits at
http://git.libav.org/?p=libav.git;a=search;h=5cc0057f4910c8c72421b812c8f337ef6c43696c;s=Anton+Khirnov;st=author

Wan-Teh Chang

Anton Khirnov (1):
  compat/atomics: merge the C11 atomics from Libav

 compat/atomics/dummy/stdatomic.h   | 176 +
 compat/atomics/gcc/stdatomic.h | 173 
 compat/atomics/pthread/stdatomic.c |  39 
 compat/atomics/pthread/stdatomic.h | 197 +
 compat/atomics/suncc/stdatomic.h   | 186 ++
 compat/atomics/win32/stdatomic.h   | 179 +
 configure  |  45 -
 7 files changed, 994 insertions(+), 1 deletion(-)
 create mode 100644 compat/atomics/dummy/stdatomic.h
 create mode 100644 compat/atomics/gcc/stdatomic.h
 create mode 100644 compat/atomics/pthread/stdatomic.c
 create mode 100644 compat/atomics/pthread/stdatomic.h
 create mode 100644 compat/atomics/suncc/stdatomic.h
 create mode 100644 compat/atomics/win32/stdatomic.h

-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil/tests: add cpu_init to .gitignore and tests/fate

2016-11-23 Thread Wan-Teh Chang
This is a follow-up to commit d84a21207ea83055dc9b6dc1cd6a379f2ea756e7,
which added the libavutil/tests/cpu_init.c. Also add |integral| to
libavfilter/tests/.gitignore.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavfilter/tests/.gitignore | 1 +
 libavutil/tests/.gitignore   | 1 +
 tests/fate/libavutil.mak | 5 +
 3 files changed, 7 insertions(+)

diff --git a/libavfilter/tests/.gitignore b/libavfilter/tests/.gitignore
index b605afa..65ef86f 100644
--- a/libavfilter/tests/.gitignore
+++ b/libavfilter/tests/.gitignore
@@ -1,3 +1,4 @@
 /drawutils
 /filtfmts
 /formats
+/integral
diff --git a/libavutil/tests/.gitignore b/libavutil/tests/.gitignore
index c23c44b..d2cc5cb 100644
--- a/libavutil/tests/.gitignore
+++ b/libavutil/tests/.gitignore
@@ -9,6 +9,7 @@
 /cast5
 /color_utils
 /cpu
+/cpu_init
 /crc
 /des
 /dict
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 6fbad64..0ab6fc5 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -44,6 +44,11 @@ fate-cpu: libavutil/tests/cpu$(EXESUF)
 fate-cpu: CMD = runecho libavutil/tests/cpu $(CPUFLAGS:%=-c%) $(THREADS:%=-t%)
 fate-cpu: REF = /dev/null
 
+FATE_LIBAVUTIL += fate-cpu_init
+fate-cpu_init: libavutil/tests/cpu_init$(EXESUF)
+fate-cpu_init: CMD = run libavutil/tests/cpu_init
+fate-cpu_init: REF = /dev/null
+
 FATE_LIBAVUTIL += fate-crc
 fate-crc: libavutil/tests/crc$(EXESUF)
 fate-crc: CMD = run libavutil/tests/crc
-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil/tests: add cpu_init to .gitignore and tests/fate

2016-11-23 Thread Wan-Teh Chang
Hi Michael,

Please review and apply this patch. I didn't know I also needed to add
the new cpu_init test to libavutil/tests/.gitignore and
tests/fate/libavutil.mak.

I noticed libavfilter/tests/integral being reported as an untracked
file when I ran "git status", so I also took care of that in this patch.

Wan-Teh Chang (1):
  avutil/tests: add cpu_init to .gitignore and tests/fate

 libavfilter/tests/.gitignore | 1 +
 libavutil/tests/.gitignore   | 1 +
 tests/fate/libavutil.mak | 5 +
 3 files changed, 7 insertions(+)

-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-23 Thread Wan-Teh Chang
On Tue, Nov 22, 2016 at 3:30 PM, wm4 <nfx...@googlemail.com> wrote:
> On Tue, 22 Nov 2016 23:57:15 +0100
> Michael Niedermayer <mich...@niedermayer.cc> wrote:
>
>> For example the progress code in the frame threading.
>
> Which was recently fixed in Libav AFAIR...

You're right. libav/libavcodec/pthread_frame.c has code similar to my
ffmpeg patch http://ffmpeg.org/pipermail/ffmpeg-devel/2016-March/190454.html,
and much more.

Note: libav/libavcodec/pthread_frame.c uses unnecessary (too strong)
memory barriers in ff_thread_report_progress(). We can fix those when
the code is merged to ffmpeg.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-23 Thread Wan-Teh Chang
On Tue, Nov 22, 2016 at 3:32 PM, wm4 <nfx...@googlemail.com> wrote:
>
> [libav/compat/atomics/] is emulation code for compilers which don't provide 
> C11 atomics.
> All relevant compilers provide them natively (and presumably implement
> the weaker semantics).

Thank you for pointing this out.

Do you know when the atomics code in libav will be merged to ffmpeg?

I don't want add new code that will interfere with the merge. On the
other hand, the new code I wanted to add will be easy to rewrite using
C11 atomics. So this all depends on when the merge will be performed.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avutil/cpu: remove the |checked| static variable

2016-11-23 Thread Wan-Teh Chang
Remove the |checked| variable because the invalid value of -1 for
|flags| can be used to indicate the same condition. Also rename |flags|
to |cpu_flags| because there are a local variable and a function
parameter named |flags| in the same file.

Add a test program libavutil/tests/cpu_init.c to check whether the
one-time initialization in av_get_cpu_flags() has data races.

Co-author: Dmitry Vyukov of Google

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/Makefile |  1 +
 libavutil/cpu.c| 40 ++
 libavutil/tests/cpu_init.c | 72 ++
 3 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100644 libavutil/tests/cpu_init.c

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 0fa90fe..732961a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -187,6 +187,7 @@ TESTPROGS = adler32 
\
 camellia\
 color_utils \
 cpu \
+cpu_init\
 crc \
 des \
 dict\
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index f5785fc..73317c4 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -44,7 +44,20 @@
 #include 
 #endif
 
-static int flags, checked;
+static int cpu_flags = -1;
+
+static int get_cpu_flags(void)
+{
+if (ARCH_AARCH64)
+return ff_get_cpu_flags_aarch64();
+if (ARCH_ARM)
+return ff_get_cpu_flags_arm();
+if (ARCH_PPC)
+return ff_get_cpu_flags_ppc();
+if (ARCH_X86)
+return ff_get_cpu_flags_x86();
+return 0;
+}
 
 void av_force_cpu_flags(int arg){
 if (   (arg & ( AV_CPU_FLAG_3DNOW|
@@ -69,33 +82,22 @@ void av_force_cpu_flags(int arg){
 arg |= AV_CPU_FLAG_MMX;
 }
 
-flags   = arg;
-checked = arg != -1;
+cpu_flags = arg;
 }
 
 int av_get_cpu_flags(void)
 {
-if (checked)
-return flags;
-
-if (ARCH_AARCH64)
-flags = ff_get_cpu_flags_aarch64();
-if (ARCH_ARM)
-flags = ff_get_cpu_flags_arm();
-if (ARCH_PPC)
-flags = ff_get_cpu_flags_ppc();
-if (ARCH_X86)
-flags = ff_get_cpu_flags_x86();
-
-checked = 1;
+int flags = cpu_flags;
+if (flags == -1) {
+flags = get_cpu_flags();
+cpu_flags = flags;
+}
 return flags;
 }
 
 void av_set_cpu_flags_mask(int mask)
 {
-checked   = 0;
-flags = av_get_cpu_flags() & mask;
-checked   = 1;
+cpu_flags = get_cpu_flags() & mask;
 }
 
 int av_parse_cpu_flags(const char *s)
diff --git a/libavutil/tests/cpu_init.c b/libavutil/tests/cpu_init.c
new file mode 100644
index 000..5a5ecda
--- /dev/null
+++ b/libavutil/tests/cpu_init.c
@@ -0,0 +1,72 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This test program tests whether the one-time initialization in
+ * av_get_cpu_flags() has data races.
+ */
+
+#include 
+#include 
+
+#include "config.h"
+
+#include "libavutil/cpu.h"
+#include "libavutil/thread.h"
+
+#if HAVE_PTHREADS
+static void *thread_main(void *arg)
+{
+int *flags = arg;
+
+*flags = av_get_cpu_flags();
+return NULL;
+}
+#endif
+
+
+int main(int argc, char **argv)
+{
+#if HAVE_PTHREADS
+int cpu_flags1;
+int cpu_flags2;
+int ret;
+pthread_t thread1;
+pthread_t thread2;
+
+if ((ret = pthread_create(, NULL, thread_main, _flags1))) {
+fprintf(stderr, "pthread_create failed: %s.\n", strerror(ret));
+return 1;
+}
+if ((ret = pthread_create(, NULL, thread_main, _flags2))) {
+fprintf(stderr, "pthread_create failed: %s.\n", strerror(ret));
+return 1;
+}
+pthread_join(thread1, NULL);
+pthread_join(thread2, NULL);
+
+if (cpu_flags1 < 0)
+return 2;
+if (cpu_flags2 <

Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Hi Michael,

On Tue, Nov 22, 2016 at 2:57 PM, Michael Niedermayer
<mich...@niedermayer.cc> wrote:
>
> we have plenty of code that accesses fields without the extra layer
> of weak atomics.
> For example the progress code in the frame threading.

The progress code in the frame threading also has data races as
defined by the C standard, so ThreadSanitizer also warns about it. I
submitted patches for that code to use atomic operations before.

> Can you just merge the varible ?

Yes, I will create a patch to just merge the variables. My goal is to
fix the data races, so I still need to create another patch on top to
use atomic operations.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Hi wm4,

On Tue, Nov 22, 2016 at 1:41 PM, wm4 <nfx...@googlemail.com> wrote:
>
> Again, once the atomics changes in Libav are merged, the avpriv_atomic_
> additions will have to be deleted, and code using it rewritten.

Thanks for the heads-up. Is there an atomics patch for libav being
reviewed right now?

I inspected the libav code this morning after I read your earlier
message. The current atomics code in libav ignores the memory order
argument, so the "relaxed" memory order isn't implemented in the
current libav.

For example, the current revision of libav/compat/atomics/gcc/stdatomic.h has:

==
#define atomic_store(object, desired)   \
do {\
*(object) = (desired);  \
__sync_synchronize();   \
} while (0)

#define atomic_store_explicit(object, desired, order) \
atomic_store(object, desired)

#define atomic_load(object) \
(__sync_synchronize(), *(object))

#define atomic_load_explicit(object, order) \
atomic_load(object)
==

So I am wondering if there is a libav patch that implements the
|order| argument for atomic_store_explicit() and
atomic_load_explicit().

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Hi Michael,

On Tue, Nov 22, 2016 at 1:22 PM, Michael Niedermayer
<mich...@niedermayer.cc> wrote:
>
> ok, i see th race but do you really need the atomic operations ?
> isnt merging the 2 variables into 1 as you do enough ?
> (i mean the tools might still complain but would there be an actual
>  race remaining?)

The atomic operations avoid the "undefined behavior" resulting from
the data races in the C source code. ThreadSanitizer analyzes the C
source code, therefore it must warn about what may be undefined
behavior according to the C standard, even though for a particular
compiler and processor, the generated code is the same.

Here is a small test program that shows gcc generates the same x86_64
assembly code for the normal and atomic (with relaxed memory ordering)
load and store operations:

==
$ gcc --version
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
Copyright (C) 2013 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ cat flags.c
static int flags;

int get_flags_nonatomic(void)
{
return flags;
}

int get_flags_atomic_relaxed(void)
{
return __atomic_load_n(, __ATOMIC_RELAXED);
}

void set_flags_nonatomic(int val)
{
flags = val;
}

void set_flags_atomic_relaxed(int val)
{
__atomic_store_n(, val, __ATOMIC_RELAXED);
}
$ gcc -Wall -O3 -std=c11 -S -c flags.c
$ cat flags.s
.file "flags.c"
.text
.p2align 4,,15
.globl get_flags_nonatomic
.type get_flags_nonatomic, @function
get_flags_nonatomic:
.LFB0:
.cfi_startproc
movl flags(%rip), %eax
ret
.cfi_endproc
.LFE0:
.size get_flags_nonatomic, .-get_flags_nonatomic
.p2align 4,,15
.globl get_flags_atomic_relaxed
.type get_flags_atomic_relaxed, @function
get_flags_atomic_relaxed:
.LFB1:
.cfi_startproc
movl flags(%rip), %eax
ret
.cfi_endproc
.LFE1:
.size get_flags_atomic_relaxed, .-get_flags_atomic_relaxed
.p2align 4,,15
.globl set_flags_nonatomic
.type set_flags_nonatomic, @function
set_flags_nonatomic:
.LFB2:
.cfi_startproc
movl %edi, flags(%rip)
ret
.cfi_endproc
.LFE2:
.size set_flags_nonatomic, .-set_flags_nonatomic
.p2align 4,,15
.globl set_flags_atomic_relaxed
.type set_flags_atomic_relaxed, @function
set_flags_atomic_relaxed:
.LFB3:
.cfi_startproc
movl %edi, flags(%rip)
ret
.cfi_endproc
.LFE3:
.size set_flags_atomic_relaxed, .-set_flags_atomic_relaxed
.local flags
.comm flags,4,16
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4"
.section .note.GNU-stack,"",@progbits
==

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Make the one-time initialization in av_get_cpu_flags() thread-safe.

2016-11-22 Thread Wan-Teh Chang
This patch was generated in error. Please ignore it.

Wan-Teh Chang

On Tue, Nov 22, 2016 at 11:30 AM, Wan-Teh Chang <w...@google.com> wrote:
> The static variables |flags| and |checked| in libavutil/cpu.c are read
> and written using normal load and store instructions. These are
> considered as data races. The fix is to use atomic load and store
> instructions. The fix also eliminates the |checked| variable because the
> invalid value of -1 for |flags| can be used to indicate the same
> condition.
>
> The fix requires a new atomic integer compare and swap function. Since
> the fix does not need memory barriers, "relaxed" variants of
> avpriv_atomic_int_get() and avpriv_atomic_int_set() are also added.
>
> Add a test program libavutil/tests/cpu_init.c to verify the one-time
> initialization in av_get_cpu_flags() is thread-safe.
>
> Co-author: Dmitry Vyukov of Google, which suggested the data race fix.
>
> Signed-off-by: Wan-Teh Chang <w...@google.com>
> ---
>  libavutil/Makefile |  1 +
>  libavutil/atomic.c | 40 ++
>  libavutil/atomic.h | 34 --
>  libavutil/atomic_gcc.h | 33 +
>  libavutil/atomic_suncc.h   | 19 
>  libavutil/atomic_win32.h   | 21 ++
>  libavutil/cpu.c| 41 ++
>  libavutil/cpu.h|  2 --
>  libavutil/tests/atomic.c   | 13 +
>  libavutil/tests/cpu_init.c | 72 
> ++
>  10 files changed, 253 insertions(+), 23 deletions(-)
>  create mode 100644 libavutil/tests/cpu_init.c
>
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 0fa90fe..732961a 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -187,6 +187,7 @@ TESTPROGS = adler32   
>   \
>  camellia\
>  color_utils \
>  cpu \
> +cpu_init\
>  crc \
>  des \
>  dict\
> diff --git a/libavutil/atomic.c b/libavutil/atomic.c
> index 64cff25..7bce013 100644
> --- a/libavutil/atomic.c
> +++ b/libavutil/atomic.c
> @@ -40,6 +40,11 @@ int avpriv_atomic_int_get(volatile int *ptr)
>  return res;
>  }
>
> +int avpriv_atomic_int_get_relaxed(volatile int *ptr)
> +{
> +return avpriv_atomic_int_get(ptr);
> +}
> +
>  void avpriv_atomic_int_set(volatile int *ptr, int val)
>  {
>  pthread_mutex_lock(_lock);
> @@ -47,6 +52,11 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
>  pthread_mutex_unlock(_lock);
>  }
>
> +void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
> +{
> +avpriv_atomic_int_set(ptr, val);
> +}
> +
>  int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
>  {
>  int res;
> @@ -59,6 +69,17 @@ int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int 
> inc)
>  return res;
>  }
>
> +int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
> +{
> +int ret;
> +pthread_mutex_lock(_lock);
> +ret = *ptr;
> +if (ret == oldval)
> +*ptr = newval;
> +pthread_mutex_unlock(_lock);
> +return ret;
> +}
> +
>  void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
>  {
>  void *ret;
> @@ -77,17 +98,36 @@ int avpriv_atomic_int_get(volatile int *ptr)
>  return *ptr;
>  }
>
> +int avpriv_atomic_int_get_relaxed(volatile int *ptr)
> +{
> +return avpriv_atomic_int_get(ptr);
> +}
> +
>  void avpriv_atomic_int_set(volatile int *ptr, int val)
>  {
>  *ptr = val;
>  }
>
> +void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
> +{
> +avpriv_atomic_int_set(ptr, val);
> +}
> +
>  int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
>  {
>  *ptr += inc;
>  return *ptr;
>  }
>
> +int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
> +{
> +if (*ptr == oldval) {
> +*ptr = newval;
> +return oldval;
> +}
> +return *ptr;
> +}
> +
>  void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
>  {
>  if (*ptr == oldval) {
> diff --git a/libavutil/atomic.h b/libavutil/atomic.h
> index 1

[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Make the one-time initialization in av_get_cpu_flags() thread-safe. The
static variables |flags| and |checked| in libavutil/cpu.c are read and
written using normal load and store operations. These are considered as
data races. The fix is to use atomic load and store operations. The fix
also eliminates the |checked| variable because the invalid value of -1
for |flags| can be used to indicate the same condition.

The fix requires a new atomic integer compare and swap function. Since
the fix does not need memory barriers, "relaxed" variants of
avpriv_atomic_int_get() and avpriv_atomic_int_set() are also added.

Add a test program libavutil/tests/cpu_init.c to verify the one-time
initialization in av_get_cpu_flags() is thread-safe.

Co-author: Dmitry Vyukov of Google, which suggested the data race fix.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/Makefile |  1 +
 libavutil/atomic.c | 40 ++
 libavutil/atomic.h | 34 --
 libavutil/atomic_gcc.h | 33 +
 libavutil/atomic_suncc.h   | 19 
 libavutil/atomic_win32.h   | 21 ++
 libavutil/cpu.c| 41 ++
 libavutil/cpu.h|  2 --
 libavutil/tests/atomic.c   | 13 +
 libavutil/tests/cpu_init.c | 72 ++
 10 files changed, 253 insertions(+), 23 deletions(-)
 create mode 100644 libavutil/tests/cpu_init.c

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 0fa90fe..732961a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -187,6 +187,7 @@ TESTPROGS = adler32 
\
 camellia\
 color_utils \
 cpu \
+cpu_init\
 crc \
 des \
 dict\
diff --git a/libavutil/atomic.c b/libavutil/atomic.c
index 64cff25..7bce013 100644
--- a/libavutil/atomic.c
+++ b/libavutil/atomic.c
@@ -40,6 +40,11 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return res;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 pthread_mutex_lock(_lock);
@@ -47,6 +52,11 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
 pthread_mutex_unlock(_lock);
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 int res;
@@ -59,6 +69,17 @@ int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int 
inc)
 return res;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+int ret;
+pthread_mutex_lock(_lock);
+ret = *ptr;
+if (ret == oldval)
+*ptr = newval;
+pthread_mutex_unlock(_lock);
+return ret;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 void *ret;
@@ -77,17 +98,36 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return *ptr;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 *ptr = val;
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 *ptr += inc;
 return *ptr;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+if (*ptr == oldval) {
+*ptr = newval;
+return oldval;
+}
+return *ptr;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 if (*ptr == oldval) {
diff --git a/libavutil/atomic.h b/libavutil/atomic.h
index 15906d2..c5aa1a4 100644
--- a/libavutil/atomic.h
+++ b/libavutil/atomic.h
@@ -40,20 +40,38 @@
  *
  * @param ptr atomic integer
  * @return the current value of the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a memory barrier with sequentially-consistent ordering.
  */
 int avpriv_atomic_int_get(volatile int *ptr);
 
 /**
+ * Load the current value stored in an atomic integer.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+int avpriv_atomic_int_get_relaxed(volatile int *ptr);
+
+/**
  * Store a new value in an atomic integer.
  *
  * @param ptr atomic integer
  * @param val the value to sto

[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Here is a new version of my patch to address Michael's review comments.

Wan-Teh Chang (1):
  Make the one-time initialization in av_get_cpu_flags() thread-safe.

 libavutil/Makefile |  1 +
 libavutil/atomic.c | 40 ++
 libavutil/atomic.h | 34 --
 libavutil/atomic_gcc.h | 33 +
 libavutil/atomic_suncc.h   | 19 
 libavutil/atomic_win32.h   | 21 ++
 libavutil/cpu.c| 41 ++
 libavutil/cpu.h|  2 --
 libavutil/tests/atomic.c   | 13 +
 libavutil/tests/cpu_init.c | 72 ++
 10 files changed, 253 insertions(+), 23 deletions(-)
 create mode 100644 libavutil/tests/cpu_init.c

-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Make the one-time initialization in av_get_cpu_flags() thread-safe.

2016-11-22 Thread Wan-Teh Chang
The static variables |flags| and |checked| in libavutil/cpu.c are read
and written using normal load and store instructions. These are
considered as data races. The fix is to use atomic load and store
instructions. The fix also eliminates the |checked| variable because the
invalid value of -1 for |flags| can be used to indicate the same
condition.

The fix requires a new atomic integer compare and swap function. Since
the fix does not need memory barriers, "relaxed" variants of
avpriv_atomic_int_get() and avpriv_atomic_int_set() are also added.

Add a test program libavutil/tests/cpu_init.c to verify the one-time
initialization in av_get_cpu_flags() is thread-safe.

Co-author: Dmitry Vyukov of Google, which suggested the data race fix.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/Makefile |  1 +
 libavutil/atomic.c | 40 ++
 libavutil/atomic.h | 34 --
 libavutil/atomic_gcc.h | 33 +
 libavutil/atomic_suncc.h   | 19 
 libavutil/atomic_win32.h   | 21 ++
 libavutil/cpu.c| 41 ++
 libavutil/cpu.h|  2 --
 libavutil/tests/atomic.c   | 13 +
 libavutil/tests/cpu_init.c | 72 ++
 10 files changed, 253 insertions(+), 23 deletions(-)
 create mode 100644 libavutil/tests/cpu_init.c

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 0fa90fe..732961a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -187,6 +187,7 @@ TESTPROGS = adler32 
\
 camellia\
 color_utils \
 cpu \
+cpu_init\
 crc \
 des \
 dict\
diff --git a/libavutil/atomic.c b/libavutil/atomic.c
index 64cff25..7bce013 100644
--- a/libavutil/atomic.c
+++ b/libavutil/atomic.c
@@ -40,6 +40,11 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return res;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 pthread_mutex_lock(_lock);
@@ -47,6 +52,11 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
 pthread_mutex_unlock(_lock);
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 int res;
@@ -59,6 +69,17 @@ int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int 
inc)
 return res;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+int ret;
+pthread_mutex_lock(_lock);
+ret = *ptr;
+if (ret == oldval)
+*ptr = newval;
+pthread_mutex_unlock(_lock);
+return ret;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 void *ret;
@@ -77,17 +98,36 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return *ptr;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 *ptr = val;
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 *ptr += inc;
 return *ptr;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+if (*ptr == oldval) {
+*ptr = newval;
+return oldval;
+}
+return *ptr;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 if (*ptr == oldval) {
diff --git a/libavutil/atomic.h b/libavutil/atomic.h
index 15906d2..c5aa1a4 100644
--- a/libavutil/atomic.h
+++ b/libavutil/atomic.h
@@ -40,20 +40,38 @@
  *
  * @param ptr atomic integer
  * @return the current value of the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a memory barrier with sequentially-consistent ordering.
  */
 int avpriv_atomic_int_get(volatile int *ptr);
 
 /**
+ * Load the current value stored in an atomic integer.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+int avpriv_atomic_int_get_relaxed(volatile int *ptr);
+
+/**
  * Store a new value in an atomic integer.
  *
  * @param ptr atomic integer
  * @param val the value to store in the atomic integer
- * @note This acts as a memory barrier.
+ *

Re: [FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-22 Thread Wan-Teh Chang
Hi Michael,

On Mon, Nov 21, 2016 at 4:35 PM, Michael Niedermayer
<mich...@niedermayer.cc> wrote:
> On Mon, Nov 21, 2016 at 03:37:49PM -0800, Wan-Teh Chang wrote:
>> Make the one-time initialization in av_get_cpu_flags() thread-safe.
>> The fix assumes -1 is an invalid value for cpu flags.
>
> please explain the bug / race that occurs and how it is fixed

Note: I will include the following explanation in the next version of my patch.

The static variables |flags| and |checked| in libavutil/cpu.c are read
and written using normal load and store instructions. These are
considered as data races. The fix is to use atomic load and store
instructions. The fix also eliminates the |checked| variable because
the invalid value of -1 for |flags| can be used to indicate the same
condition.

Our application runs into the data races because two threads call
avformat_find_stream_info() at the same time.
avformat_find_stream_info() calls av_parser_init(), which may
eventually call av_get_cpu_flag(), depending on the codec.

I just wrote a minimal test case (libavutil/tests/cpu_init.c) that
reproduces the data races. The test program creates two threads that
call av_get_cpu_flags(). When built with --toolchain=clang-tsan, the
test program triggers two ThreadSanitizer warnings:

$ libavutil/tests/cpu_init
==
WARNING: ThreadSanitizer: data race (pid=66608)
  Read of size 4 at 0x7f7aa8c15d40 by thread T2:
#0 av_get_cpu_flags
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:78
(exe+0x000a8536)
#1 thread_main
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:32
(exe+0x000a8494)

  Previous write of size 4 at 0x7f7aa8c15d40 by thread T1:
#0 av_get_cpu_flags
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:90
(exe+0x000a8578)
#1 thread_main
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:32
(exe+0x000a8494)

  Thread T2 (tid=66611, running) created by main thread at:
#0 pthread_create ??:0 (exe+0x0004d9bb)
#1 main 
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:51
(exe+0x000a83cb)

  Thread T1 (tid=66610, running) created by main thread at:
#0 pthread_create ??:0 (exe+0x0004d9bb)
#1 main 
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:47
(exe+0x000a83a9)

SUMMARY: ThreadSanitizer: data race
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:78
av_get_cpu_flags
==
==
WARNING: ThreadSanitizer: data race (pid=66608)
  Read of size 4 at 0x7f7aa8c15d3c by thread T2:
#0 av_get_cpu_flags
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:79
(exe+0x000a854b)
#1 thread_main
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:32
(exe+0x000a8494)

  Previous write of size 4 at 0x7f7aa8c15d3c by thread T1:
#0 av_get_cpu_flags
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:88
(exe+0x000a8566)
#1 thread_main
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:32
(exe+0x000a8494)

  Thread T2 (tid=66611, running) created by main thread at:
#0 pthread_create ??:0 (exe+0x0004d9bb)
#1 main 
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:51
(exe+0x000a83cb)

  Thread T1 (tid=66610, running) created by main thread at:
#0 pthread_create ??:0 (exe+0x0004d9bb)
#1 main 
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/tests/cpu_init.c:47
(exe+0x000a83a9)

SUMMARY: ThreadSanitizer: data race
/usr/local/google/home/wtc/ffmpeg-cpu-data-race-before/ffmpeg/libavutil/cpu.c:79
av_get_cpu_flags
==
ThreadSanitizer: reported 2 warnings

>> The fix requires new atomic functions to get, set, and compare-and-swap
>> an integer without a memory barrier.
>
> why ?

The fix needs a compare-and-swap function for an atomic integer. There
is no such function in libavutil/atomic.h.

Although the fix can use avpriv_atomic_int_get() and
avpriv_atomic_int_set(), these two functions execute a memory barrier
with sequentially-consistent ordering, which the fix doesn't need. To
improve performance, I added avpriv_atomic_int_get_relaxed() and
avpriv_atomic_int_set_relaxed()

>> The data race fix is written by Dmitry Vyukov of Google.
>
> Then the author for the git patch should be set accordingly

I will look into this. I may just identify him as a co-author.

>> @@ -44,7 +45,20 @@
>>  #include 
>>  #endif
>>
>> -static int flags, checked;
>> +static int cpu_flags = -1;
>> +
>> +static int get_cpu_flags(void)
>> +{
>> +if (ARCH_

[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-21 Thread Wan-Teh Chang
Make the one-time initialization in av_get_cpu_flags() thread-safe.
The fix assumes -1 is an invalid value for cpu flags.

The fix requires new atomic functions to get, set, and compare-and-swap
an integer without a memory barrier.

The data race fix is written by Dmitry Vyukov of Google.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/atomic.c   | 40 
 libavutil/atomic.h   | 34 --
 libavutil/atomic_gcc.h   | 33 +
 libavutil/atomic_suncc.h | 19 +++
 libavutil/atomic_win32.h | 21 +
 libavutil/cpu.c  | 41 ++---
 libavutil/tests/atomic.c | 13 +
 7 files changed, 180 insertions(+), 21 deletions(-)

diff --git a/libavutil/atomic.c b/libavutil/atomic.c
index 64cff25..7bce013 100644
--- a/libavutil/atomic.c
+++ b/libavutil/atomic.c
@@ -40,6 +40,11 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return res;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 pthread_mutex_lock(_lock);
@@ -47,6 +52,11 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
 pthread_mutex_unlock(_lock);
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 int res;
@@ -59,6 +69,17 @@ int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int 
inc)
 return res;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+int ret;
+pthread_mutex_lock(_lock);
+ret = *ptr;
+if (ret == oldval)
+*ptr = newval;
+pthread_mutex_unlock(_lock);
+return ret;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 void *ret;
@@ -77,17 +98,36 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return *ptr;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 *ptr = val;
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 *ptr += inc;
 return *ptr;
 }
 
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval)
+{
+if (*ptr == oldval) {
+*ptr = newval;
+return oldval;
+}
+return *ptr;
+}
+
 void *avpriv_atomic_ptr_cas(void * volatile *ptr, void *oldval, void *newval)
 {
 if (*ptr == oldval) {
diff --git a/libavutil/atomic.h b/libavutil/atomic.h
index 15906d2..c5aa1a4 100644
--- a/libavutil/atomic.h
+++ b/libavutil/atomic.h
@@ -40,20 +40,38 @@
  *
  * @param ptr atomic integer
  * @return the current value of the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a memory barrier with sequentially-consistent ordering.
  */
 int avpriv_atomic_int_get(volatile int *ptr);
 
 /**
+ * Load the current value stored in an atomic integer.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+int avpriv_atomic_int_get_relaxed(volatile int *ptr);
+
+/**
  * Store a new value in an atomic integer.
  *
  * @param ptr atomic integer
  * @param val the value to store in the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a memory barrier with sequentially-consistent ordering.
  */
 void avpriv_atomic_int_set(volatile int *ptr, int val);
 
 /**
+ * Store a new value in an atomic integer.
+ *
+ * @param ptr atomic integer
+ * @param val the value to store in the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val);
+
+/**
  * Add a value to an atomic integer.
  *
  * @param ptr atomic integer
@@ -65,12 +83,24 @@ void avpriv_atomic_int_set(volatile int *ptr, int val);
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc);
 
 /**
+ * Atomic integer compare and swap.
+ *
+ * @param ptr pointer to the integer to operate on
+ * @param oldval do the swap if the current value of *ptr equals to oldval
+ * @param newval value to replace *ptr with
+ * @return the value of *ptr before comparison
+ * @note This does NOT act as a memory barrier.
+ */
+int avpriv_atomic_int_cas_relaxed(volatile int *ptr, int oldval, int newval);
+
+/**
  * Atomic pointer compare and swap.
  *
  * @param ptr pointer to the pointer to operate on
  * @param oldval do the swap if the current value of *ptr equals to oldval
  * @param newval value to replace *ptr with
  * @return the value of *ptr before comparison
+ * @note This acts as a memory barrier with sequentially-consistent or

[FFmpeg-devel] [PATCH] avutil: fix data race in av_get_cpu_flags().

2016-11-21 Thread Wan-Teh Chang
Hi,

This patch makes the one-time initialization in av_get_cpu_flags()
thread-safe. The data race was reported by ThreadSanitizer.

Wan-Teh Chang (1):
  avutil: fix data race in av_get_cpu_flags().

 libavutil/atomic.c   | 40 
 libavutil/atomic.h   | 34 --
 libavutil/atomic_gcc.h   | 33 +
 libavutil/atomic_suncc.h | 19 +++
 libavutil/atomic_win32.h | 21 +
 libavutil/cpu.c  | 41 ++---
 libavutil/tests/atomic.c | 13 +
 7 files changed, 180 insertions(+), 21 deletions(-)

-- 
2.8.0.rc3.226.g39d4020

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix a bug in ff_thread_report_progress in updating progress[field].

2016-03-01 Thread Wan-Teh Chang
On Tue, Mar 1, 2016 at 7:44 AM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi,
>
> On Tue, Mar 1, 2016 at 9:34 AM, Wan-Teh Chang <wtc-at-google@ffmpeg.org>
> wrote:
>
>> By the way, I'm also wondering why ff_thread_report_progress is
>> sometimes called with progress[field] >= n? I saw it happen when I ran
>> make fate-h264, but did not investigate it.
>
> I don't know, which sample (test name) specifically?

The attached ff_thread_report_progress.txt file is a patch for ffmpeg
that counts the fast and slow code paths in ff_thread_report_progress
and ff_thread_await_progress, and prints the counters at the end.

I ran "make fate-h264 THREADS=4". Here are the results, which show
that the fast code path (progress[field] >= n) was taken in
ff_thread_report_progress.

wtc@hostname:~/ffmpeg-dev/fast-slow-paths-instrumentation/ffmpeg/tests/data/fate$
grep slow *.err
h264-conformance-aud_mw_e.err:report_progress: fast=100, slow=1000 |
await_progress: fast=10999, slow=198
h264-conformance-ba1_ft_c.err:report_progress: fast=299, slow=5382 |
await_progress: fast=112159, slow=1254
h264-conformance-ba1_sony_d.err:report_progress: fast=17, slow=153 |
await_progress: fast=0, slow=0
h264-conformance-ba2_sony_f.err:report_progress: fast=300, slow=2700 |
await_progress: fast=36259, slow=688
h264-conformance-ba3_sva_c.err:report_progress: fast=17, slow=153 |
await_progress: fast=6054, slow=48
h264-conformance-bamq1_jvc_c.err:report_progress: fast=30, slow=270 |
await_progress: fast=0, slow=0
h264-conformance-bamq2_jvc_c.err:report_progress: fast=30, slow=270 |
await_progress: fast=4235, slow=87
h264-conformance-ba_mw_d.err:report_progress: fast=100, slow=900 |
await_progress: fast=10969, slow=223
h264-conformance-banm_mw_d.err:report_progress: fast=100, slow=900 |
await_progress: fast=9003, slow=243
h264-conformance-basqp1_sony_c.err:report_progress: fast=4, slow=36 |
await_progress: fast=0, slow=0
h264-conformance-caba1_sony_d.err:report_progress: fast=50, slow=450 |
await_progress: fast=0, slow=0
h264-conformance-caba1_sva_b.err:report_progress: fast=17, slow=153 |
await_progress: fast=0, slow=0
h264-conformance-caba2_sony_e.err:report_progress: fast=300, slow=2700
| await_progress: fast=34215, slow=920
h264-conformance-caba2_sva_b.err:report_progress: fast=17, slow=153 |
await_progress: fast=1610, slow=24
h264-conformance-caba3_sony_c.err:report_progress: fast=101, slow=909
| await_progress: fast=60774, slow=264
h264-conformance-caba3_sva_b.err:report_progress: fast=17, slow=153 |
await_progress: fast=6637, slow=91
h264-conformance-caba3_toshiba_e.err:report_progress: fast=300,
slow=2700 | await_progress: fast=28113, slow=716
h264-conformance-cabaci3_sony_b.err:report_progress: fast=101,
slow=909 | await_progress: fast=60989, slow=302
h264-conformance-cabac_mot_fld0_full.err:report_progress: fast=24,
slow=360 | await_progress: fast=72775, slow=83
h264-conformance-cabac_mot_frm0_full.err:report_progress: fast=12,
slow=360 | await_progress: fast=79299, slow=225
h264-conformance-cabac_mot_mbaff0_full.err:report_progress: fast=12,
slow=180 | await_progress: fast=77371, slow=119
h264-conformance-cabac_mot_picaff0_full.err:report_progress: fast=18,
slow=346 | await_progress: fast=88910, slow=169
h264-conformance-cabast3_sony_e.err:report_progress: fast=9, slow=162
| await_progress: fast=7851, slow=54
h264-conformance-cabastbr3_sony_b.err:report_progress: fast=25,
slow=450 | await_progress: fast=7967, slow=119
h264-conformance-cabref3_sand_d.err:report_progress: fast=36, slow=324
| await_progress: fast=38201, slow=43
h264-conformance-cacqp3_sony_d.err:report_progress: fast=26, slow=234
| await_progress: fast=9512, slow=126
h264-conformance-cafi1_sva_c.err:report_progress: fast=34, slow=510 |
await_progress: fast=78904, slow=2
h264-conformance-cama1_sony_c.err:report_progress: fast=5, slow=75 |
await_progress: fast=0, slow=0
h264-conformance-cama1_toshiba_b.err:report_progress: fast=30,
slow=270 | await_progress: fast=72871, slow=191
h264-conformance-cama1_vtc_c.err:report_progress: fast=3, slow=45 |
await_progress: fast=9041, slow=14
h264-conformance-cama2_vtc_b.err:report_progress: fast=3, slow=57 |
await_progress: fast=13790, slow=14
h264-conformance-cama3_sand_e.err:report_progress: fast=18, slow=162 |
await_progress: fast=34497, slow=77
h264-conformance-cama3_vtc_b.err:report_progress: fast=3, slow=54 |
await_progress: fast=13784, slow=13
h264-conformance-camaci3_sony_c.err:report_progress: fast=7, slow=28 |
await_progress: fast=1327, slow=22
h264-conformance-camanl1_toshiba_b.err:report_progress: fast=30,
slow=300 | await_progress: fast=71845, slow=200
h264-conformance-camanl2_toshiba_b.err:report_progress: fast=30,
slow=300 | await_progress: fast=65382, slow=187
h264-conformance-camanl3_sand_e.err:report_progress: fast=18, slow=180
| await_progress: fast=34512, slow=61
h264-conformance-camasl3_sony_b.err:report_progress: fast=7, slow=28 |
awa

Re: [FFmpeg-devel] [PATCH] Read |state| under the protection of |mutex| or |progress_mutex|.

2016-03-01 Thread Wan-Teh Chang
Hi Ronald,

On Fri, Feb 26, 2016 at 12:16 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> This will be slower, right? Is this an actual issue? Like, can you point to
> cases like [1] (i.e. a real-world race condition with real-world
> consequences) that were fixed with your patch?
>
> Ronald
>
> https://blogs.gnome.org/rbultje/2014/01/12/brute-force-thread-debugging/

With the new relaxed and acquire/release atomic int get and set
functions I proposed in
http://ffmpeg.org/pipermail/ffmpeg-devel/2016-March/190516.html, we
have the means to make the current code portable while preserving the
exact performance for x86/x86_64. But unlike the progress[field]
values, I am strongly against using this approach for |state| for two
reasons.

1) |state| is guarded by one of two mutexes: |mutex| and
|progress_mutex|. Which mutex should guard |state| when is not
documented, and I haven't reverse-engineered that policy.

2) progress[field] is only accessed in a pair of short functions
(ff_thread_report_progress and ff_thread_await_progress). For such a
simple problem, my first attempt at using atomics still needed to be
corrected by an expert (Dmitry Vyukov, who works on ThreadSanitizer).
In contrast, |state| is accessed in many places in
libavcodec/pthread_frame.c. Using atomics on |state| will be more
difficult, and the resulting code will be difficult to maintain.

So, to fix the |state| data races I propose using the approach of this patch.

Note: this proposal is my personal judgement and may not reflect
Dmitry's opinion. Dmitry and I seem to disagree on how to best fix the
|state| data races. To my surprise, he is fine with using atomics to
make the double-checked locking style access to |state| correct and
portable.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Move the |die| member of FrameThreadContext to PerThreadContext.

2016-03-01 Thread Wan-Teh Chang
On Fri, Feb 26, 2016 at 12:14 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> Is probably OK since it doesn't introduce new lock/unlock pairs, yes. We
> typically don't care about a few bytes of memory.

Hi Ronald,

I just signed off this patch and submitted it in
http://ffmpeg.org/pipermail/ffmpeg-devel/2016-March/190520.html.

Does the patch need additional review? Could you approve and commit
the patch for me?

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Move the |die| member of FrameThreadContext to PerThreadContext.

2016-03-01 Thread Wan-Teh Chang
This fixes a data race warning by ThreadSanitizer.
FrameThreadContext.die is read by all the worker threads but is not
protected by any mutex. Move it to PerThreadContext so that each worker
thread reads its own copy of |die|, which can then be protected with
PerThreadContext.mutex.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavcodec/pthread_frame.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..c5ac44f 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -93,6 +93,8 @@ typedef struct PerThreadContext {
 
 const enum AVPixelFormat *available_formats; ///< Format array for 
get_format()
 enum AVPixelFormat result_format;///< get_format() result
+
+int die;///< Set when the thread should exit.
 } PerThreadContext;
 
 /**
@@ -111,8 +113,6 @@ typedef struct FrameThreadContext {
 * Set for the first N packets, where N is 
the number of threads.
 * While it is set, 
ff_thread_en/decode_frame won't return any results.
 */
-
-int die;   ///< Set when threads should exit.
 } FrameThreadContext;
 
 #define THREAD_SAFE_CALLBACKS(avctx) \
@@ -134,10 +134,10 @@ static attribute_align_arg void *frame_worker_thread(void 
*arg)
 
 pthread_mutex_lock(>mutex);
 while (1) {
-while (p->state == STATE_INPUT_READY && !fctx->die)
+while (p->state == STATE_INPUT_READY && !p->die)
 pthread_cond_wait(>input_cond, >mutex);
 
-if (fctx->die) break;
+if (p->die) break;
 
 if (!codec->update_thread_context && THREAD_SAFE_CALLBACKS(avctx))
 ff_thread_finish_setup(avctx);
@@ -553,12 +553,11 @@ void ff_frame_thread_free(AVCodecContext *avctx, int 
thread_count)
 fctx->threads->avctx->internal->is_copy = 1;
 }
 
-fctx->die = 1;
-
 for (i = 0; i < thread_count; i++) {
 PerThreadContext *p = >threads[i];
 
 pthread_mutex_lock(>mutex);
+p->die = 1;
 pthread_cond_signal(>input_cond);
 pthread_mutex_unlock(>mutex);
 
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Add the relaxed and acquire/releae flavors of avpriv_atomic_int_get and avpriv_atomic_int_set.

2016-03-01 Thread Wan-Teh Chang
Hi,

I removed the ff_thread_report_progress and ff_thread_await_progress
changes from this patch. This patch now only changes
libavutil/atomic*. I also added some comments to libavutil/atomic.h to
describe how the acquire and release operations are intended to be
used.

While working on the comments in libavutil/atomic.h, I noticed that
the comment for avpriv_atomic_int_add_and_fetch contradicts the code.
The comment says:

 * @note This does NOT act as a memory barrier. This is primarily
 *   intended for reference counting.

But the primary implementation in libavutil/atomic_gcc.h uses the
sequentially consistent ordering rather than the relaxed ordering:

#define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_gcc
static inline int atomic_int_add_and_fetch_gcc(volatile int *ptr, int inc)
{
#if HAVE_ATOMIC_COMPARE_EXCHANGE
return __atomic_add_fetch(ptr, inc, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch(ptr, inc);
#endif
}

How should we make the comment match the code? I suggest that the
default memory order of avpriv_atomic_int_add_and_fetch be the
sequentially consistent ordering, and add a
avpriv_atomic_int_add_and_fetch_relaxed function that does not issue a
memory barrier.

If you agree with my proposal, I can write a separate patch to do that.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Add the relaxed and acquire/releae flavors of avpriv_atomic_int_get and avpriv_atomic_int_set.

2016-03-01 Thread Wan-Teh Chang
Correct the order of the load/store operation and the memory barrier in
avpriv_atomic_int_get and avpriv_atomic_int_set.

Signed-off-by: Wan-Teh Chang <w...@google.com>
---
 libavutil/atomic.c   | 48 
 libavutil/atomic.h   | 45 +++--
 libavutil/atomic_gcc.h   | 44 
 libavutil/atomic_suncc.h | 30 +-
 libavutil/atomic_win32.h | 28 
 5 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/libavutil/atomic.c b/libavutil/atomic.c
index b13725d..3dc9062 100644
--- a/libavutil/atomic.c
+++ b/libavutil/atomic.c
@@ -40,6 +40,16 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return res;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
+int avpriv_atomic_int_get_acquire(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 pthread_mutex_lock(_lock);
@@ -47,6 +57,16 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
 pthread_mutex_unlock(_lock);
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
+void avpriv_atomic_int_set_release(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 int res;
@@ -77,11 +97,31 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return *ptr;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
+int avpriv_atomic_int_get_acquire(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 *ptr = val;
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
+void avpriv_atomic_int_set_release(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 *ptr += inc;
@@ -121,6 +161,14 @@ int main(void)
 avpriv_atomic_int_set(, 3);
 res = avpriv_atomic_int_get();
 av_assert0(res == 3);
+avpriv_atomic_int_set_relaxed(, 5);
+res = avpriv_atomic_int_get_relaxed();
+av_assert0(res == 5);
+res = avpriv_atomic_int_add_and_fetch(, -1);
+av_assert0(res == 4);
+avpriv_atomic_int_set_release(, 3);
+res = avpriv_atomic_int_get_acquire();
+av_assert0(res == 3);
 
 return 0;
 }
diff --git a/libavutil/atomic.h b/libavutil/atomic.h
index 15906d2..e0e67d2 100644
--- a/libavutil/atomic.h
+++ b/libavutil/atomic.h
@@ -40,20 +40,61 @@
  *
  * @param ptr atomic integer
  * @return the current value of the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a full memory barrier.
  */
 int avpriv_atomic_int_get(volatile int *ptr);
 
 /**
+ * Load the current value stored in an atomic integer, with no memory barrier.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+int avpriv_atomic_int_get_relaxed(volatile int *ptr);
+
+/**
+ * Load the current value stored in an atomic integer, with an acquire memory
+ * barrier.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ * @note This acts as an acquire memory barrier. When used with a release
+ *   operation on the same value, this establishes a happens-before
+ *   relationship between threads.
+ */
+int avpriv_atomic_int_get_acquire(volatile int *ptr);
+
+/**
  * Store a new value in an atomic integer.
  *
  * @param ptr atomic integer
  * @param val the value to store in the atomic integer
- * @note This acts as a memory barrier.
+ * @note This acts as a full memory barrier.
  */
 void avpriv_atomic_int_set(volatile int *ptr, int val);
 
 /**
+ * Store a new value in an atomic integer, with no memory barrier.
+ *
+ * @param ptr atomic integer
+ * @param val the value to store in the atomic integer
+ * @note This does NOT act as a memory barrier.
+ */
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val);
+
+/**
+ * Store a new value in an atomic integer, with a release memory barrier.
+ *
+ * @param ptr atomic integer
+ * @param val the value to store in the atomic integer
+ * @note This acts as a release memory barrier. When used with an acquire
+ *   operation on the same value, this establishes a happens-before
+ *   relationship between threads.
+ */
+void avpriv_atomic_int_set_release(volatile int *ptr, int val);
+
+/**
  * Add a value to an atomic integer.
  *
  * @param ptr atomic integer
diff --git a/libavutil/atomic_gcc.h b/libavutil/atomic_gcc.h
index 5f9fc49..34a6a8e 100644
--- a/libavutil/atomic_gcc.h
+++ b/libavutil/atomic_gcc.h
@@ -31,19 +31,63 @@ 

Re: [FFmpeg-devel] [PATCH] Add the relaxed and acquire/releae flavors of avpriv_atomic_int_get and avpriv_atomic_int_set.

2016-03-01 Thread Wan-Teh Chang
Hi,

I have some remarks on this patch.

1. This patch is an alternative fix for the data races in the access
to frame progress values in ff_thread_report_progress and
ff_thread_await_progress. Between the two patches I submitted, I
prefer the first patch
(http://ffmpeg.org/pipermail/ffmpeg-devel/2016-February/190100.html).

Although C11/C++ made significant progress in standardizing the C/C++
memory model, many good programmers still have trouble understanding
it. Code that always accesses progress[field] under the protection of
progress_mutex is very easy to understand. Given how expensive it is
to decode video frame, I cannot help but suspect the double-checked
locking style code in ff_thread_report_progress and
ff_thread_await_progress is premature optimization. If we want to
preserve it, this patch makes it correct.

2. While working on this patch, I found the order of the load/store
operation and the memory barrier in some avpriv_atomic_int_get and
avpriv_atomic_int_set implementations seems wrong. For example,
https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html has examples of
how atomic load and store operations are mapped to assembly code. Some
examples for Load Seq Cst and Store Seq Cst use no memory barrier or
two memory barriers. But in the examples that use one memory barrier,
the order of the load/store operation and the memory barrier is
opposite to the order in avpriv_atomic_int_get and
avpriv_atomic_int_set.

Should I split this change into a separate patch?

3. The Solaris Studio compiler intrinsics for memory barriers (which
are used in atomic_suncc.h) are documented at:

https://docs.oracle.com/cd/E24457_01/html/E21991/gjzmf.html

On Mon, Feb 29, 2016 at 7:05 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi,
>
> On Mon, Feb 29, 2016 at 9:35 PM, Wan-Teh Chang <wtc-at-google@ffmpeg.org
>> wrote:
>
>> Correct the order of the load/store operation and the memory barrier in
>> avpriv_atomic_int_get and avpriv_atomic_int_set.
>
> This sounds useful. Is there some documentation on which one should be used
> under what conditions? I.e. when do I need full, release or acquire?

http://en.cppreference.com/w/c/atomic/memory_order is pretty good. I
am surprised that I can't find a good Wikipedia article on this
subject. The best I can find is
https://en.wikipedia.org/wiki/Memory_ordering.

I'd like to reiterate that this is difficult for many good
programmers. I need the relaxed and acquire/release flavors of
avpriv_atomic_int_get and avpriv_atomic_int_set to make the
ff_thread_report_progress and ff_thread_await_progress code portable
and to ensure that the new code is identical to the original code for
x86/x86_64, according to the mappings in
https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Fix a bug in ff_thread_report_progress in updating progress[field].

2016-03-01 Thread Wan-Teh Chang
On Mon, Feb 29, 2016 at 7:00 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> Do you have a sample+commandline to reproduce? The thing is, in all cases
> where we use this, only one thread writes to a specific progress[n]. Two
> threads may write to progress[], one per field, but one will write to
> progress[0] and the other to progress[1]. If this happens, I don't mind the
> patch, but I'd like to know how exactly this happens (also for posterity
> documentation purposes).

Dmitry Vyukov found this issue by code inspection. We understand this
is a bug only if two threads may call ff_thread_report_progress, on
the same progress[field] value (which I didn't state in my original
report), at the same time. I thought we should report the issue here
to get your opinion.

You can reject this patch, or accept the patch to make it easier to
reason about the correctness of the code.

By the way, I'm also wondering why ff_thread_report_progress is
sometimes called with progress[field] >= n? I saw it happen when I ran
make fate-h264, but did not investigate it.

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Add the relaxed and acquire/releae flavors of avpriv_atomic_int_get and avpriv_atomic_int_set.

2016-02-29 Thread Wan-Teh Chang
Correct the order of the load/store operation and the memory barrier in
avpriv_atomic_int_get and avpriv_atomic_int_set.

Use the atomic get and set functions in ff_thread_report_progress and
ff_thread_await_progress.
---
 libavcodec/pthread_frame.c | 11 +++
 libavutil/atomic.c | 48 ++
 libavutil/atomic.h | 33 +++
 libavutil/atomic_gcc.h | 44 ++
 libavutil/atomic_suncc.h   | 30 -
 libavutil/atomic_win32.h   | 28 +++
 6 files changed, 189 insertions(+), 5 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..34f9207 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -32,6 +32,7 @@
 #include "thread.h"
 #include "version.h"
 
+#include "libavutil/atomic.h"
 #include "libavutil/avassert.h"
 #include "libavutil/buffer.h"
 #include "libavutil/common.h"
@@ -474,7 +475,7 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 PerThreadContext *p;
 volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
-if (!progress || progress[field] >= n) return;
+if (!progress || avpriv_atomic_int_get_relaxed([field]) >= n) 
return;
 
 p = f->owner->internal->thread_ctx;
 
@@ -482,8 +483,10 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, 
n, field);
 
 pthread_mutex_lock(>progress_mutex);
-progress[field] = n;
-pthread_cond_broadcast(>progress_cond);
+if (progress[field] < n) {
+avpriv_atomic_int_set_release([field], n);
+pthread_cond_broadcast(>progress_cond);
+}
 pthread_mutex_unlock(>progress_mutex);
 }
 
@@ -492,7 +495,7 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 PerThreadContext *p;
 volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
-if (!progress || progress[field] >= n) return;
+if (!progress || avpriv_atomic_int_get_acquire([field]) >= n) 
return;
 
 p = f->owner->internal->thread_ctx;
 
diff --git a/libavutil/atomic.c b/libavutil/atomic.c
index b13725d..3dc9062 100644
--- a/libavutil/atomic.c
+++ b/libavutil/atomic.c
@@ -40,6 +40,16 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return res;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
+int avpriv_atomic_int_get_acquire(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 pthread_mutex_lock(_lock);
@@ -47,6 +57,16 @@ void avpriv_atomic_int_set(volatile int *ptr, int val)
 pthread_mutex_unlock(_lock);
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
+void avpriv_atomic_int_set_release(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 int res;
@@ -77,11 +97,31 @@ int avpriv_atomic_int_get(volatile int *ptr)
 return *ptr;
 }
 
+int avpriv_atomic_int_get_relaxed(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
+int avpriv_atomic_int_get_acquire(volatile int *ptr)
+{
+return avpriv_atomic_int_get(ptr);
+}
+
 void avpriv_atomic_int_set(volatile int *ptr, int val)
 {
 *ptr = val;
 }
 
+void avpriv_atomic_int_set_relaxed(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
+void avpriv_atomic_int_set_release(volatile int *ptr, int val)
+{
+avpriv_atomic_int_set(ptr, val);
+}
+
 int avpriv_atomic_int_add_and_fetch(volatile int *ptr, int inc)
 {
 *ptr += inc;
@@ -121,6 +161,14 @@ int main(void)
 avpriv_atomic_int_set(, 3);
 res = avpriv_atomic_int_get();
 av_assert0(res == 3);
+avpriv_atomic_int_set_relaxed(, 5);
+res = avpriv_atomic_int_get_relaxed();
+av_assert0(res == 5);
+res = avpriv_atomic_int_add_and_fetch(, -1);
+av_assert0(res == 4);
+avpriv_atomic_int_set_release(, 3);
+res = avpriv_atomic_int_get_acquire();
+av_assert0(res == 3);
 
 return 0;
 }
diff --git a/libavutil/atomic.h b/libavutil/atomic.h
index 15906d2..1921095 100644
--- a/libavutil/atomic.h
+++ b/libavutil/atomic.h
@@ -45,6 +45,23 @@
 int avpriv_atomic_int_get(volatile int *ptr);
 
 /**
+ * Load the current value stored in an atomic integer, with no memory barrier.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ */
+int avpriv_atomic_int_get_relaxed(volatile int *ptr);
+
+/**
+ * Load the current value stored in an atomic integer, with an acquire memory
+ * barrier.
+ *
+ * @param ptr atomic integer
+ * @return the current value of the atomic integer
+ */
+int avpriv_atomic_int_get_acquire(volatile int *ptr);
+
+/**
  * Store a new value in an 

[FFmpeg-devel] [PATCH] Fix a bug in ff_thread_report_progress in updating progress[field].

2016-02-29 Thread Wan-Teh Chang
This bug was found by Dmitry Vyukov. If two threads may call
ff_thread_report_progress at the same time, progress[field] may
decrease. For example, suppose progress[field] is 10 and two threads
call ff_thread_report_progress to update progress[field] to 11 and
12, respectively. If the second thread acquires progress_mutex first
and updates progress[field] to 12, then the first thread will update
progress[field] to 11, causing progress[field] to decrease.
---
 libavcodec/pthread_frame.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..a43e8fe 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -482,8 +482,10 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, 
n, field);
 
 pthread_mutex_lock(>progress_mutex);
-progress[field] = n;
-pthread_cond_broadcast(>progress_cond);
+if (progress[field] < n) {
+progress[field] = n;
+pthread_cond_broadcast(>progress_cond);
+}
 pthread_mutex_unlock(>progress_mutex);
 }
 
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] submit_thread should also lock the mutex that guards the source avctx (prev_thread->avctx) when calling update_context_from_thread.

2016-02-29 Thread Wan-Teh Chang
On Fri, Feb 26, 2016 at 5:04 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> To be a little bit more explicit, it should be relatively easy to
> accomplish this by changing the position of qscale in the relevant struct,
> and only copy that half of the struct (where entries are obviously grouped)
> where we actually care about the copied value.
>
> See e.g. the copy_fields macro [1] on how we do partial-struct copies.
>
> Ronald
>
> [1]
> http://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavcodec/h264_slice.c;h=9a5bc3f63fb5c6aae3747a03657b8dc821c7d750;hb=HEAD#l427

Hi Ronald,

Thanks a lot for the hints. As soon as I had access to a computer last
Saturday, I implemented your suggestion. Although the data race on
|qscale| is gone, ThreadSantizer warned about the next data race of
this kind. I fixed that using the same approach, and then
ThreadSantizer warned about the next data race of this kind.

That made me realize the fix for this class of data race is going to
big, and I should do this work in the ffmpeg HEAD rather than the old
version I'm using. I followed the instructions James Almer gave
earlier in this email thread:

./configure --samples=fate-suite/ --toolchain=clang-tsan --disable-stripping
make fate-h264 THREADS=4

(Note: James suggested --toolchain=gcc-tsan. That didn't work for me
for some reason. I tried --toolchain=clang-tsan instead and it
worked.)

ThreadSanitizer reported the same class of data races in the h264
decoder with multiple threads. To understand the code and propose a
proper fix, I am afraid that I will need to live and breathe this code
for several days because the structs involved (H264Context,
H264SliceContext, etc.) have a lot of members.

So, I am wondering if you could run fate-h264 under tsan and take a
look at the data races in the h264 decoding code. I think you will be
able to fix the data races much faster than I can. If you don't have a
lot of time, I'll be happy to implement a solution if you can outline
it after your investigation. Thanks.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] submit_thread should also lock the mutex that guards the source avctx (prev_thread->avctx) when calling update_context_from_thread.

2016-02-29 Thread Wan-Teh Chang
Hi James,

On Thu, Feb 25, 2016 at 6:50 PM, James Almer <jamr...@gmail.com> wrote:
>
> Did you try running the FATE suite using threadsanitizer, or just decoded 
> random
> videos? I remember like half the tests were failing because of data races and
> most of them pointed to code changed by your patches. It would be interesting 
> to
> see how they are affected.

When I submitted my patch for review, I only decoded random videos
(with an old version of ffmpeg). Later in this email thread, Ronald
showed my patch removed all concurrency, so I'll need to go back to
the drawing board.

> configure has the --toolchain=gcc-tsan option for this. Make sure to also add
> --disable-stripping, then run "make fate THREADS=4 SAMPLES=/path/to/samples" 
> or
> similar. You may instead want to run a small set at a time instead of the 
> whole
> suite since it's big and threadsanitizer slow. Try for example fate-h264 in 
> that
> case.

Thank you very much for the command line. I did this last Saturday on
the ffmpeg HEAD. Although the h264 decoding code has changed
significantly between the old version I'm using and the ffmpeg HEAD, I
am certain these are the same class of data race.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] submit_thread should also lock the mutex that guards the source avctx (prev_thread->avctx) when calling update_context_from_thread.

2016-02-26 Thread Wan-Teh Chang
On Fri, Feb 26, 2016 at 1:17 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> I'm happy to help out if you tell me which field/member tsan is complaining
> about.

Hi Ronald,

I am using an old version of ffmpeg. Here is the ThreadSanitizer
warning on the data race and the relevant source code from that ffmpeg
source tree.

The data race is reported on the |qscale| member of MpegEncContext.
(In the current version, the corresponding thing seems to be the
|qscale| member in H264SliceContext.)

I will also try to reproduce the data race with the current version of
ffmpeg. The relevant code in the current version looks similar, but it
is certainly possible that the data race is gone.

Thanks a lot for your help!

Wan-Teh Chang

==

WARNING: ThreadSanitizer: data race (pid=161600)
  Write of size 4 at 0x7f53af71a830 by thread T8 (mutexes: write M16537):
#0 ff_h264_decode_mb_cabac ffmpeg/libavcodec/h264_cabac.c:2336:23
(754b7d528ac43192f37741068b3495f5+0x00673706)
#1 decode_slice ffmpeg/libavcodec/h264.c:3553:23
(754b7d528ac43192f37741068b3495f5+0x00667e37)
#2 execute_decode_slices ffmpeg/libavcodec/h264.c:3709:16
(754b7d528ac43192f37741068b3495f5+0x0066691d)
#3 decode_nal_units ffmpeg/libavcodec/h264.c:4010:17
(754b7d528ac43192f37741068b3495f5+0x00633530)
#4 decode_frame ffmpeg/libavcodec/h264.c:4132:17
(754b7d528ac43192f37741068b3495f5+0x0064a6eb)
#5 frame_worker_thread ffmpeg/libavcodec/pthread.c:389:21
(754b7d528ac43192f37741068b3495f5+0x0098669f)

  Previous read of size 8 at 0x7f53af71a830 by main thread (mutexes:
write M16539):
#0 memcpy 
llvm/llvm/projects/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc:685:5
(754b7d528ac43192f37741068b3495f5+0x000d6600)
#1 ff_mpeg_update_thread_context
ffmpeg/libavcodec/mpegvideo.c:542:9
(754b7d528ac43192f37741068b3495f5+0x008e08e7)
#2 decode_update_thread_context ffmpeg/libavcodec/h264.c:1178:11
(754b7d528ac43192f37741068b3495f5+0x0064965a)
#3 update_context_from_thread ffmpeg/libavcodec/pthread.c:462:19
(754b7d528ac43192f37741068b3495f5+0x00983a5e)
#4 submit_packet ffmpeg/libavcodec/pthread.c:563:15
(754b7d528ac43192f37741068b3495f5+0x00982e0c)
#5 ff_thread_decode_frame ffmpeg/libavcodec/pthread.c:624
(754b7d528ac43192f37741068b3495f5+0x00982e0c)
#6 avcodec_decode_video2 ffmpeg/libavcodec/utils.c:1565:19
(754b7d528ac43192f37741068b3495f5+0x00a8e401)

Code snippets for the stack trace of the first thread (thread T8):

libavcodec/h264_cabac.c
2316 // decode_cabac_mb_dqp
2317 if(get_cabac_noinline( >cabac, >cabac_state[60 +
(h->last_qscale_diff != 0)])){
2318 int val = 1;
2319 int ctx= 2;
2320 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
2321
2322 while( get_cabac_noinline( >cabac,
>cabac_state[60 + ctx] ) ) {
2323 ctx= 3;
2324 val++;
2325 if(val > 2*max_qp){ //prevent infinite loop
2326 av_log(h->s.avctx, AV_LOG_ERROR, "cabac
decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
2327 return -1;
2328 }
2329 }
2330
2331 if( val&0x01 )
2332 val=   (val + 1)>>1 ;
2333 else
2334 val= -((val + 1)>>1);
2335 h->last_qscale_diff = val;
2336 s->qscale += val;
2337 if(((unsigned)s->qscale) > max_qp){
2338 if(s->qscale<0) s->qscale+= max_qp+1;
2339 elses->qscale-= max_qp+1;
2340 }
2341 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2342 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
2343 }else

libavcodec/h264.c (the first two code snippets are in
libavcodec/h264_slice.c in the current version):
3525 static int decode_slice(struct AVCodecContext *avctx, void *arg)
3526 {
3527 H264Context *h = *(void **)arg;
3528 MpegEncContext *const s = >s;
3529 const int part_mask = s->partitioned_frame ? (ER_AC_END |
ER_AC_ERROR)
3530: 0x7F;
3531 int lf_x_start = s->mb_x;
3532
3533 s->mb_skip_run = -1;
3534
3535 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME ||
3536 s->codec_id != AV_CODEC_ID_H264 ||
3537 (CONFIG_GRAY && (s->flags & CODEC_FLAG_GRAY));
3538
3539 if (h->pps.cabac) {
3540 /* realign */
3541 align_get_bits(>gb);
3542
3543 /* init cabac */
3544 ff_init_cabac_states();
3545 ff_init_cabac_decoder(>cabac,
3546   s->gb.buffer + get_bits_count(>gb) / 8,
3547   (get_bits_left(>gb) + 7) / 8)

Re: [FFmpeg-devel] [PATCH] submit_thread should also lock the mutex that guards the source avctx (prev_thread->avctx) when calling update_context_from_thread.

2016-02-26 Thread Wan-Teh Chang
On Fri, Feb 26, 2016 at 12:44 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
> Hi,
>
> On Fri, Feb 26, 2016 at 3:26 PM, Ronald S. Bultje <rsbul...@gmail.com>
> wrote:
>
>>
>> So doesn't this patch remove all concurrency by making the decode() of
>> thread N-1 finish before decode() of thread N can start? More practically,
>> what is the output of time ffmpeg -threads N -i large-h264-file -f null -
>> with N being 1 and 2 before and after your patch?
>>
>
> I actually tested this, just for fun:
>
> before:
> $ for t in 1 2; do time ffmpeg -threads $t -i tos3k-vp9-b5000.webm -f null
> -v 0 -nostats -vframes 500 -; done
>
> real 0m6.732s
> user 0m6.643s
> sys 0m0.064s
>
> real 0m4.124s <<< note how this is significantly smaller than 6.732
> user 0m6.566s
> sys 0m0.114s
>
> after:
> $ for t in 1 2; do time ffmpeg -threads $t -i tos3k-vp9-b5000.webm -f null
> -v 0 -nostats -vframes 500 -; done
>
> real 0m6.808s
> user 0m6.541s
> sys 0m0.071s
>
> real 0m7.001s <<< note how this is no longer significantly smaller than
> 6.808
> user 0m6.945s
> sys 0m0.108s
>
> That seems like a pretty major malfunction of MT performance to me...

Hi Ronald,

Thank you very much for reviewing and testing my patch. I will study
your description of the code, analyze the data race reported by
ThreadSanitizer again, and report back.

Among the four data races I reported yesterday, this data race is the
most difficult to analyze because the code that has the data race is
outside libavcodec/pthread_frame.c, in the h264 decoder I remember.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Always read frame progress values under progress_mutex.

2016-02-25 Thread Wan-Teh Chang
On Thu, Feb 25, 2016 at 8:14 PM, Michael Niedermayer
<mich...@niedermayer.cc> wrote:
>
> also considering this is performance relevant code, please provide
> a benchmark

I'll be happy to provide a benchmark. I don't see the instructions for
running benchmarks in the patch submission checklist:
https://ffmpeg.org/developer.html#patch-submission-checklist

How do I run FFmpeg benchmarks?

Thanks,
Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] submit_thread should also lock the mutex that guards the source avctx (prev_thread->avctx) when calling update_context_from_thread.

2016-02-25 Thread Wan-Teh Chang
This is because the codec->decode() call in frame_worker_thread may be
modifying that avctx at the same time. This data race is reported by
ThreadSanitizer.

Although submit_thread holds two locks simultaneously, it always
acquires them in the same order because |prev_thread| is always the
array element before |p| (with a wraparound at the end of array), and
submit_thread is the only function that acquires those two locks, so
this won't result in a deadlock.
---
 libavcodec/pthread_frame.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..e7879e3 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -330,7 +330,9 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 pthread_mutex_unlock(_thread->progress_mutex);
 }
 
+pthread_mutex_lock(_thread->mutex);
 err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+pthread_mutex_unlock(_thread->mutex);
 if (err) {
 pthread_mutex_unlock(>mutex);
 return err;
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Read |state| under the protection of |mutex| or |progress_mutex|.

2016-02-25 Thread Wan-Teh Chang
Although not documented, |state| is guarded by either |mutex| or
|progress_mutex|. In several places |state| is read without mutex
protection, often as a double-checked locking style performance
optimization. Fix the data races by reading |state| under mutex
protection.
---
 libavcodec/pthread_frame.c | 44 +++-
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..a768b1c 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -323,12 +323,10 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 
 if (prev_thread) {
 int err;
-if (prev_thread->state == STATE_SETTING_UP) {
-pthread_mutex_lock(_thread->progress_mutex);
-while (prev_thread->state == STATE_SETTING_UP)
-pthread_cond_wait(_thread->progress_cond, 
_thread->progress_mutex);
-pthread_mutex_unlock(_thread->progress_mutex);
-}
+pthread_mutex_lock(_thread->progress_mutex);
+while (prev_thread->state == STATE_SETTING_UP)
+pthread_cond_wait(_thread->progress_cond, 
_thread->progress_mutex);
+pthread_mutex_unlock(_thread->progress_mutex);
 
 err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
 if (err) {
@@ -353,9 +351,9 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 if (!p->avctx->thread_safe_callbacks && (
  p->avctx->get_format != avcodec_default_get_format ||
  p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {
+pthread_mutex_lock(>progress_mutex);
 while (p->state != STATE_SETUP_FINISHED && p->state != 
STATE_INPUT_READY) {
 int call_done = 1;
-pthread_mutex_lock(>progress_mutex);
 while (p->state == STATE_SETTING_UP)
 pthread_cond_wait(>progress_cond, >progress_mutex);
 
@@ -374,8 +372,8 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 p->state  = STATE_SETTING_UP;
 pthread_cond_signal(>progress_cond);
 }
-pthread_mutex_unlock(>progress_mutex);
 }
+pthread_mutex_unlock(>progress_mutex);
 }
 
 fctx->prev_thread = p;
@@ -426,12 +424,10 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
 do {
 p = >threads[finished++];
 
-if (p->state != STATE_INPUT_READY) {
-pthread_mutex_lock(>progress_mutex);
-while (p->state != STATE_INPUT_READY)
-pthread_cond_wait(>output_cond, >progress_mutex);
-pthread_mutex_unlock(>progress_mutex);
-}
+pthread_mutex_lock(>progress_mutex);
+while (p->state != STATE_INPUT_READY)
+pthread_cond_wait(>output_cond, >progress_mutex);
+pthread_mutex_unlock(>progress_mutex);
 
 av_frame_move_ref(picture, p->frame);
 *got_picture_ptr = p->got_frame;
@@ -510,13 +506,13 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
 
 if (!(avctx->active_thread_type_THREAD_FRAME)) return;
 
-if(p->state == STATE_SETUP_FINISHED){
+pthread_mutex_lock(>progress_mutex);
+if (p->state == STATE_SETUP_FINISHED) {
 av_log(avctx, AV_LOG_WARNING, "Multiple ff_thread_finish_setup() 
calls\n");
+} else {
+p->state = STATE_SETUP_FINISHED;
+pthread_cond_broadcast(>progress_cond);
 }
-
-pthread_mutex_lock(>progress_mutex);
-p->state = STATE_SETUP_FINISHED;
-pthread_cond_broadcast(>progress_cond);
 pthread_mutex_unlock(>progress_mutex);
 }
 
@@ -528,12 +524,10 @@ static void park_frame_worker_threads(FrameThreadContext 
*fctx, int thread_count
 for (i = 0; i < thread_count; i++) {
 PerThreadContext *p = >threads[i];
 
-if (p->state != STATE_INPUT_READY) {
-pthread_mutex_lock(>progress_mutex);
-while (p->state != STATE_INPUT_READY)
-pthread_cond_wait(>output_cond, >progress_mutex);
-pthread_mutex_unlock(>progress_mutex);
-}
+pthread_mutex_lock(>progress_mutex);
+while (p->state != STATE_INPUT_READY)
+pthread_cond_wait(>output_cond, >progress_mutex);
+pthread_mutex_unlock(>progress_mutex);
 p->got_frame = 0;
 }
 }
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Move the |die| member of FrameThreadContext to PerThreadContext.

2016-02-25 Thread Wan-Teh Chang
Hi Ronald,

On Thu, Feb 25, 2016 at 12:00 PM, Ronald S. Bultje <rsbul...@gmail.com> wrote:
>
> But does it fix an actual bug? (Is there a sample this fixes?)

I assume you agree that reading and writing the non-atomic int |die|
flag without mutex protection is a data race. Code inspection verifies
libavcodec/pthread_frame.c reads and writes |die| without mutex
protection.

To fix the data race, we can either protect |die| with a mutex, or
declare |die| as an atomic int.

> If anything, you can use atomic ints instead of regular ints if we don't
> already [1].
>
[...]
> [1] https://ffmpeg.org/doxygen/trunk/atomic_8h_source.html

Thanks for the link. avpriv_atomic_int_get and avpriv_atomic_int_set
perform the load and store with sequential consistency, which requires
a full memory barrier and is slower than simply relying on the
existing per-thread mutex. The drawback of my patch is that it uses
more memory. I think that's the right trade-off, but I would be happy
to use an atomic int. Please let me know what you prefer.

Thanks,
Wan-Teh Chang

PS: Here is the relevant part of the ThreadSanitizer report. I'm using
an old version of ffmpeg (the code is in libavcodec/pthread.c in that
version):

WARNING: ThreadSanitizer: data race (pid=959745)
  Write of size 4 at 0x7d1800055a24 by main thread (mutexes: write M18754):
#0 frame_thread_free ffmpeg/libavcodec/pthread.c:761:15
(c9c6e60608e58c16d6d8dc75627a71ed+0x00985b5a)
#1 ff_thread_free ffmpeg/libavcodec/pthread.c:1103:9
(c9c6e60608e58c16d6d8dc75627a71ed+0x009859ea)
#2 avcodec_close ffmpeg/libavcodec/utils.c:1825:13
(c9c6e60608e58c16d6d8dc75627a71ed+0x00a8f7a6)
...

  Previous read of size 4 at 0x7d1800055a24 by thread T11 (mutexes:
write M18771):
#0 frame_worker_thread ffmpeg/libavcodec/pthread.c:378:60
(c9c6e60608e58c16d6d8dc75627a71ed+0x0098656b)

Note that although some mutexes were acquired when the write and read
occurred, they were not the same mutex. Here are the relevant code
snippets:

 361 /**
 362  * Codec worker thread.
 363  *
 364  * Automatically calls ff_thread_finish_setup() if the codec does
 365  * not provide an update_thread_context method, or if the codec returns
 366  * before calling it.
 367  */
 368 static attribute_align_arg void *frame_worker_thread(void *arg)
 369 {
 370 PerThreadContext *p = arg;
 371 FrameThreadContext *fctx = p->parent;
 372 AVCodecContext *avctx = p->avctx;
 373 const AVCodec *codec = avctx->codec;
 374
 375 pthread_mutex_lock(>mutex);
 376 while (1) {
 377 int i;
 378 while (p->state == STATE_INPUT_READY && !fctx->die)
 379 pthread_cond_wait(>input_cond, >mutex);
 380
 381 if (fctx->die) break;
 382
...

 750 static void frame_thread_free(AVCodecContext *avctx, int thread_count)
 751 {
 752 FrameThreadContext *fctx = avctx->thread_opaque;
 753 const AVCodec *codec = avctx->codec;
 754 int i;
 755
 756 park_frame_worker_threads(fctx, thread_count);
 757
 758 if (fctx->prev_thread && fctx->prev_thread != fctx->threads)
 759 update_context_from_thread(fctx->threads->avctx,
fctx->prev_thread- >avctx, 0);
 760
 761 fctx->die = 1;
 762
 763 for (i = 0; i < thread_count; i++) {
 764 PerThreadContext *p = >threads[i];
 765
 766 pthread_mutex_lock(>mutex);
 767 pthread_cond_signal(>input_cond);
 768 pthread_mutex_unlock(>mutex);
 769
 770 if (p->thread_init)
 771 pthread_join(p->thread, NULL);
 772 p->thread_init=0;
 773
 774 if (codec->close)
 775 codec->close(p->avctx);
 776
 777 avctx->codec = NULL;
 778
 779 release_delayed_buffers(p);
 780 }
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Always read frame progress values under progress_mutex.

2016-02-25 Thread Wan-Teh Chang
On Thu, Feb 25, 2016 at 4:15 PM, Hendrik Leppkes <h.lepp...@gmail.com> wrote:
>
> Similar to the other patch, please explain what this actually fixes,
> those sanitizer tools produce a lot of false positives and nonsensical
> warnings.

Yes, certainly.

In libavcodec/pthread_frame.c, we have:

 46 /**
 47  * Context used by codec threads and stored in their
AVCodecInternal thread_ctx.
 48  */
 49 typedef struct PerThreadContext {
 ...
 59 pthread_mutex_t progress_mutex; ///< Mutex used to protect
frame progress values and progress_cond.

The comment on line 59 documents that progress_mutex guards frame
progress values and progress_cond. (In fact, progress_mutex also
guards the |state| field in many places, but that's not the subject of
this patch.)

I assume "frame progess values" mean f->progress->data, where |f|
points to a ThreadFrame. I inspected libavcodec/pthread_frame.c and
concluded the code (ff_thread_report_progress and
ff_thread_await_progress) matches that comment, except that it also
does a double-checked locking style performance optimization. For
example, here is ff_thread_report_progress:

472 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
473 {
474 PerThreadContext *p;
475 volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
476
477 if (!progress || progress[field] >= n) return;
478
479 p = f->owner->internal->thread_ctx;
480
481 if (f->owner->debug_DEBUG_THREADS)
482 av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field
%d\n", progress, n, field);
483
484 pthread_mutex_lock(>progress_mutex);
485 progress[field] = n;
486 pthread_cond_broadcast(>progress_cond);
487 pthread_mutex_unlock(>progress_mutex);
488 }

progress[field] can only increase. So, both ff_thread_report_progress
and ff_thread_await_progress have a fast code path (line 477 and line
495, respectively) that reads progress[field] without acquiring
progress_mutex. If the speculatively read value of progress[field] is
>= |n|, then the functions return immediately. Otherwise, the
functions take the slow code path, which acquires progress_mutex
properly and does what the functions need to do.

The fast code path is in the style of double-checked locking and has a
data race. ThreadSanitizer is right in this case.

Wan-Teh Chang
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Always read frame progress values under progress_mutex.

2016-02-25 Thread Wan-Teh Chang
This fixes data race warnings by ThreadSanitizer.
ff_thread_report_progress and ff_thread_await_progress should read
progress[field] only when holding progress_mutex because progress_mutex
guards frame progress values.
---
 libavcodec/pthread_frame.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..b7f603d 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -474,7 +474,7 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 PerThreadContext *p;
 volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
-if (!progress || progress[field] >= n) return;
+if (!progress) return;
 
 p = f->owner->internal->thread_ctx;
 
@@ -482,8 +482,10 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, 
n, field);
 
 pthread_mutex_lock(>progress_mutex);
-progress[field] = n;
-pthread_cond_broadcast(>progress_cond);
+if (progress[field] < n) {
+progress[field] = n;
+pthread_cond_broadcast(>progress_cond);
+}
 pthread_mutex_unlock(>progress_mutex);
 }
 
@@ -492,7 +494,7 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 PerThreadContext *p;
 volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
-if (!progress || progress[field] >= n) return;
+if (!progress) return;
 
 p = f->owner->internal->thread_ctx;
 
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Move the |die| member of FrameThreadContext to PerThreadContext.

2016-02-25 Thread Wan-Teh Chang
This fixes a data race warning by ThreadSanitizer.
FrameThreadContext.die is read by all the worker threads but is not
protected by any mutex. Move it to PerThreadContext so that each worker
thread reads its own copy of |die|, which can then be protected with
PerThreadContext.mutex.
---
 libavcodec/pthread_frame.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b77dd1e..c5ac44f 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -93,6 +93,8 @@ typedef struct PerThreadContext {
 
 const enum AVPixelFormat *available_formats; ///< Format array for 
get_format()
 enum AVPixelFormat result_format;///< get_format() result
+
+int die;///< Set when the thread should exit.
 } PerThreadContext;
 
 /**
@@ -111,8 +113,6 @@ typedef struct FrameThreadContext {
 * Set for the first N packets, where N is 
the number of threads.
 * While it is set, 
ff_thread_en/decode_frame won't return any results.
 */
-
-int die;   ///< Set when threads should exit.
 } FrameThreadContext;
 
 #define THREAD_SAFE_CALLBACKS(avctx) \
@@ -134,10 +134,10 @@ static attribute_align_arg void *frame_worker_thread(void 
*arg)
 
 pthread_mutex_lock(>mutex);
 while (1) {
-while (p->state == STATE_INPUT_READY && !fctx->die)
+while (p->state == STATE_INPUT_READY && !p->die)
 pthread_cond_wait(>input_cond, >mutex);
 
-if (fctx->die) break;
+if (p->die) break;
 
 if (!codec->update_thread_context && THREAD_SAFE_CALLBACKS(avctx))
 ff_thread_finish_setup(avctx);
@@ -553,12 +553,11 @@ void ff_frame_thread_free(AVCodecContext *avctx, int 
thread_count)
 fctx->threads->avctx->internal->is_copy = 1;
 }
 
-fctx->die = 1;
-
 for (i = 0; i < thread_count; i++) {
 PerThreadContext *p = >threads[i];
 
 pthread_mutex_lock(>mutex);
+p->die = 1;
 pthread_cond_signal(>input_cond);
 pthread_mutex_unlock(>mutex);
 
-- 
2.7.0.rc3.207.g0ac5344

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel