docker run -i -t 5b1f9964e594 /bin/bash
apt-get update
apt-get install git
git clone https://github.com/Linaro/odp.git
./bootstrap
./configure --disable-test-perf --disable-test-perf-proc
make -j 8
export ODP_SCHEDULER=scalable
./helper/test/cuckootable
works!

./configure --disable-test-perf --disable-test-perf-proc CFLAGS="-O0 -g"
./helper/test/cuckootable
hangs!

export ODP_SCHEDULER=basic
./helper/test/cuckootable
works again!


On 13 March 2018 at 10:34, Maxim Uvarov <maxim.uva...@linaro.org> wrote:

> CC odp ML for this issue.
>
> Maxim.
>
> On 13 March 2018 at 03:33, Bill Fischofer <bill.fischo...@linaro.org>
> wrote:
>
>> Additional details.
>>
>> __atomic_load_n() is a GCC intrinsic, however __lockfree_load_16() is
>> defined in platform/linux-generic/arch/aarch64/odp_atomic.h:
>>
>> static inline __int128 __lockfree_load_16(__int128 *var, int mo)
>> {
>>         __int128 old = *var; /* Possibly torn read */
>>
>>         /* Do CAS to ensure atomicity
>>          * Either CAS succeeds (writing back the same value)
>>          * Or CAS fails and returns the old value (atomic read)
>>          */
>>         (void)__lockfree_compare_exchange_16(var, &old, old, false, mo, mo);
>>         return old;
>> }
>>
>> As is __lockfree_compare_exchange_16():
>>
>> static inline bool
>> __lockfree_compare_exchange_16(register __int128 *var, __int128 *exp,
>>        register __int128 neu, bool weak, int mo_success,
>>        int mo_failure)
>> {
>> (void)weak; /* Always do strong CAS or we can't perform atomic read */
>> /* Ignore memory ordering for failure, memory order for
>> * success must be stronger or equal. */
>> (void)mo_failure;
>> register __int128 old;
>> register __int128 expected;
>> int ll_mo = LL_MO(mo_success);
>> int sc_mo = SC_MO(mo_success);
>>
>> expected = *exp;
>> __asm__ volatile("" ::: "memory");
>> do {
>> /* Atomicity of LLD is not guaranteed */
>> old = lld(var, ll_mo);
>> /* Must write back neu or old to verify atomicity of LLD */
>> } while (odp_unlikely(scd(var, old == expected ? neu : old, sc_mo)));
>> *exp = old; /* Always update, atomically read value */
>> return old == expected;
>> }
>>
>> In turn lld() and scd() are defined in
>> platform/linux-generic/arch/aarch64/odp_llsc.h:
>>
>> static inline __int128 lld(__int128 *var, int mm)
>> {
>> union i128 old;
>>
>> if (mm == __ATOMIC_ACQUIRE)
>> __asm__ volatile("ldaxp %0, %1, [%2]"
>> : "=&r" (old.i64[0]), "=&r" (old.i64[1])
>> : "r" (var)
>> : "memory");
>> else if (mm == __ATOMIC_RELAXED)
>> __asm__ volatile("ldxp %0, %1, [%2]"
>> : "=&r" (old.i64[0]), "=&r" (old.i64[1])
>> : "r" (var)
>> : );
>> else
>> ODP_ABORT();
>> return old.i128;
>> }
>>
>> /* Return 0 on success, 1 on failure */
>> static inline uint32_t scd(__int128 *var, __int128 neu, int mm)
>> {
>> uint32_t ret;
>>
>> if (mm == __ATOMIC_RELEASE)
>> __asm__ volatile("stlxp %w0, %1, %2, [%3]"
>> : "=&r" (ret)
>> : "r" (((union i128)neu).i64[0]),
>>    "r" (((union i128)neu).i64[1]),
>>    "r" (var)
>> : "memory");
>> else if (mm == __ATOMIC_RELAXED)
>> __asm__ volatile("stxp %w0, %1, %2, [%3]"
>> : "=&r" (ret)
>> : "r" (((union i128)neu).i64[0]),
>>    "r" (((union i128)neu).i64[1]),
>>    "r" (var)
>> : );
>> else
>> ODP_ABORT();
>> return ret;
>> }
>>
>> So these boil down to a sequence of __asm__() instructions. If these are
>> hanging it suggests a compiler issue. Does this occur with a newer GCC
>> level?
>>
>> On Mon, Mar 12, 2018 at 5:21 PM, Maxim Uvarov <maxim.uva...@linaro.org>
>> wrote:
>>
>>> gcc -v
>>> Using built-in specs.
>>> COLLECT_GCC=gcc
>>> COLLECT_LTO_WRAPPER=/usr/lib/gcc/aarch64-linux-gnu/4.8/lto-wrapper
>>> Target: aarch64-linux-gnu
>>> Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro
>>> 4.8.5-4ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs
>>> --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
>>> --program-suffix=-4.8 --enable-shared --enable-linker-build-id
>>> --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
>>> --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib
>>> --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug
>>> --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-libmudflap
>>> --disable-libsanitizer --disable-libquadmath --enable-plugin
>>> --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk
>>> --enable-gtk-cairo 
>>> --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64/jre
>>> --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64
>>> --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-arm64
>>> --with-arch-directory=arm64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
>>> --enable-multiarch --disable-werror --enable-checking=release
>>> --build=aarch64-linux-gnu --host=aarch64-linux-gnu
>>> --target=aarch64-linux-gnu
>>> Thread model: posix
>>> gcc version 4.8.5 (Ubuntu/Linaro 4.8.5-4ubuntu2)
>>>
>>> On 13 March 2018 at 00:20, Maxim Uvarov <maxim.uva...@linaro.org> wrote:
>>>
>>>> This fixes the problem, but it's too late today to do a clean patch
>>>> (debugging is fun when gdb does not work under docker).
>>>> So it might be something thunder-x specific.
>>>>
>>>>
>>>> --- a/platform/linux-generic/include/odp_bitset.h
>>>> +++ b/platform/linux-generic/include/odp_bitset.h
>>>> @@ -27,7 +27,7 @@
>>>>  /* Find a suitable data type that supports lock-free atomic operations
>>>> */
>>>>  #if defined(__aarch64__) && defined(__SIZEOF_INT128__) && \
>>>>         __SIZEOF_INT128__ == 16
>>>> -#define LOCKFREE16
>>>> +// #define LOCKFREE16
>>>>  typedef __int128 bitset_t;
>>>>  #define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT128__)
>>>>
>>>>
>>>> On 13 March 2018 at 00:14, Maxim Uvarov <maxim.uva...@linaro.org>
>>>> wrote:
>>>>
>>>>> platform/linux-generic/odp_schedule_scalable.c
>>>>>
>>>>> static odp_schedule_group_t schedule_group_create(const char *name,
>>>>>                                                   const odp_thrmask_t
>>>>> *mask)
>>>>> {
>>>>>
>>>>> ......
>>>>>
>>>>>         printf("%s()%d\n", __func__, __LINE__); <-- prints
>>>>>         /* Validate inputs */
>>>>>         if (mask == NULL)
>>>>>                 ODP_ABORT("mask is NULL\n");
>>>>>
>>>>>         printf("%s()%d\n", __func__, __LINE__); <- prints
>>>>>         odp_spinlock_lock(&sched_grp_lock);
>>>>>
>>>>>         printf("%s()%d\n", __func__, __LINE__);
>>>>>         /* Allocate a scheduler group */
>>>>>         free = atom_bitset_load(&sg_free, __ATOMIC_RELAXED);
>>>>>         printf("%s()%d\n", __func__, __LINE__); <- not printed, hung
>>>>> forever before this
>>>>>
>>>>> Maxim.
>>>>>
>>>>> On 13 March 2018 at 00:08, Bill Fischofer <bill.fischo...@linaro.org>
>>>>> wrote:
>>>>>
>>>>>> That's interesting since it was developed by Arm and presumably tested
>>>>>> by them on Arm systems.
>>>>>>
>>>>>> On Mon, Mar 12, 2018 at 4:58 PM, Maxim Uvarov <
>>>>>> maxim.uva...@linaro.org> wrote:
>>>>>> > I see that odp_init_global() fails on thunder-x with the scalable
>>>>>> scheduler.
>>>>>> >
>>>>>> > On 12 March 2018 at 23:57, Bill Fischofer <
>>>>>> bill.fischo...@linaro.org> wrote:
>>>>>> >>
>>>>>> >> Sure. Dmitry says it's a clang related failure. Is that what you're
>>>>>> >> seeing? If it's related to a specific level of clang we may be
>>>>>> able to
>>>>>> >> simply document it as such.
>>>>>> >>
>>>>>> >> On Mon, Mar 12, 2018 at 4:25 PM, Maxim Uvarov <
>>>>>> maxim.uva...@linaro.org>
>>>>>> >> wrote:
>>>>>> >> > Bill,
>>>>>> >> >
>>>>>> >> > I reproduced the failure on thunder-x, so I would like to spend
>>>>>> one
>>>>>> >> > more day looking at it before doing rc2.
>>>>>> >> >
>>>>>> >> > Maxim.
>>>>>> >
>>>>>> >
>>>>>>
>>>>>
>>>>>
>>>>
>>>
>>
>

Reply via email to