docker run -i -t 5b1f9964e594 /bin/bash apt-get update apt-get install git git clone https://github.com/Linaro/odp.git ./bootstrap ./configure --disable-test-perf --disable-test-perf-proc make -j 8 export ODP_SCHEDULER=scalable ./helper/test/cuckootable works!
./configure --disable-test-perf --disable-test-perf-proc CFLAGS="-O0 -g" ./helper/test/cuckootable hangs! export ODP_SCHEDULER=basic ./helper/test/cuckootable works again! On 13 March 2018 at 10:34, Maxim Uvarov <maxim.uva...@linaro.org> wrote: > CC odp ML for this issue. > > Maxim. > > On 13 March 2018 at 03:33, Bill Fischofer <bill.fischo...@linaro.org> > wrote: > >> Additional details. >> >> __atomic_load_n() is a GCC intrinsic, however __lockfree_load_16() is >> defined in platform/linux-generic/arch/aarch64/odp_atomic.h: >> >> static inline __int128 __lockfree_load_16(__int128 *var, int mo) >> { >> __int128 old = *var; /* Possibly torn read */ >> >> /* Do CAS to ensure atomicity >> * Either CAS succeeds (writing back the same value) >> * Or CAS fails and returns the old value (atomic read) >> */ >> (void)__lockfree_compare_exchange_16(var, &old, old, false, mo, >> mo); >> return old; >> } >> >> As is __lockfree_compare_exchange_16(): >> >> static inline bool >> __lockfree_compare_exchange_16(register __int128 *var, __int128 *exp, >> register __int128 neu, bool weak, int mo_success, >> int mo_failure) >> { >> (void)weak; /* Always do strong CAS or we can't perform atomic read */ >> /* Ignore memory ordering for failure, memory order for >> * success must be stronger or equal. */ >> (void)mo_failure; >> register __int128 old; >> register __int128 expected; >> int ll_mo = LL_MO(mo_success); >> int sc_mo = SC_MO(mo_success); >> >> expected = *exp; >> __asm__ volatile("" ::: "memory"); >> do { >> /* Atomicity of LLD is not guaranteed */ >> old = lld(var, ll_mo); >> /* Must write back neu or old to verify atomicity of LLD */ >> } while (odp_unlikely(scd(var, old == expected ? 
neu : old, sc_mo))); >> *exp = old; /* Always update, atomically read value */ >> return old == expected; >> } >> >> In turn lld() and scd() are defined in platform/linux-generic/arch/aa >> rch64/odp_llsc.h: >> >> static inline __int128 lld(__int128 *var, int mm) >> { >> union i128 old; >> >> if (mm == __ATOMIC_ACQUIRE) >> __asm__ volatile("ldaxp %0, %1, [%2]" >> : "=&r" (old.i64[0]), "=&r" (old.i64[1]) >> : "r" (var) >> : "memory"); >> else if (mm == __ATOMIC_RELAXED) >> __asm__ volatile("ldxp %0, %1, [%2]" >> : "=&r" (old.i64[0]), "=&r" (old.i64[1]) >> : "r" (var) >> : ); >> else >> ODP_ABORT(); >> return old.i128; >> } >> >> /* Return 0 on success, 1 on failure */ >> static inline uint32_t scd(__int128 *var, __int128 neu, int mm) >> { >> uint32_t ret; >> >> if (mm == __ATOMIC_RELEASE) >> __asm__ volatile("stlxp %w0, %1, %2, [%3]" >> : "=&r" (ret) >> : "r" (((union i128)neu).i64[0]), >> "r" (((union i128)neu).i64[1]), >> "r" (var) >> : "memory"); >> else if (mm == __ATOMIC_RELAXED) >> __asm__ volatile("stxp %w0, %1, %2, [%3]" >> : "=&r" (ret) >> : "r" (((union i128)neu).i64[0]), >> "r" (((union i128)neu).i64[1]), >> "r" (var) >> : ); >> else >> ODP_ABORT(); >> return ret; >> } >> >> So these boil down to a sequence of __asm__() instructions. If these are >> hanging it suggests a compiler issue. Does this occur with a newer GCC >> level? >> >> On Mon, Mar 12, 2018 at 5:21 PM, Maxim Uvarov <maxim.uva...@linaro.org> >> wrote: >> >>> gcc -v >>> Using built-in specs. 
>>> COLLECT_GCC=gcc >>> COLLECT_LTO_WRAPPER=/usr/lib/gcc/aarch64-linux-gnu/4.8/lto-wrapper >>> Target: aarch64-linux-gnu >>> Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro >>> 4.8.5-4ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-4.8/README.Bugs >>> --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr >>> --program-suffix=-4.8 --enable-shared --enable-linker-build-id >>> --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix >>> --with-gxx-include-dir=/usr/include/c++/4.8 --libdir=/usr/lib >>> --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug >>> --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-libmudflap >>> --disable-libsanitizer --disable-libquadmath --enable-plugin >>> --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk >>> --enable-gtk-cairo >>> --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64/jre >>> --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.8-arm64 >>> --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.8-arm64 >>> --with-arch-directory=arm64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar >>> --enable-multiarch --disable-werror --enable-checking=release >>> --build=aarch64-linux-gnu --host=aarch64-linux-gnu >>> --target=aarch64-linux-gnu >>> Thread model: posix >>> gcc version 4.8.5 (Ubuntu/Linaro 4.8.5-4ubuntu2) >>> >>> On 13 March 2018 at 00:20, Maxim Uvarov <maxim.uva...@linaro.org> wrote: >>> >>>> this fixes a problem. But it's too late today to do a clean patch. (fun >>>> debug if gdb does not work under docker). >>>> So it might be something thunder-x specific. 
>>>> >>>> >>>> --- a/platform/linux-generic/include/odp_bitset.h >>>> +++ b/platform/linux-generic/include/odp_bitset.h >>>> @@ -27,7 +27,7 @@ >>>> /* Find a suitable data type that supports lock-free atomic operations >>>> */ >>>> #if defined(__aarch64__) && defined(__SIZEOF_INT128__) && \ >>>> __SIZEOF_INT128__ == 16 >>>> -#define LOCKFREE16 >>>> +// #define LOCKFREE16 >>>> typedef __int128 bitset_t; >>>> #define ATOM_BITSET_SIZE (CHAR_BIT * __SIZEOF_INT128__) >>>> >>>> >>>> On 13 March 2018 at 00:14, Maxim Uvarov <maxim.uva...@linaro.org> >>>> wrote: >>>> >>>>> platform/linux-generic/odp_schedule_scalable.c >>>>> >>>>> static odp_schedule_group_t schedule_group_create(const char *name, >>>>> const odp_thrmask_t >>>>> *mask) >>>>> { >>>>> >>>>> ...... >>>>> >>>>> printf("%s()%d\n", __func__, __LINE__); <-- prints >>>>> /* Validate inputs */ >>>>> if (mask == NULL) >>>>> ODP_ABORT("mask is NULL\n"); >>>>> >>>>> printf("%s()%d\n", __func__, __LINE__); <- prints >>>>> odp_spinlock_lock(&sched_grp_lock); >>>>> >>>>> printf("%s()%d\n", __func__, __LINE__); >>>>> /* Allocate a scheduler group */ >>>>> free = atom_bitset_load(&sg_free, __ATOMIC_RELAXED); >>>>> printf("%s()%d\n", __func__, __LINE__); <- not printed, hung >>>>> forever before this >>>>> >>>>> Maxim. >>>>> >>>>> On 13 March 2018 at 00:08, Bill Fischofer <bill.fischo...@linaro.org> >>>>> wrote: >>>>> >>>>>> That's interesting since it was developed by Arm and presumably tested >>>>>> by them on Arm systems. >>>>>> >>>>>> On Mon, Mar 12, 2018 at 4:58 PM, Maxim Uvarov < >>>>>> maxim.uva...@linaro.org> wrote: >>>>>> > I see that odp_init_global() fails on thunder-x with the scalable >>>>>> scheduler. >>>>>> > >>>>>> > On 12 March 2018 at 23:57, Bill Fischofer < >>>>>> bill.fischo...@linaro.org> wrote: >>>>>> >> >>>>>> >> Sure. Dmitry says it's a clang related failure. Is that what you're >>>>>> >> seeing? If it's related to a specific level of clang we may be >>>>>> able to >>>>>> >> simply document it as such. 
>>>>>> >> >>>>>> >> On Mon, Mar 12, 2018 at 4:25 PM, Maxim Uvarov < >>>>>> maxim.uva...@linaro.org> >>>>>> >> wrote: >>>>>> >> > Bill, >>>>>> >> > >>>>>> >> > I reproduced the failure on thunder-x. So I would like to take a look >>>>>> at it one >>>>>> >> > more day before doing rc2. >>>>>> >> > >>>>>> >> > Maxim. >>>>>> > >>>>>> > >>>>>> >>>>> >>>>> >>>> >>> >> >