Re: [PATCH 2/3] bsps/i386: use Pentium instructions for pc586 and pc686 builds.
This looks like a compiler or libatomic configuration bug. I currently have no time to investigate this further. On 12/10/16 15:36, Pavel Pisa wrote: Hello Sebastian, On Wednesday 12 of October 2016 10:35:55 Sebastian Huber wrote: On 12/10/16 10:26, p...@cmp.felk.cvut.cz wrote: SMP build is broken with i386 set because libatomic and GCC generate infinite loop for __atomic_fetch_add_4 used in rtems_interrupt_lock_acquire __atomic_fetch_add_4: push %ebp mov %esp,%ebp movl $0x5,0x10(%ebp) pop %ebp jmp __atomic_fetch_add_4 Do you have a test case for this compiler/RTEMS bug? The use of libatomic is inefficient, but it should work. Maybe it is a problem with my i386 toolchain build; I have not updated it since April. The following is a simple test: #include <stdatomic.h> atomic_uint atvar1; volatile unsigned int res1; volatile unsigned int res2; int main(void) { res1 = atomic_fetch_or(&atvar1, 0x55); res2 = atomic_fetch_add(&atvar1, 0xaa); return 0; } The following build commands are used: i386-rtems4.12-gcc --pipe -B/opt/rtems4.12/i386-rtems4.12/pc686/lib/ -specs bsp_specs -qrtems -I /opt/rtems4.12/i386-rtems4.12/pc686/lib/include -march=i386 -Wall -O2 -g -ffunction-sections -fdata-sections -o libatomic-add-test.o -c libatomic-add-test.c i386-rtems4.12-gcc --pipe -B/opt/rtems4.12/i386-rtems4.12/pc686/lib/ -specs bsp_specs -qrtems -mtune=pentiumpro -march=pentium -Wall -O2 -g -ffunction-sections -fdata-sections -Wl,--gc-sections -Wl,-Ttext,0x0010 libatomic-add-test.o -o libatomic-test-add The problem appears with and without -march=i386; when -march is something newer (pentium), then all is OK. 
Disassembly looks like 00120a76 <__atomic_fetch_add_4>: 120a76: 55 push %ebp 120a77: 89 e5 mov%esp,%ebp 120a79: c7 45 10 05 00 00 00movl $0x5,0x10(%ebp) 120a80: 5d pop%ebp 120a81: eb f3 jmp120a76 <__atomic_fetch_add_4> 00120a83 <__atomic_add_fetch_4>: 120a83: 55 push %ebp 120a84: 89 e5 mov%esp,%ebp 120a86: c7 45 10 05 00 00 00movl $0x5,0x10(%ebp) 120a8d: 5d pop%ebp 120a8e: eb e6 jmp120a76 <__atomic_fetch_add_4> 00120a90 <__atomic_fetch_or_4>: 120a90: 55 push %ebp 120a91: 89 e5 mov%esp,%ebp 120a93: 56 push %esi 120a94: 53 push %ebx 120a95: 83 ec 0csub$0xc,%esp 120a98: 8b 5d 08mov0x8(%ebp),%ebx 120a9b: 53 push %ebx 120a9c: e8 df 66 00 00 call 127180 <_Libatomic_Protect_start> 120aa1: 8b 33 mov(%ebx),%esi 120aa3: 8b 55 0cmov0xc(%ebp),%edx 120aa6: 09 f2 or %esi,%edx 120aa8: 89 13 mov%edx,(%ebx) 120aaa: 5a pop%edx 120aab: 59 pop%ecx 120aac: 50 push %eax 120aad: 53 push %ebx 120aae: e8 ed 66 00 00 call 1271a0 <_Libatomic_Protect_end> 120ab3: 8d 65 f8lea-0x8(%ebp),%esp 120ab6: 89 f0 mov%esi,%eax 120ab8: 5b pop%ebx 120ab9: 5e pop%esi 120aba: 5d pop%ebp 120abb: c3 ret _Libatomic_Protect_start is provided by RTEMS. 00127180 <_Libatomic_Protect_start>: __uint32_t _Libatomic_Protect_start( void *ptr ) { ISR_Level isr_level; (void) ptr; _ISR_Local_disable( isr_level ); 127180: 9c pushf 127181: fa cli 127182: 58 pop%eax static inline bool _CPU_atomic_Flag_test_and_set( CPU_atomic_Flag *obj, CPU_atomic_Order order ) { #if defined(_RTEMS_SCORE_CPUSTDATOMIC_USE_ATOMIC) return obj->test_and_set( order ); #elif defined(_RTEMS_SCORE_CPUSTDATOMIC_USE_STDATOMIC) return atomic_flag_test_and_set_explicit( obj, order ); 127183: b1 01 mov$0x1,%cl 127185: 8d 74 26 00 lea0x0(%esi,%eiz,1),%esi 127189: 8d bc 27 00 00 00 00lea0x0(%edi,%eiz,1),%edi 127190: 88 ca mov%cl,%dl 127192: 86 15 8c 85 13 00 xchg %dl,0x13858c #if defined(RTEMS_SMP) while ( 127198:
Re: [PATCH 2/3] bsps/i386: use Pentium instructions for pc586 and pc686 builds.
Hello Sebastian, On Wednesday 12 of October 2016 10:35:55 Sebastian Huber wrote: > On 12/10/16 10:26, p...@cmp.felk.cvut.cz wrote: > > SMP build is broken with i386 set because libatomic and GCC > > generate infinite loop for __atomic_fetch_add_4 used > > in rtems_interrupt_lock_acquire > > > > __atomic_fetch_add_4: > > push %ebp > > mov %esp,%ebp > > movl $0x5,0x10(%ebp) > > pop %ebp > > jmp __atomic_fetch_add_4 > > Do you have a test case for this compiler/RTEMS bug? The use of > libatomic is inefficient, but it should work. Maybe it is a problem with my i386 toolchain build; I have not updated it since April. The following is a simple test: #include <stdatomic.h> atomic_uint atvar1; volatile unsigned int res1; volatile unsigned int res2; int main(void) { res1 = atomic_fetch_or(&atvar1, 0x55); res2 = atomic_fetch_add(&atvar1, 0xaa); return 0; } The following build commands are used: i386-rtems4.12-gcc --pipe -B/opt/rtems4.12/i386-rtems4.12/pc686/lib/ -specs bsp_specs -qrtems -I /opt/rtems4.12/i386-rtems4.12/pc686/lib/include -march=i386 -Wall -O2 -g -ffunction-sections -fdata-sections -o libatomic-add-test.o -c libatomic-add-test.c i386-rtems4.12-gcc --pipe -B/opt/rtems4.12/i386-rtems4.12/pc686/lib/ -specs bsp_specs -qrtems -mtune=pentiumpro -march=pentium -Wall -O2 -g -ffunction-sections -fdata-sections -Wl,--gc-sections -Wl,-Ttext,0x0010 libatomic-add-test.o -o libatomic-test-add The problem appears with and without -march=i386; when -march is something newer (pentium), then all is OK. 
Disassembly looks like 00120a76 <__atomic_fetch_add_4>: 120a76: 55 push %ebp 120a77: 89 e5 mov%esp,%ebp 120a79: c7 45 10 05 00 00 00movl $0x5,0x10(%ebp) 120a80: 5d pop%ebp 120a81: eb f3 jmp120a76 <__atomic_fetch_add_4> 00120a83 <__atomic_add_fetch_4>: 120a83: 55 push %ebp 120a84: 89 e5 mov%esp,%ebp 120a86: c7 45 10 05 00 00 00movl $0x5,0x10(%ebp) 120a8d: 5d pop%ebp 120a8e: eb e6 jmp120a76 <__atomic_fetch_add_4> 00120a90 <__atomic_fetch_or_4>: 120a90: 55 push %ebp 120a91: 89 e5 mov%esp,%ebp 120a93: 56 push %esi 120a94: 53 push %ebx 120a95: 83 ec 0csub$0xc,%esp 120a98: 8b 5d 08mov0x8(%ebp),%ebx 120a9b: 53 push %ebx 120a9c: e8 df 66 00 00 call 127180 <_Libatomic_Protect_start> 120aa1: 8b 33 mov(%ebx),%esi 120aa3: 8b 55 0cmov0xc(%ebp),%edx 120aa6: 09 f2 or %esi,%edx 120aa8: 89 13 mov%edx,(%ebx) 120aaa: 5a pop%edx 120aab: 59 pop%ecx 120aac: 50 push %eax 120aad: 53 push %ebx 120aae: e8 ed 66 00 00 call 1271a0 <_Libatomic_Protect_end> 120ab3: 8d 65 f8lea-0x8(%ebp),%esp 120ab6: 89 f0 mov%esi,%eax 120ab8: 5b pop%ebx 120ab9: 5e pop%esi 120aba: 5d pop%ebp 120abb: c3 ret _Libatomic_Protect_start is provided by RTEMS. 00127180 <_Libatomic_Protect_start>: __uint32_t _Libatomic_Protect_start( void *ptr ) { ISR_Level isr_level; (void) ptr; _ISR_Local_disable( isr_level ); 127180: 9c pushf 127181: fa cli 127182: 58 pop%eax static inline bool _CPU_atomic_Flag_test_and_set( CPU_atomic_Flag *obj, CPU_atomic_Order order ) { #if defined(_RTEMS_SCORE_CPUSTDATOMIC_USE_ATOMIC) return obj->test_and_set( order ); #elif defined(_RTEMS_SCORE_CPUSTDATOMIC_USE_STDATOMIC) return atomic_flag_test_and_set_explicit( obj, order ); 127183: b1 01 mov$0x1,%cl 127185: 8d 74 26 00 lea0x0(%esi,%eiz,1),%esi 127189: 8d bc 27 00 00 00 00lea0x0(%edi,%eiz,1),%edi 127190: 88 ca mov%cl,%dl 127192: 86 15 8c 85 13 00 xchg %dl,0x13858c #if defined(RTEMS_SMP) while ( 127198: 84 d2 test %dl,%dl 12719a: 75 f4 jne127190 <_Libatomic_Protect_start+0x10>
Re: [PATCH 2/3] bsps/i386: use Pentium instructions for pc586 and pc686 builds.
On 12/10/16 10:26, p...@cmp.felk.cvut.cz wrote: SMP build is broken with i386 set because libatomic and GCC generate infinite loop for __atomic_fetch_add_4 used in rtems_interrupt_lock_acquire __atomic_fetch_add_4: push %ebp mov %esp,%ebp movl $0x5,0x10(%ebp) pop %ebp jmp __atomic_fetch_add_4 Do you have a test case for this compiler/RTEMS bug? The use of libatomic is inefficient, but it should work. -- Sebastian Huber, embedded brains GmbH Address : Dornierstr. 4, D-82178 Puchheim, Germany Phone : +49 89 189 47 41-16 Fax : +49 89 189 47 41-09 E-Mail : sebastian.hu...@embedded-brains.de PGP : Public key available on request. Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG. ___ devel mailing list devel@rtems.org http://lists.rtems.org/mailman/listinfo/devel
[PATCH 2/3] bsps/i386: use Pentium instructions for pc586 and pc686 builds.
From: Pavel Pisa When the GCC option -march is not specified, the i386-rtems toolchain defaults to the i386 architecture instruction set. It does not provide atomic instructions, which results in a really inefficient atomic_fetch_or even on UP build. SMP build is broken with i386 set because libatomic and GCC generate infinite loop for __atomic_fetch_add_4 used in rtems_interrupt_lock_acquire __atomic_fetch_add_4: push %ebp mov %esp,%ebp movl $0x5,0x10(%ebp) pop %ebp jmp __atomic_fetch_add_4 --- c/src/lib/libbsp/i386/pc386/make/custom/pc586.cfg | 2 +- c/src/lib/libbsp/i386/pc386/make/custom/pc686.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/c/src/lib/libbsp/i386/pc386/make/custom/pc586.cfg b/c/src/lib/libbsp/i386/pc386/make/custom/pc586.cfg index 73e42cd..6f9cfcb 100644 --- a/c/src/lib/libbsp/i386/pc386/make/custom/pc586.cfg +++ b/c/src/lib/libbsp/i386/pc386/make/custom/pc586.cfg @@ -6,7 +6,7 @@ RTEMS_CPU_MODEL=pentium # This contains the compiler options necessary to select the CPU model # and (hopefully) optimize for it. -CPU_CFLAGS = -mtune=pentium +CPU_CFLAGS = -mtune=pentium -march=pentium include $(RTEMS_ROOT)/make/custom/pc386.cfg diff --git a/c/src/lib/libbsp/i386/pc386/make/custom/pc686.cfg b/c/src/lib/libbsp/i386/pc386/make/custom/pc686.cfg index 04f001f..b27e8ae 100644 --- a/c/src/lib/libbsp/i386/pc386/make/custom/pc686.cfg +++ b/c/src/lib/libbsp/i386/pc386/make/custom/pc686.cfg @@ -6,7 +6,7 @@ RTEMS_CPU_MODEL=pentiumpro # This contains the compiler options necessary to select the CPU model # and (hopefully) optimize for it. -CPU_CFLAGS = -mtune=pentiumpro +CPU_CFLAGS = -mtune=pentiumpro -march=pentium include $(RTEMS_ROOT)/make/custom/pc386.cfg -- 1.9.1 ___ devel mailing list devel@rtems.org http://lists.rtems.org/mailman/listinfo/devel