[Bug middle-end/103870] ARM: Wrong branch instruction with optimization O2 and higher
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103870 --- Comment #1 from Petro Karashchenko --- I would expect a "bgt" instruction instead of "bne" in this case.
[Bug middle-end/103870] New: ARM: Wrong branch instruction with optimization O2 and higher
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103870 Bug ID: 103870 Summary: ARM: Wrong branch instruction with optimization O2 and higher Product: gcc Version: 10.3.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end Assignee: unassigned at gcc dot gnu.org Reporter: petro.karashchenko at gmail dot com Target Milestone: --- Code test.c: void f(void) { double a = 2.2204460492503131e-16; int b = 1; int c = 0; while (a > 0) { if (++c >= 10) { b *= 2; c = 0; } if (b > 1) { a *= b; } } } -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -march=armv7e-m -mtune=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard test.c -O0 -- Generated assembly: .arch armv7e-m .fpu fpv5-d16 .eabi_attribute 28, 1 .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 6 .eabi_attribute 34, 1 .eabi_attribute 18, 4 .file "test2.c" .text .align 1 .global f .syntax unified .thumb .thumb_func .type f, %function f: @ args = 0, pretend = 0, frame = 16 @ frame_needed = 1, uses_anonymous_args = 0 @ link register save eliminated. 
push{r7} sub sp, sp, #20 add r7, sp, #0 mov r2, #0 ldr r3, .L5 strdr2, [r7, #8] movsr3, #1 str r3, [r7, #4] movsr3, #0 str r3, [r7] b .L2 .L4: ldr r3, [r7] addsr3, r3, #1 str r3, [r7] ldr r3, [r7] cmp r3, #9 ble .L3 ldr r3, [r7, #4] lslsr3, r3, #1 str r3, [r7, #4] movsr3, #0 str r3, [r7] .L3: ldr r3, [r7, #4] cmp r3, #1 ble .L2 ldr r3, [r7, #4] vmovs15, r3 @ int vcvt.f64.s32d7, s15 vldr.64 d6, [r7, #8] vmul.f64d7, d6, d7 vstr.64 d7, [r7, #8] .L2: vldr.64 d7, [r7, #8] vcmpe.f64 d7, #0 vmrsAPSR_nzcv, FPSCR bgt .L4 nop nop addsr7, r7, #20 mov sp, r7 @ sp needed ldr r7, [sp], #4 bx lr .L6: .align 2 .L5: .word 1018167296 .size f, .-f .ident "GCC: (GNU Arm Embedded Toolchain 10.3-2021.10) 10.3.1 20210824 (release)" -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -march=armv7e-m -mtune=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard test.c -Os -- Generated assembly: .arch armv7e-m .fpu fpv5-d16 .eabi_attribute 28, 1 .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 4 .eabi_attribute 34, 1 .eabi_attribute 18, 4 .file "test2.c" .text .align 1 .global f .syntax unified .thumb .thumb_func .type f, %function f: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 @ link register save eliminated. vldr.64 d7, .L10 movsr3, #0 movsr2, #1 .L5: addsr3, r3, #1 cmp r3, #9 ble .L2 lslsr2, r2, #1 movsr3, #0 .L3: vmovs13, r2 @ int vcvt.f64.s32d6, s13 vmul.f64d7, d7, d6 b .L4 .L2: cmp r2, #1 bne .L3 .L4: vcmpe.f64 d7, #0 vmrsAPSR_nzcv, FPSCR bgt .L5 bx lr .L11: .align 3 .L10: .word 0 .word 1018167296 .size f, .-f .ident "GCC: (GNU Arm Embedded Toolchain 10.3-2021.10) 10.3.1 20210824 (release)" -- The problem appears with condition "if (b > 1)". If -O0 is applied then code ldr r3, [r7, #4] cmp r3, #1 ble .L2 is generate that "ble" instruction code is used, however with -Os we are getting cmp r2, #1 bne .L3 code generated that use "bne" instruction code. 
The issue is that, with the code above, "b *= 2;" at some point causes "b" to overflow and become equal to zero. The "ble" instruction will catch this; however, "bne" will not. So the use of "bne" is inappropriate for the "if (b > 1)" condition, which is intended to exclude negative values and zero.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #20 from Petro Karashchenko --- I just checked next case typedef int tolerant_int __attribute__((aligned(1))); tolerant_int var; int foo(void) { return var; } -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c -O0 -- .cpu arm7tdmi .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 6 .eabi_attribute 34, 0 .eabi_attribute 18, 4 .file "test.c" .text .comm var,4,1 .align 1 .global foo .arch armv4t .syntax unified .code 16 .thumb_func .fpu softvfp .type foo, %function foo: @ Function supports interworking. @ args = 0, pretend = 0, frame = 0 @ frame_needed = 1, uses_anonymous_args = 0 push{r7, lr} add r7, sp, #0 ldr r3, .L3 ldr r3, [r3] movsr0, r3 mov sp, r7 @ sp needed pop {r7} pop {r1} bx r1 .L4: .align 2 .L3: .word var .size foo, .-foo .ident "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major) 9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]" - So seems the alignment decrease does not work on types.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #19 from Petro Karashchenko --- Sorry my bad again. Just checked with GCC 11 man page When used on a struct, or struct member, the aligned attribute can only increase the alignment; in order to decrease it, the packed attribute must be specified as well. When used as part of a typedef, the aligned attribute can both increase and decrease alignment, and specifying the packed attribute generates a warning.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #18 from Petro Karashchenko --- Yes. So I just checked GCC man and see that The aligned attribute can only increase the alignment; but you can decrease it by specifying packed as well. See below. Note that the effectiveness of aligned attributes may be limited by inherent limitations in your linker. On many systems, the linker is only able to arrange for variables to be aligned up to a certain maximum alignment. (For some linkers, the maximum supported alignment may be very very small.) If your linker is only able to align variables up to a maximum of 8 byte alignment, then specifying aligned(16) in an __attribute__ will still only provide you with 8 byte alignment. See your linker documentation for further information. So typedef int tolerant_int __attribute__((aligned(1))); extern tolerant_int possibly_misaligned_data; "possibly_misaligned_data" will still be 4 bytes aligned. The real problem is that "packed" can be applied only to struct or union type definition, I can't just do typedef int tolerant_int __attribute__((packed)); extern tolerant_int possibly_misaligned_data; So it will simply not work and I need to wrap a variable into a struct or union.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #16 from Petro Karashchenko --- Again based on your description even if we go with putting "tolerance" on the type should not work because in "typedef int tolerant_int __attribute__((aligned(1)));" the "int" default alignment is 4 and we apply "1", so according to "The @code{aligned} attribute specifies a MINIMUM alignment for the variable or structure field, measured in bytes." the compiler should use the MAX of all alignments of the type MAX(4,1) is 4 and not 1. So typedef int int_1 __attribute__((aligned(1))); typedef int_1 int_2 __attribute__((aligned(2))); typedef int_2 int_4 __attribute__((aligned(4))); typedef int_4 int_8 __attribute__((aligned(8))); typedef int_8 int_16 __attribute__((aligned(16))); int_16 a; Then a should get aligned on 16 and not on 1.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #14 from Petro Karashchenko --- I probably need to file a ticket to allow "packed" to be applied to variables, and not only to types or structure fields.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #13 from Petro Karashchenko --- Sorry that I caused some confusion. I was reading some of the latest comments and didn't pay full attention to the ticket description. The reason for my comment is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662, which was closed as a duplicate of this issue. Regarding variable alignment vs. type alignment when it is specified, your statement seems to be correct; however, I agree that it still has a lot of open points. For example, what should the generated code be if we put the variable into a structure? typedef int __attribute__((vector_size(16))) v4si; struct { v4si a __attribute__((aligned(4))); } b; Should it still get aligned on 16 bytes or on 4 bytes? In my case I was looking for a way to generate alignment-tolerant code without using struct { int a; } __attribute__((packed)); Obviously "int a __attribute__((packed));" does not work, so I tried to solve it via the "__attribute__((aligned(1)))" attribute.
[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085 --- Comment #11 from Petro Karashchenko --- Sorry but based on @cindex @code{aligned} variable attribute @item aligned @itemx aligned (@var{alignment}) The @code{aligned} attribute specifies a MINIMUM alignment for the variable or structure field, measured in bytes. When specified, @var{alignment} must be an integer constant power of 2. Specifying no @var{alignment} argument implies the maximum alignment for the target, which is often, but by no means always, 8 or 16 bytes. I do not see any statement saying that giving a lower alignment is invalid. I see "attribute specifies a MINIMUM alignment" so "int i __attribute__((aligned(1)));" specifies that between 1 and 4 the 1 should be chosen as a "MINIMUM". The statement "must be an integer constant power of 2" is also valid because 1 is a 0 power of 2. So no questions here. "Thus IMHO this bug is invalid." -- I do not see any strong argument on this. All prerequisites from a description are met, so this is a pure bug.
[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387 --- Comment #7 from Petro Karashchenko --- Is it still 'UNCONFIRMED'? Or can it be moved to the 'CONFIRMED' or 'ASSIGNED' state?
[Bug middle-end/94662] New: __attribute__ aligned is ignored
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662 Bug ID: 94662 Summary: __attribute__ aligned is ignored Product: gcc Version: 9.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end Assignee: unassigned at gcc dot gnu.org Reporter: petro.karashchenko at gmail dot com Target Milestone: --- __attribute__ 'aligned' is ignored. Test case 1: -- int __attribute__((aligned(1))) var; int foo(void) { return var; } -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c -O0 -- Generated assembly: .cpu arm7tdmi .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 6 .eabi_attribute 34, 0 .eabi_attribute 18, 4 .file "test.c" .text .comm var,4,1 .align 1 .global foo .arch armv4t .syntax unified .code 16 .thumb_func .fpu softvfp .type foo, %function foo: @ Function supports interworking. @ args = 0, pretend = 0, frame = 0 @ frame_needed = 1, uses_anonymous_args = 0 push{r7, lr} add r7, sp, #0 ldr r3, .L3 ldr r3, [r3] movsr0, r3 mov sp, r7 @ sp needed pop {r7} pop {r1} bx r1 .L4: .align 2 .L3: .word var .size foo, .-foo .ident "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major) 9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]" -- Test case 2: -- int __attribute__((aligned(1))) * var = (int *)0x03; int foo(void) { return *var; } -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c -O0 -- Generated assembly: .cpu arm7tdmi .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 6 .eabi_attribute 34, 0 .eabi_attribute 18, 4 .file "test.c" .text .global var .data .type var, %object .size var, 4 var: .4byte 3 .text .align 1 .global foo .arch armv4t .syntax unified .code 16 .thumb_func .fpu softvfp .type foo, %function foo: @ Function supports interworking. 
@ args = 0, pretend = 0, frame = 0 @ frame_needed = 1, uses_anonymous_args = 0 push{r7, lr} add r7, sp, #0 ldr r3, .L3 ldr r3, [r3] ldr r3, [r3] movsr0, r3 mov sp, r7 @ sp needed pop {r7} pop {r1} bx r1 .L4: .align 2 .L3: .word var .size foo, .-foo .ident "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major) 9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]" -- Test case 3: -- int foo(void) { return *(int __attribute__((aligned(1))) *) 0x03; } -- arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c -O0 -- Generated assembly: .cpu arm7tdmi .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 1 .eabi_attribute 30, 6 .eabi_attribute 34, 0 .eabi_attribute 18, 4 .file "test.c" .text .align 1 .global foo .arch armv4t .syntax unified .code 16 .thumb_func .fpu softvfp .type foo, %function foo: @ Function supports interworking. @ args = 0, pretend = 0, frame = 0 @ frame_needed = 1, uses_anonymous_args = 0 push{r7, lr} add r7, sp, #0 movsr3, #3 ldr r3, [r3] movsr0, r3 mov sp, r7 @ sp needed pop {r7} pop {r1} bx r1 .size foo, .-foo .ident "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major) 9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]" -- In all 3 test cases I'm expecting that unaligned access code should be generated, but in all 3 test cases generated access is aligned. However in next test case the align attribute actually takes effect: -- int foo(void) { return **(int * __attribute__((aligned(1))) *) 0x03; } int foo1(void) { return ***(int * __attribute__((aligned(1))) * __attribute__((aligned(1)
[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387 --- Comment #6 from Petro Karashchenko --- Richard Biener, thank you for the suggestion, but __attribute__((aligned(..))) is applied only to the base address of the struct, hence to the first field only, so if I have other fields tightly packed and there are 16, 32 or 64 bit types, I will still get excess read instructions generated. In my case I have uint8_t *p0, uint8_t *p1 as inputs and can't rely on those pointers being aligned to 16, 32 or 64 bits; they are only byte aligned.
[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387 --- Comment #4 from Petro Karashchenko --- Andrew Pinski, could you please share with me the requirements needed for strict alignment? Actually, I do not understand why a read-write cycle is needed if no "read" or "modify" operation is requested (I mean no operations like |=, &=, +=, etc. are issued), but a "pure" write of a constant value is requested. In other words: what is the reason for reading a value that is discarded? If I remove 'volatile' from the struct typedef, I get pretty optimised code without excessive reads, so it seems to be a 'volatile'+'packed' combo.
[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387 --- Comment #1 from Petro Karashchenko --- Also, the ambiguity of the issue is that the generation of excess read instructions depends on the type of the field. Excess reads are not generated when 8-bit types are accessed, but are generated when data types greater than 8 bits are accessed.
[Bug middle-end/94387] New: Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387 Bug ID: 94387 Summary: Excess read instructions are generated in case of writing to fields of volatile + packed type (structure) Product: gcc Version: 9.3.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end Assignee: unassigned at gcc dot gnu.org Reporter: petro.karashchenko at gmail dot com Target Milestone: --- Created attachment 48140 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48140=edit preprocessed file Excess read instructions are generated when access members of volatile + packed types (structures): test.c: -- #include typedef volatile struct type1_s { uint32_t a1; uint8_t a2; uint8_t a3; uint8_t a4; uint8_t a5; } __attribute__((packed)) type1_t; typedef volatile struct { uint32_t b1; uint32_t b2; } __attribute__((packed)) type2_t; typedef volatile struct type3_s { type1_t h1; volatile union { uint8_t b[24]; type2_t c1; } __attribute__((packed)) h2; } __attribute__((packed)) type3_t; typedef volatile struct type4_s { uint32_t x1; uint8_t x2; uint16_t x3; uint8_t x4; uint8_t x5; uint8_t x6; } __attribute__((packed)) type4_t; static void my_func2(type3_t *p0, type4_t *p1) ; int my_func1(uint8_t *p0, uint8_t *p1) { type3_t *i = (type3_t *)p0; type4_t *o = (type4_t *)p1; my_func2(i, o); return 0; } static void my_func2(type3_t *p0, type4_t *p1) { p1->x1 = 0xFF01; p1->x6 = 1; p1->x2 = 2; p1->x4 = p0->h1.a3; p1->x5 = p0->h1.a4; p1->x3 = 0; } -- arceb-elf32-gcc -save-temps -Wall -Wextra -c -mcpu=arc600 -mtune=arc600 -mbig-endian -mmul64 test.c -Os -- Disassembly: .global my_func1 .type my_func1, @function my_func1: ldb_s r2,[r1] mov r2,-1 ;6 stb_s r2,[r1] ldb_s r3,[r1,1] stb_s r2,[r1,1] ldb_s r3,[r1,2] stb_s r2,[r1,2] ldb_s r2,[r1,3] mov_s r3,1;0 stb_s r3,[r1,3] stb_s r3,[r1,9] mov_s r3,2 stb_s r3,[r1,4] ldb_s r3,[r0,5] mov_s r2,0;0 stb_s r3,[r1,7] ldb_s r0,[r0,6] stb_s r0,[r1,8] ldb_s r0,[r1,5] stb_s r2,[r1,5] ldb_s r0,[r1,6] stb_s r2,[r1,6] mov_s r0,0;0 j_s [blink] .size my_func1, 
.-my_func1 -- Expected disassembly: .global my_func1 .type my_func1, @function my_func1: mov r2,-1 ;6 stb_s r2,[r1] stb_s r2,[r1,1] stb_s r2,[r1,2] mov_s r3,1;0 stb_s r3,[r1,3] stb_s r3,[r1,9] mov_s r3,2 stb_s r3,[r1,4] ldb_s r3,[r0,5] mov_s r2,0;0 stb_s r3,[r1,7] ldb_s r0,[r0,6] stb_s r0,[r1,8] stb_s r2,[r1,5] stb_s r2,[r1,6] mov_s r0,0;0 j_s [blink] .size my_func1, .-my_func1 -- I have checked same code compilation with: arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c -Os The result is pretty much the same, so it is not architecture dependent bug.