[Bug middle-end/103870] ARM: Wrong branch instruction with optimization O2 and higher

2021-12-30 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103870

--- Comment #1 from Petro Karashchenko  ---
I would expect a "bgt" instruction instead of "bne" in this case.

[Bug middle-end/103870] New: ARM: Wrong branch instruction with optimization O2 and higher

2021-12-30 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103870

Bug ID: 103870
   Summary: ARM: Wrong branch instruction with optimization O2 and
higher
   Product: gcc
   Version: 10.3.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: petro.karashchenko at gmail dot com
  Target Milestone: ---

Code test.c:

void f(void)
{
  double a = 2.2204460492503131e-16;
  int b = 1;
  int c = 0;

  while (a > 0)
  {
    if (++c >= 10)
    {
      b *= 2;
      c = 0;
    }

    if (b > 1)
    {
      a *= b;
    }
  }
}
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -march=armv7e-m -mtune=cortex-m7
-mthumb -mfpu=fpv5-d16 -mfloat-abi=hard test.c -O0
--
Generated assembly:
.arch armv7e-m
.fpu fpv5-d16
.eabi_attribute 28, 1
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file   "test2.c"
.text
.align  1
.global f
.syntax unified
.thumb
.thumb_func
.type   f, %function
f:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
push {r7}
sub sp, sp, #20
add r7, sp, #0
mov r2, #0
ldr r3, .L5
strd r2, [r7, #8]
movs r3, #1
str r3, [r7, #4]
movs r3, #0
str r3, [r7]
b   .L2
.L4:
ldr r3, [r7]
adds r3, r3, #1
str r3, [r7]
ldr r3, [r7]
cmp r3, #9
ble .L3
ldr r3, [r7, #4]
lsls r3, r3, #1
str r3, [r7, #4]
movs r3, #0
str r3, [r7]
.L3:
ldr r3, [r7, #4]
cmp r3, #1
ble .L2
ldr r3, [r7, #4]
vmov s15, r3 @ int
vcvt.f64.s32 d7, s15
vldr.64 d6, [r7, #8]
vmul.f64 d7, d6, d7
vstr.64 d7, [r7, #8]
.L2:
vldr.64 d7, [r7, #8]
vcmpe.f64   d7, #0
vmrs APSR_nzcv, FPSCR
bgt .L4
nop
nop
adds r7, r7, #20
mov sp, r7
@ sp needed
ldr r7, [sp], #4
bx  lr
.L6:
.align  2
.L5:
.word   1018167296
.size   f, .-f
.ident  "GCC: (GNU Arm Embedded Toolchain 10.3-2021.10) 10.3.1 20210824
(release)"
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -march=armv7e-m -mtune=cortex-m7
-mthumb -mfpu=fpv5-d16 -mfloat-abi=hard test.c -Os
--
Generated assembly:
.arch armv7e-m
.fpu fpv5-d16
.eabi_attribute 28, 1
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 4
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file   "test2.c"
.text
.align  1
.global f
.syntax unified
.thumb
.thumb_func
.type   f, %function
f:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
vldr.64 d7, .L10
movs r3, #0
movs r2, #1
.L5:
adds r3, r3, #1
cmp r3, #9
ble .L2
lsls r2, r2, #1
movs r3, #0
.L3:
vmov s13, r2 @ int
vcvt.f64.s32 d6, s13
vmul.f64 d7, d7, d6
b   .L4
.L2:
cmp r2, #1
bne .L3
.L4:
vcmpe.f64   d7, #0
vmrs APSR_nzcv, FPSCR
bgt .L5
bx  lr
.L11:
.align  3
.L10:
.word   0
.word   1018167296
.size   f, .-f
.ident  "GCC: (GNU Arm Embedded Toolchain 10.3-2021.10) 10.3.1 20210824
(release)"
--

The problem appears with the condition "if (b > 1)". If -O0 is applied, the code
ldr r3, [r7, #4]
cmp r3, #1
ble .L2
is generated, which uses the "ble" instruction; however, with -Os we are
getting
cmp r2, #1
bne .L3
which uses the "bne" instruction instead. The issue is that, with the code
above, "b *= 2;" at some point makes "b" overflow and become equal to zero. The
"ble" instruction will catch this, but "bne" will not. So the use of
"bne" is inappropriate for the "if (b > 1)" condition, which is intended to
exclude negative values and zero.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #20 from Petro Karashchenko  
---
I just checked the following case:

typedef int tolerant_int __attribute__((aligned(1)));
tolerant_int var;

int foo(void)
{
  return var;
}
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c
-O0
--
.cpu arm7tdmi
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file   "test.c"
.text
.comm   var,4,1
.align  1
.global foo
.arch armv4t
.syntax unified
.code   16
.thumb_func
.fpu softvfp
.type   foo, %function
foo:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
push {r7, lr}
add r7, sp, #0
ldr r3, .L3
ldr r3, [r3]
movs r0, r3
mov sp, r7
@ sp needed
pop {r7}
pop {r1}
bx  r1
.L4:
.align  2
.L3:
.word   var
.size   foo, .-foo
.ident  "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major)
9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]"
-

So it seems that decreasing the alignment does not work on types.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #19 from Petro Karashchenko  
---
Sorry, my bad again. I just checked the GCC 11 man page:

When used on a struct, or struct member, the aligned attribute can only
increase the alignment; in order to decrease it, the packed attribute must be
specified as well. When used as part of a typedef, the aligned attribute can
both increase and decrease alignment, and specifying the packed attribute
generates a warning.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #18 from Petro Karashchenko  
---
Yes. So I just checked the GCC man page and see that:

The aligned attribute can only increase the alignment; but you can decrease it
by specifying packed as well. See below.

Note that the effectiveness of aligned attributes may be limited by inherent
limitations in your linker. On many systems, the linker is only able to arrange
for variables to be aligned up to a certain maximum alignment. (For some
linkers, the maximum supported alignment may be very very small.) If your
linker is only able to align variables up to a maximum of 8 byte alignment,
then specifying aligned(16) in an __attribute__ will still only provide you
with 8 byte alignment. See your linker documentation for further information.

So

typedef int tolerant_int __attribute__((aligned(1)));
extern tolerant_int possibly_misaligned_data;

"possibly_misaligned_data" will still be 4-byte aligned.

The real problem is that "packed" can be applied only to a struct or union type
definition; I can't just do

typedef int tolerant_int __attribute__((packed));
extern tolerant_int possibly_misaligned_data;

So it will simply not work and I need to wrap a variable into a struct or
union.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #16 from Petro Karashchenko  
---
Again, based on your description, even putting "tolerance" on the
type should not work, because in "typedef int tolerant_int
__attribute__((aligned(1)));" the default alignment of "int" is 4 and we apply
"1"; so, according to "The @code{aligned} attribute specifies a MINIMUM
alignment for the variable or structure field, measured in bytes.", the compiler
should use the MAX of all alignments of the type: MAX(4,1) is 4, not 1.

So
typedef int int_1 __attribute__((aligned(1)));
typedef int_1 int_2 __attribute__((aligned(2)));
typedef int_2 int_4 __attribute__((aligned(4)));
typedef int_4 int_8 __attribute__((aligned(8)));
typedef int_8 int_16 __attribute__((aligned(16)));

int_16 a;

Then "a" should be aligned to 16 and not to 1.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #14 from Petro Karashchenko  
---
I probably need to file a ticket to allow "packed" to be applied to variables
and not only to types or structure fields.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #13 from Petro Karashchenko  
---
Sorry for causing some confusion. I was reading the latest comments and
didn't pay full attention to the ticket description. The reason for my comment
is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662, which was closed as a
duplicate of this issue.

Regarding variable alignment vs. type alignment when both are specified, your
statement seems to be correct; however, I agree that there are still a lot of open
points. For example, what code should be generated if we put the variable into a structure:

typedef int __attribute__((vector_size(16))) v4si; 

struct {
  v4si a __attribute__((aligned(4)));
} b;

Should it still get aligned on 16 bytes or 4 bytes?

In my case I was looking for a way to generate alignment-tolerant code without
using
struct {
  int a;
} __attribute__((packed));

Obviously, "int a __attribute__((packed));" does not work, so I tried to solve
it via the "__attribute__((aligned(1)))" attribute.

[Bug middle-end/88085] User alignments on var decls not respected if smaller than type alignment

2021-09-02 Thread petro.karashchenko at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

--- Comment #11 from Petro Karashchenko  
---
Sorry, but based on

@cindex @code{aligned} variable attribute
@item aligned
@itemx aligned (@var{alignment})
The @code{aligned} attribute specifies a MINIMUM alignment for the variable
or structure field, measured in bytes.  When specified, @var{alignment} must
be an integer constant power of 2.  Specifying no @var{alignment} argument
implies the maximum alignment for the target, which is often, but by no
means always, 8 or 16 bytes.

I do not see any statement saying that giving a lower alignment is invalid.
I see "attribute specifies a MINIMUM alignment" so "int i
__attribute__((aligned(1)));" specifies that between 1 and 4 the 1 should be
chosen as a "MINIMUM".

The statement "must be an integer constant power of 2" is also satisfied, because 1
is 2 to the power of 0. So no questions here.

"Thus IMHO this bug is invalid." -- I do not see any strong argument on this.
All prerequisites from a description are met, so this is a pure bug.

[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)

2020-04-19 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387

--- Comment #7 from Petro Karashchenko  ---
Is it still 'UNCONFIRMED'? Or can it be moved to the 'CONFIRMED' or 'ASSIGNED'
state?

[Bug middle-end/94662] New: __attribute__ aligned is ignored

2020-04-19 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662

Bug ID: 94662
   Summary: __attribute__ aligned is ignored
   Product: gcc
   Version: 9.2.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: petro.karashchenko at gmail dot com
  Target Milestone: ---

__attribute__ 'aligned' is ignored.

Test case 1:
--
int __attribute__((aligned(1))) var;

int foo(void)
{
  return var;
}
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c
-O0
--
Generated assembly:
.cpu arm7tdmi
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file   "test.c"
.text
.comm   var,4,1
.align  1
.global foo
.arch armv4t
.syntax unified
.code   16
.thumb_func
.fpu softvfp
.type   foo, %function
foo:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
push {r7, lr}
add r7, sp, #0
ldr r3, .L3
ldr r3, [r3]
movs r0, r3
mov sp, r7
@ sp needed
pop {r7}
pop {r1}
bx  r1
.L4:
.align  2
.L3:
.word   var
.size   foo, .-foo
.ident  "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major)
9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]"
--

Test case 2:
--
int __attribute__((aligned(1))) * var = (int *)0x03;

int foo(void)
{
  return *var;
}
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c
-O0
--
Generated assembly:
.cpu arm7tdmi
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file   "test.c"
.text
.global var
.data
.type   var, %object
.size   var, 4
var:
.4byte  3
.text
.align  1
.global foo
.arch armv4t
.syntax unified
.code   16
.thumb_func
.fpu softvfp
.type   foo, %function
foo:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
push {r7, lr}
add r7, sp, #0
ldr r3, .L3
ldr r3, [r3]
ldr r3, [r3]
movs r0, r3
mov sp, r7
@ sp needed
pop {r7}
pop {r1}
bx  r1
.L4:
.align  2
.L3:
.word   var
.size   foo, .-foo
.ident  "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major)
9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]"
--

Test case 3:
--
int foo(void)
{
  return *(int __attribute__((aligned(1))) *) 0x03;
}
--
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c
-O0
--
Generated assembly:
.cpu arm7tdmi
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file   "test.c"
.text
.align  1
.global foo
.arch armv4t
.syntax unified
.code   16
.thumb_func
.fpu softvfp
.type   foo, %function
foo:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
push {r7, lr}
add r7, sp, #0
movs r3, #3
ldr r3, [r3]
movs r0, r3
mov sp, r7
@ sp needed
pop {r7}
pop {r1}
bx  r1
.size   foo, .-foo
.ident  "GCC: (GNU Tools for Arm Embedded Processors 9-2019-q4-major)
9.2.1 20191025 (release) [ARM/arm-9-branch revision 277599]"
--

In all 3 test cases I'm expecting that unaligned access code should be
generated, but in all 3 test cases generated access is aligned.

However, in the next test case the aligned attribute actually takes effect:
--
int foo(void)
{
  return **(int * __attribute__((aligned(1))) *) 0x03;
}

int foo1(void)
{
  return ***(int * __attribute__((aligned(1))) * __attribute__((aligned(1)

[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)

2020-03-30 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387

--- Comment #6 from Petro Karashchenko  ---
Richard Biener, thank you for the suggestion, but __attribute__((aligned(..))) is
applied only to the base address of the struct, hence to the first field only,
so if I have other fields tightly packed and there are 16-, 32- or 64-bit
types, I will still get excess read instructions generated. In my case I
have uint8_t *p0 and uint8_t *p1 as inputs and can't rely on those pointers
being aligned to 16, 32 or 64; they are only byte aligned.

[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)

2020-03-29 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387

--- Comment #4 from Petro Karashchenko  ---
Andrew Pinski, could you please share with me the requirements needed for strict
alignment?
Actually, I do not understand why a read-write cycle is needed if no "read" or
"modify" operation is requested (I mean no operations like |=, &=, +=, etc. are
issued), but only a "pure" write of a constant value is requested. In other words: what
is the reason for reading a value that is then discarded?

If I remove 'volatile' from the struct typedef, I get pretty well optimised code
without excessive reads, so it seems to be a 'volatile'+'packed' combo.

[Bug middle-end/94387] Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)

2020-03-29 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387

--- Comment #1 from Petro Karashchenko  ---
Also, a confusing aspect of the issue is that the generation of excess read
instructions depends on the type of the field. Excess reads are not generated when
8-bit types are accessed, but are generated when data types greater than 8 bits are accessed.

[Bug middle-end/94387] New: Excess read instructions are generated in case of writing to fields of volatile + packed type (structure)

2020-03-29 Thread petro.karashchenko at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94387

Bug ID: 94387
   Summary: Excess read instructions are generated in case of
writing to fields of volatile + packed type
(structure)
   Product: gcc
   Version: 9.3.1
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: petro.karashchenko at gmail dot com
  Target Milestone: ---

Created attachment 48140
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48140&action=edit
preprocessed file

Excess read instructions are generated when accessing members of volatile + packed
types (structures):
test.c:
--
#include <stdint.h>

typedef volatile struct type1_s {
uint32_t a1;
uint8_t a2;
uint8_t a3;
uint8_t a4;
uint8_t a5;
} __attribute__((packed)) type1_t;

typedef volatile struct {
uint32_t b1;
uint32_t b2;
} __attribute__((packed)) type2_t;

typedef volatile struct type3_s {
type1_t h1;
volatile union {
uint8_t b[24];
type2_t c1;
} __attribute__((packed)) h2;
} __attribute__((packed)) type3_t;

typedef volatile struct type4_s {
uint32_t x1;
uint8_t x2;
uint16_t x3;
uint8_t x4;
uint8_t x5;
uint8_t x6;
} __attribute__((packed)) type4_t;

static void my_func2(type3_t *p0, type4_t *p1) ;

int my_func1(uint8_t *p0, uint8_t *p1)
{
type3_t *i = (type3_t *)p0;
type4_t *o = (type4_t *)p1;

my_func2(i, o);

return 0;
}

static void my_func2(type3_t *p0, type4_t *p1)
{
p1->x1 = 0xFF01;
p1->x6 = 1;
p1->x2 = 2;
p1->x4 = p0->h1.a3;
p1->x5 = p0->h1.a4;
p1->x3 = 0;
}
--
arceb-elf32-gcc -save-temps -Wall -Wextra -c -mcpu=arc600 -mtune=arc600
-mbig-endian -mmul64 test.c -Os
--
Disassembly:
.global my_func1
.type   my_func1, @function
my_func1:
ldb_s r2,[r1]
mov r2,-1   ;6
stb_s r2,[r1]
ldb_s r3,[r1,1]
stb_s r2,[r1,1]
ldb_s r3,[r1,2]
stb_s r2,[r1,2]
ldb_s r2,[r1,3]
mov_s   r3,1;0
stb_s r3,[r1,3]
stb_s r3,[r1,9]
mov_s r3,2
stb_s r3,[r1,4]
ldb_s r3,[r0,5]
mov_s   r2,0;0
stb_s r3,[r1,7]
ldb_s r0,[r0,6]
stb_s r0,[r1,8]
ldb_s r0,[r1,5]
stb_s r2,[r1,5]
ldb_s r0,[r1,6]
stb_s r2,[r1,6]
mov_s   r0,0;0
j_s [blink]
.size   my_func1, .-my_func1
--
Expected disassembly:
.global my_func1
.type   my_func1, @function
my_func1:
mov r2,-1   ;6
stb_s r2,[r1]
stb_s r2,[r1,1]
stb_s r2,[r1,2]
mov_s   r3,1;0
stb_s r3,[r1,3]
stb_s r3,[r1,9]
mov_s r3,2
stb_s r3,[r1,4]
ldb_s r3,[r0,5]
mov_s   r2,0;0
stb_s r3,[r1,7]
ldb_s r0,[r0,6]
stb_s r0,[r1,8]
stb_s r2,[r1,5]
stb_s r2,[r1,6]
mov_s   r0,0;0
j_s [blink]
.size   my_func1, .-my_func1
--
I have checked compilation of the same code with:
arm-none-eabi-gcc -save-temps -Wall -Wextra -c -mcpu=arm7tdmi -mthumb test.c
-Os
The result is pretty much the same, so it is not an architecture-dependent bug.