[PATCH] rtl: use rtx_code for gen_ccmp_first and gen_ccmp_next

2023-08-23 Thread Richard Earnshaw via Gcc-patches
Note, this patch is dependent on the patch I posted yesterday to
forward declare rtx_code in coretypes.h.

--
Now that we have a forward declaration of rtx_code in coretypes.h, we
can adjust these hooks to take rtx_code arguments rather than an int.

gcc/ChangeLog:

* target.def (gen_ccmp_first, gen_ccmp_next): Use rtx_code for
CODE, CMP_CODE and BIT_CODE arguments.
* config/aarch64/aarch64.cc (aarch64_gen_ccmp_first): Likewise.
(aarch64_gen_ccmp_next): Likewise.
* doc/tm.texi: Regenerated.
---
 gcc/config/aarch64/aarch64.cc | 5 +++--
 gcc/doc/tm.texi   | 4 ++--
 gcc/target.def| 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 560e5431636..bc09185b8ec 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25585,7 +25585,7 @@ aarch64_asan_shadow_offset (void)
 
 static rtx
 aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
-			int code, tree treeop0, tree treeop1)
+			rtx_code code, tree treeop0, tree treeop1)
 {
   machine_mode op_mode, cmp_mode, cc_mode = CCmode;
   rtx op0, op1;
@@ -25659,7 +25659,8 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
 
 static rtx
 aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
-		   int cmp_code, tree treeop0, tree treeop1, int bit_code)
+		   rtx_code cmp_code, tree treeop0, tree treeop1,
+		   rtx_code bit_code)
 {
   rtx op0, op1, target;
   machine_mode op_mode, cmp_mode, cc_mode = CCmode;
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 95ba56e05ae..75cb8e3417c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12005,7 +12005,7 @@ This target hook is required only when the target has several different
 modes and they have different conditional execution capability, such as ARM.
 @end deftypefn
 
-@deftypefn {Target Hook} rtx TARGET_GEN_CCMP_FIRST (rtx_insn **@var{prep_seq}, rtx_insn **@var{gen_seq}, int @var{code}, tree @var{op0}, tree @var{op1})
+@deftypefn {Target Hook} rtx TARGET_GEN_CCMP_FIRST (rtx_insn **@var{prep_seq}, rtx_insn **@var{gen_seq}, rtx_code @var{code}, tree @var{op0}, tree @var{op1})
 This function prepares to emit a comparison insn for the first compare in a
  sequence of conditional comparisions.  It returns an appropriate comparison
  with @code{CC} for passing to @code{gen_ccmp_next} or @code{cbranch_optab}.
@@ -12015,7 +12015,7 @@ This function prepares to emit a comparison insn for the first compare in a
  @var{code} is the @code{rtx_code} of the compare for @var{op0} and @var{op1}.
 @end deftypefn
 
-@deftypefn {Target Hook} rtx TARGET_GEN_CCMP_NEXT (rtx_insn **@var{prep_seq}, rtx_insn **@var{gen_seq}, rtx @var{prev}, int @var{cmp_code}, tree @var{op0}, tree @var{op1}, int @var{bit_code})
+@deftypefn {Target Hook} rtx TARGET_GEN_CCMP_NEXT (rtx_insn **@var{prep_seq}, rtx_insn **@var{gen_seq}, rtx @var{prev}, rtx_code @var{cmp_code}, tree @var{op0}, tree @var{op1}, rtx_code @var{bit_code})
 This function prepares to emit a conditional comparison within a sequence
  of conditional comparisons.  It returns an appropriate comparison with
  @code{CC} for passing to @code{gen_ccmp_next} or @code{cbranch_optab}.
diff --git a/gcc/target.def b/gcc/target.def
index 7d684296c17..3ad0bde3ece 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2735,7 +2735,7 @@ DEFHOOK
  insns are saved in @var{gen_seq}.  They will be emitted when all the\n\
  compares in the conditional comparision are generated without error.\n\
  @var{code} is the @code{rtx_code} of the compare for @var{op0} and @var{op1}.",
- rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, int code, tree op0, tree op1),
+ rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx_code code, tree op0, tree op1),
  NULL)
 
 DEFHOOK
@@ -2752,7 +2752,7 @@ DEFHOOK
  be appropriate for passing to @code{gen_ccmp_next} or @code{cbranch_optab}.\n\
  @var{code} is the @code{rtx_code} of the compare for @var{op0} and @var{op1}.\n\
  @var{bit_code} is @code{AND} or @code{IOR}, which is the op on the compares.",
- rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, int cmp_code, tree op0, tree op1, int bit_code),
+ rtx, (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, rtx_code cmp_code, tree op0, tree op1, rtx_code bit_code),
  NULL)
 
 /* Return a new value for loop unroll size.  */


[PATCH] rtl: Forward declare rtx_code

2023-08-22 Thread Richard Earnshaw via Gcc-patches

Now that we require C++ 11, we can safely forward declare rtx_code
so that we can use it in target hooks.

gcc/ChangeLog
* coretypes.h (rtx_code): Add forward declaration.
* rtl.h (rtx_code): Make compatible with forward declaration.
---
 gcc/coretypes.h | 4 
 gcc/rtl.h   | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index ca8837cef67..51e9ce0 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -100,6 +100,10 @@ struct gimple;
 typedef gimple *gimple_seq;
 struct gimple_stmt_iterator;
 
+/* Forward declare rtx_code, so that we can use it in target hooks without
+   needing to pull in rtl.h.  */
+enum rtx_code : unsigned;
+
 /* Forward decls for leaf gimple subclasses (for individual gimple codes).
Keep this in the same order as the corresponding codes in gimple.def.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index e1c51156f90..0e9491b89b4 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -45,7 +45,7 @@ class predefined_function_abi;
 /* Register Transfer Language EXPRESSIONS CODES */
 
 #define RTX_CODE	enum rtx_code
-enum rtx_code  {
+enum rtx_code : unsigned {
 
 #define DEF_RTL_EXPR(ENUM, NAME, FORMAT, CLASS)   ENUM ,
 #include "rtl.def"		/* rtl expressions are documented here */


Re: [PATCH] machine_mode type size: Extend enum size from 8-bit to 16-bit

2023-04-11 Thread Richard Earnshaw via Gcc-patches




On 11/04/2023 10:46, Richard Sandiford via Gcc-patches wrote:

 writes:

ARM SVE has:svint8_t, svint8x2_t, svint8x3_t, svint8x4_t
As far as I known, they don't have tuple type for partial vector.


Yeah, there are no separate types for partial vectors, but there
are separate modes.  E.g. VNx2QI is a partial vector of QIs,
with each QI stored in a 64-bit container.

I agree with all the comments about the danger of growing the number of
modes too much.  But it looks like rtx_def should be easy to rearrange.
Unless I'm missing something, there are less than 256 rtx codes at
present.  So one simple option would be to make the code 8 bits and
the machine_mode 16 bits (and swap them, so that they stay well-aligned
wrt their size).

That of course would create new problem if we want more than 256 codes
in future.  But then there would be the option of a non-power-of-2
split (12/12 or whatever).  Also, it's possible to multiplex operations
into a single code by adding an extra operand, whereas it's harder to
multiplex modes.

Thanks,
Richard


The rtx code and mode are both accessed quite frequently, making them 
non-native machine sizes might have impact on the performance of 
accessing the fields.


[committed] arm: mve: fix auto-inc generation [PR107674]

2023-04-06 Thread Richard Earnshaw via Gcc-patches

My change r13-416-g485a0ae0982abe caused the compiler to stop
generating auto-inc operations on mve loads and stores.  The fix
is to check whether there is a replacement register available
when in strict mode and the register is still a pseudo.

gcc:

PR target/107674
* config/arm/arm.cc (arm_effective_regno): New function.
(mve_vector_mem_operand): Use it.
---
 gcc/config/arm/arm.cc | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index a46627bc375..bf7ff9a9704 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -13639,6 +13639,19 @@ arm_coproc_mem_operand_no_writeback (rtx op)
   return arm_coproc_mem_operand_wb (op, 0);
 }
 
+/* In non-STRICT mode, return the register number; in STRICT mode return
+   the hard regno or the replacement if it won't be a mem.  Otherwise, return
+   the original pseudo number.  */
+static int
+arm_effective_regno (rtx op, bool strict)
+{
+  gcc_assert (REG_P (op));
+  if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
+  || !reg_renumber || reg_renumber[REGNO (op)] < 0)
+return REGNO (op);
+  return reg_renumber[REGNO (op)];
+}
+
 /* This function returns TRUE on matching mode and op.
 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
 2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13).  */
@@ -13651,7 +13664,7 @@ mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
   /* Match: (mem (reg)).  */
   if (REG_P (op))
 {
-  int reg_no = REGNO (op);
+  reg_no = arm_effective_regno (op, strict);
   return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
 	   ? reg_no <= LAST_LO_REGNUM
 	   : reg_no < LAST_ARM_REGNUM)
@@ -13662,7 +13675,7 @@ mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
   if (code == POST_INC || code == PRE_DEC
   || code == PRE_INC || code == POST_DEC)
 {
-  reg_no = REGNO (XEXP (op, 0));
+  reg_no = arm_effective_regno (XEXP (op, 0), strict);
   return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
 	   ? reg_no <= LAST_LO_REGNUM
 	   :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
@@ -13678,7 +13691,7 @@ mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
 	   || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
 	   && GET_CODE (XEXP (op, 1)) == CONST_INT))
 {
-  reg_no = REGNO (XEXP (op, 0));
+  reg_no = arm_effective_regno (XEXP (op, 0), strict);
   if (code == PLUS)
 	val = INTVAL (XEXP (op, 1));
   else


Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-24 Thread Richard Earnshaw via Gcc-patches




On 23/02/2023 21:20, Alexandre Oliva wrote:

On Feb 23, 2023, Alexandre Oliva  wrote:


On Feb 23, 2023, Richard Earnshaw  wrote:

On 22/02/2023 19:57, Alexandre Oliva wrote:

On Feb 21, 2023, Richard Earnshaw  wrote:


Rather than scanning for the triplet, a better test would be



{ xfail { arm_eabi } }


Indeed, thanks.  Here's the updated patch, retested.  Ok to install?



Based on Nathan's comments, we should just skip the test on arm_eabi,
it's simply not applicable.



Like this, I suppose.  Retested on x86_64-linux-gnu (trunk) and
arm-wrs-vxworks7 (gcc-12).  Ok to install?


Erhm, actually, that version still ran the assembler scans and failed.
This one skips the testset entirely.


Yeah, I tried something like that and it didn't appear to work. Perhaps 
it's a bug in the way dg-do-module is implemented.





[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

From: Alexandre Oliva 

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails, so skip it on
arm_eabi.


for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: Skip on arm_eabi.
---
  gcc/testsuite/g++.dg/modules/virt-2_a.C |3 +++
  1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C
index 580552be5a0d8..ede711c3e83be 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -1,3 +1,6 @@
+// AAPCS overrides TARGET_CXX_KEY_METHOD_MAY_BE_INLINE,
+// in a way that invalidates this test.
+// { dg-skip-if "TARGET_CXX_KEY_METHOD_MAY_BE_INLINE" { arm_eabi } }


Given the logic of this macro, the text should be 
"!TARGET_CXX_METHOD_MAY_BE_INLINE".


OK with that change.

R.


  // { dg-module-do run }
  // { dg-additional-options -fmodules-ts }
  export module foo;




Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-23 Thread Richard Earnshaw via Gcc-patches




On 22/02/2023 19:57, Alexandre Oliva wrote:

On Feb 21, 2023, Richard Earnshaw  wrote:


Rather than scanning for the triplet, a better test would be



{ xfail { arm_eabi } }


Indeed, thanks.  Here's the updated patch, retested.  Ok to install?


Based on Nathan's comments, we should just skip the test on arm_eabi, 
it's simply not applicable.


R.




[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

From: Alexandre Oliva 

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails.

Skipping the test or conditionally dropping the inline keyword breaks
subsequent tests, so I'm XFAILing the expectation that vtable and rtti
symbols are output on arm_eabi targets.


for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: XFAIL syms on arm_eabi.
---
  gcc/testsuite/g++.dg/modules/virt-2_a.C |6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C
index 580552be5a0d8..f5d68878f50fb 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -22,6 +22,6 @@ export int Visit (Visitor *v)
  }
  
  // Emit here

-// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} } }
+// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} { xfail { arm_eabi } } } }
+// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} { xfail { arm_eabi } } } }
+// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} { xfail { arm_eabi } } } }




Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-21 Thread Richard Earnshaw via Gcc-patches




On 21/02/2023 16:31, Richard Earnshaw via Gcc-patches wrote:

On 17/02/2023 06:09, Alexandre Oliva via Gcc-patches wrote:

On Apr  5, 2022, Alexandre Oliva  wrote:


Would something like this be acceptable/desirable?  It's overreaching,
in that not all arm platforms are expected to fail, but the result on
them will be an unexpected pass, which is not quite as bad as the
unexpected fail we get on most arm variants now.


Ping?
https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592763.html

[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails.

Skipping the test or conditionally dropping the inline keyword breaks
subsequent tests, so I'm XFAILing the expectation that vtable and rtti
symbols are output on arm*-*-*.

Retested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?



I started looking at this a few weeks back, but I was a bit confused by 
the testcase and then never got around to following up.


The Arm C++ binding rules normally exclude using an inline function 
definition from being chosen as the key function because this not 
uncommonly appears in a header file; instead a later function in the 
class is defined to take that role, if such a function exists (in effect 
an inline function is treated the same way as if the function definition 
appeared within the class definition itself).


But in this class we have only the one function, so in effect this 
testcase appears to fall back to the 'no key function' rule and as such 
I'd expect the class impedimenta to be required in all instances of the 
function.  That doesn't seem to be happening, so either there's 
something I'm missing, or there's something the compiler is doing wrong 
for this case.


Nathan, your insights would be appreciated here.

R.




for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: XFAIL syms on arm*-*-*.
---
  gcc/testsuite/g++.dg/modules/virt-2_a.C |    6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C

index 580552be5a0d8..b265515e2c7fd 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -22,6 +22,6 @@ export int Visit (Visitor *v)
  }
  // Emit here
-// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} } }
+// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} { xfail arm*-*-* 
} } }
+// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} { xfail arm*-*-* 
} } }
+// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} { xfail arm*-*-* 
} } }




Rather than scanning for the triplet, a better test would be

{ xfail { arm_eabi } }

Or something along those lines.

R.




Re: C++ modules and AAPCS/ARM EABI clash on inline key methods

2023-02-21 Thread Richard Earnshaw via Gcc-patches

On 17/02/2023 06:09, Alexandre Oliva via Gcc-patches wrote:

On Apr  5, 2022, Alexandre Oliva  wrote:


Would something like this be acceptable/desirable?  It's overreaching,
in that not all arm platforms are expected to fail, but the result on
them will be an unexpected pass, which is not quite as bad as the
unexpected fail we get on most arm variants now.


Ping?
https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592763.html

[PR105224] C++ modules and AAPCS/ARM EABI clash on inline key methods

g++.dg/modules/virt-2_a.C fails on arm-eabi and many other arm targets
that use the AAPCS variant.  ARM is the only target that overrides
TARGET_CXX_KEY_METHOD_MAY_BE_INLINE.  It's not clear to me which way
the clash between AAPCS and C++ Modules design should be resolved, but
currently it favors AAPCS and thus the test fails.

Skipping the test or conditionally dropping the inline keyword breaks
subsequent tests, so I'm XFAILing the expectation that vtable and rtti
symbols are output on arm*-*-*.

Retested on arm-vxworks7 (gcc-12) and arm-eabi (trunk).  Ok to install?



I started looking at this a few weeks back, but I was a bit confused by 
the testcase and then never got around to following up.


The Arm C++ binding rules normally exclude using an inline function 
definition from being chosen as the key function because this not 
uncommonly appears in a header file; instead a later function in the 
class is defined to take that role, if such a function exists (in effect 
an inline function is treated the same way as if the function definition 
appeared within the class definition itself).


But in this class we have only the one function, so in effect this 
testcase appears to fall back to the 'no key function' rule and as such 
I'd expect the class impedimenta to be required in all instances of the 
function.  That doesn't seem to be happening, so either there's 
something I'm missing, or there's something the compiler is doing wrong 
for this case.


Nathan, your insights would be appreciated here.

R.




for  gcc/testsuite/ChangeLog

PR c++/105224
* g++.dg/modules/virt-2_a.C: XFAIL syms on arm*-*-*.
---
  gcc/testsuite/g++.dg/modules/virt-2_a.C |6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/virt-2_a.C 
b/gcc/testsuite/g++.dg/modules/virt-2_a.C
index 580552be5a0d8..b265515e2c7fd 100644
--- a/gcc/testsuite/g++.dg/modules/virt-2_a.C
+++ b/gcc/testsuite/g++.dg/modules/virt-2_a.C
@@ -22,6 +22,6 @@ export int Visit (Visitor *v)
  }
  
  // Emit here

-// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} } }
-// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} } }
+// { dg-final { scan-assembler {_ZTVW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTIW3foo7Visitor:} { xfail arm*-*-* } } }
+// { dg-final { scan-assembler {_ZTSW3foo7Visitor:} { xfail arm*-*-* } } }




Re: [PATCH][GCC] arm: Optimize arm-mlib.h header inclusion (pr108505).

2023-02-08 Thread Richard Earnshaw via Gcc-patches




On 27/01/2023 17:44, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

I have committed a fix [1] into gcc trunk for a build issue mentioned in 
pr108505 and
latter received few upstream comments proposing more robust fix for this issue.

In this patch I'm addressing those comments and sending this as a followup 
patch.

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?



OK.  But please correct the syntax for the PR in the subject.  It should 
be "[PR...]" not "(PR...)".


R.


[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610513.html

Regards,
Srinath.

gcc/ChangeLog:

2023-01-27  Srinath Parvathaneni  

 PR target/108505
 * config.gcc (tm_mlib_file): Define new variable.


### Attachment also inlined for ease of reply###


diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
89f56047cfe3126bc6c8e90c8b4840dea13538f9..2aab92bbfd8b4088259ebf9b565af8e8bbef1122
 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4355,6 +4355,7 @@ case "${target}" in
case ${arm_multilib} in
aprofile|rmprofile)

tmake_profile_file="arm/t-multilib"
+   tm_mlib_file="arm/arm-mlib.h"
;;
@*)
ml=`echo "X$arm_multilib" | sed 
'1s,^X@,,'`
@@ -4393,7 +4394,7 @@ case "${target}" in
# through to the multilib selector
with_float="soft"
tmake_file="${tmake_file} ${tmake_profile_file}"
-   tm_file="$tm_file arm/arm-mlib.h"
+   tm_file="$tm_file $tm_mlib_file"
TM_MULTILIB_CONFIG="$with_multilib_list"
fi
fi





Re: [PATCH] arm: [MVE] Add missing length=8 attribute

2023-02-03 Thread Richard Earnshaw via Gcc-patches




On 01/02/2023 09:46, Christophe Lyon via Gcc-patches wrote:

I have noticed that the "length" "8" attribute is missing in a few
patterns in mve.md.

gcc/
* config/arm/mve.md (mve_vabavq_p_): Add length
attribute.
(mve_vqshluq_m_n_s): Likewise.
(mve_vshlq_m_): Likewise.
(mve_vsriq_m_n_): Likewise.
(mve_vsubq_m_): Likewise.
---


OK

R.


Re: [PATCH] arm: Fix warning in libgcc/config/arm/pr-support.c

2023-02-03 Thread Richard Earnshaw via Gcc-patches




On 01/02/2023 09:46, Christophe Lyon via Gcc-patches wrote:

I have noticed some warnings when building GCC for arm-eabi:
pr-support.c:110:7: warning: variable ‘set_pac_sp’ set but not used 
[-Wunused-but-set-variable]
pr-support.c:109:7: warning: variable ‘set_pac’ set but not used 
[-Wunused-but-set-variable]

This small patch avoids them by defining these two variables undef
TARGET_HAVE_PACBTI, like the code which actually uses them.

libgcc/
* config/arm/pr-support.c (__gnu_unwind_execute): Use
TARGET_HAVE_PACBTI to define set_pac and set_pac_sp.


OK

R.


Re: [PATCH][GCC] arm: Fix inclusion of arm-mlib.h header more than once (pr108505).

2023-01-24 Thread Richard Earnshaw via Gcc-patches




On 24/01/2023 09:55, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

The patch fixes the build issue for arm-none-eabi target configured with
--with-multilib-list=aprofile,rmprofile, in which case the header file
arm/arm-mlib.h is being included more than once and the toolchain build
is failing (PR108505).

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2023-01-24  Srinath Parvathaneni  

 PR target/108505
 * config.gcc (tm_file): Move the variable out of loop.


### Attachment also inlined for ease of reply###




A more robust fix would be:



diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
771bd35e803b47e79c0a62eab8f4845e9bbf96ef..d828223c16d3076da0ab6582dfaf59ad657ea438
 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4350,7 +4350,6 @@ case "${target}" in
case ${arm_multilib} in
aprofile|rmprofile)

tmake_profile_file="arm/t-multilib"
-   tm_file="$tm_file 
arm/arm-mlib.h"

tm_mlib_file="arm/arm-mlib.h"

;;
@*)
ml=`echo "X$arm_multilib" | sed 
'1s,^X@,,'`
@@ -4389,6 +4388,7 @@ case "${target}" in
# through to the multilib selector
with_float="soft"
tmake_file="${tmake_file} ${tmake_profile_file}"
+   tm_file="$tm_file arm/arm-mlib.h"

tm_file="$tm_file $tm_mlib_file"


TM_MULTILIB_CONFIG="$with_multilib_list"
fi
fi





Then if we ever need to add additional alternative multilib variants we 
can control the selection separately.


R.


Re: [PATCH v2][GCC] arm: Add support for new frame unwinding instruction "0xb5".

2023-01-20 Thread Richard Earnshaw via Gcc-patches




On 20/01/2023 17:27, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds support for Arm frame unwinding instruction "0xb5" [1]. When
an exception is taken and "0xb5" instruction is encounter during runtime
stack-unwinding, we use effective vsp as modifier in pointer authentication.
On completion of stack unwinding if "0xb5" instruction is not encountered
then CFA will be used as modifier in pointer authentication.

[1] https://github.com/ARM-software/abi-aa/releases/download/2022Q3/ehabi32.pdf

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-11-09  Srinath Parvathaneni  

 * libgcc/config/arm/pr-support.c (__gnu_unwind_execute): Decode opcode 
"0xb5".


### Attachment also inlined for ease of reply###


diff --git a/libgcc/config/arm/pr-support.c b/libgcc/config/arm/pr-support.c
index 
e48854587c667a959aa66ccc4982231f6ecc..1fbc41e17c227c21af1937344ded2a7fd80e61df
 100644
--- a/libgcc/config/arm/pr-support.c
+++ b/libgcc/config/arm/pr-support.c
@@ -107,7 +107,9 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
_uw op;
int set_pc;
int set_pac = 0;
+  int set_pac_sp = 0;
_uw reg;
+  _uw sp;
  
set_pc = 0;

for (;;)
@@ -124,10 +126,11 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
  #if defined(TARGET_HAVE_PACBTI)
  if (set_pac)
{
- _uw sp;
  _uw lr;
  _uw pac;
- _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, );
+ if (!set_pac_sp)
+   _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32,
+);
  _Unwind_VRS_Get (context, _UVRSC_CORE, R_LR, _UVRSD_UINT32, );
  _Unwind_VRS_Get (context, _UVRSC_PAC, R_IP,
   _UVRSD_UINT32, );
@@ -259,6 +262,14 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
  continue;
}
  
+	  /* Use current VSP as modifier in PAC validation.  */

+ if (op == 0xb5)
+   {
+ _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, );
+ set_pac_sp = 1;
+ continue;
+   }
+
  if ((op & 0xfc) == 0xb4)  /* Obsolete FPA.  */
return _URC_FAILURE;
  






OK.

R.


Re: [GCC][PATCH v4] arm: Add pacbti related multilib support for armv8.1-m.main.

2023-01-20 Thread Richard Earnshaw via Gcc-patches




On 13/01/2023 17:46, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds the support for pacbti multlilib linking by making
"-mbranch-protection=none" as default multilib option for arm-none-eabi
target.

Eg 1.

If the passed command line flags are (without mbranch-protection):
a) -march=armv8.1-m.main+mve -mfloat-abi=hard -mfpu=auto

"-mbranch-protection=none" will be used in the multilib matching.

Eg 2.

If the passed command line flags are (with mbranch-protection):
a) -march=armv8.1-m.main+mve+pacbti -mfloat-abi=hard -mfpu=auto  
-mbranch-protection=pac-ret

"-mbranch-protection=standard" will be used in the multilib matching.

Regression tested on arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2023-01-11  Srinath Parvathaneni  

 * config.gcc ($tm_file): Update variable.
 * config/arm/arm-mlib.h: Create new header file.
 * config/arm/t-rmprofile (MULTI_ARCH_DIRS_RM): Rename 
mbranch-protection
 multilib arch directory.
 (MULTILIB_REUSE): Add multilib reuse rules.
 (MULTILIB_MATCHES): Add multilib match rules.

gcc/testsuite/ChangeLog:

2023-01-11  Srinath Parvathaneni  

 * gcc.target/arm/multilib.exp (multilib_config "rmprofile"): Update
 tests.
 * gcc.target/arm/pac-12.c: New test.
 * gcc.target/arm/pac-13.c: Likewise.
 * gcc.target/arm/pac-14.c: Likewise.


OK.

R.


Re: [PATCH][GCC] arm: Add support for new frame unwinding instruction "0xb5".

2023-01-20 Thread Richard Earnshaw via Gcc-patches




On 10/11/2022 10:37, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds support for Arm frame unwinding instruction "0xb5" [1]. When
an exception is taken and "0xb5" instruction is encounter during runtime
stack-unwinding, we use effective vsp as modifier in pointer authentication.
On completion of stack unwinding if "0xb5" instruction is not encountered
then CFA will be used as modifier in pointer authentication.

[1] https://github.com/ARM-software/abi-aa/releases/download/2022Q3/ehabi32.pdf

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-11-09  Srinath Parvathaneni  

 * libgcc/config/arm/pr-support.c (__gnu_unwind_execute): Decode opcode
"0xb5".


### Attachment also inlined for ease of reply###


diff --git a/libgcc/config/arm/pr-support.c b/libgcc/config/arm/pr-support.c
index 
e48854587c667a959aa66ccc4982231f6ecc..73e4942a39b34a83c2da85def6b13e82ec501552
 100644
--- a/libgcc/config/arm/pr-support.c
+++ b/libgcc/config/arm/pr-support.c
@@ -107,7 +107,9 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
_uw op;
int set_pc;
int set_pac = 0;
+  int set_pac_sp = 0;
_uw reg;
+  _uw sp;
  
set_pc = 0;

for (;;)
@@ -124,10 +126,11 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
  #if defined(TARGET_HAVE_PACBTI)
  if (set_pac)
{
- _uw sp;
  _uw lr;
  _uw pac;
- _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, );
+ if (!set_pac_sp)
+   _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32,
+);
  _Unwind_VRS_Get (context, _UVRSC_CORE, R_LR, _UVRSD_UINT32, );
  _Unwind_VRS_Get (context, _UVRSC_PAC, R_IP,
   _UVRSD_UINT32, );
@@ -259,7 +262,19 @@ __gnu_unwind_execute (_Unwind_Context * context, 
__gnu_unwind_state * uws)
  continue;
}
  
-	  if ((op & 0xfc) == 0xb4)  /* Obsolete FPA.  */

+ /* Use current VSP as modifier in PAC validation.  */
+ if (op == 0xb5)
+   {
+ if (set_pac)
+   _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32,
+);
+ else
+   return _URC_FAILURE;


I don't think you need to worry about the case when set_pac is false; in 
fact, I don't think you need to even test set_pac here.  It's harmless 
if this opcode appears and then we never do the authentication, so just 
record the SP value at this point.



+ set_pac_sp = 1;
+ continue;
+   }
+
+ if ((op & 0xfd) == 0xb6)  /* Obsolete FPA.  */


No, this is logically impossible (0xfd is binary _1101, while 0xb6 
is binary 1011_110 and thus bit 2 will never be set after the mask). 
But you don't need to change the condition here at all, because we've 
already taken out the case you're worried about immediately above (and 
ended that block with a 'continue').



return _URC_FAILURE;
 > /* op & 0xf8 == 0xb8.  */





R.


Re: [GCC][PATCH 13/15, v6] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2023-01-20 Thread Richard Earnshaw via Gcc-patches




On 18/01/2023 17:18, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

This patch teaches the DWARF support in gcc about RA_AUTH_CODE pseudo 
hard-register and also
updates the ".save", ".cfi_register", ".cfi_offset", ".cfi_restore" directives 
accordingly.
This patch also adds support to emit ".pacspval" directive when "pac ip, lr, 
sp" instruction
in generated in the assembly.

RA_AUTH_CODE register number is 107 and it's dwarf register number is 143.

Applying this patch on top of PACBTI series posted here
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599658.html and when 
compiling the following
test.c with "-march=armv8.1-m.main+mve+pacbti -mbranch-protection=pac-ret 
-mthumb -mfloat-abi=hard
fasynchronous-unwind-tables -g -O0 -S" command line options, the assembly 
output after this patch
looks like below:

$cat test.c

void fun1(int a);
void fun(int a,...)
{
   fun1(a);
}

int main()
{
   fun (10);
   return 0;
}

$ arm-none-eabi-gcc -march=armv8.1-m.main+mve+pacbti 
-mbranch-protection=pac-ret -mthumb -mfloat-abi=hard
-fasynchronous-unwind-tables -g -O0 -S test.s

Assembly output:
...
fun:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -24
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...
main:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -8
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

2023-01-18  Srinath Parvathaneni  

 * config/arm/aout.h (ra_auth_code): Add entry in enum.
 (emit_multi_reg_push): Add RA_AUTH_CODE register to
 dwarf frame expression.
 (arm_emit_multi_reg_pop): Restore RA_AUTH_CODE register.
 (arm_expand_prologue): Update frame related information and reg notes
 for pac/pacbit insn.
 (arm_regno_class): Check for pac pseudo reigster.
 (arm_dbx_register_number): Assign ra_auth_code register number in 
dwarf.
 (arm_init_machine_status): Set pacspval_needed to zero.
 (arm_debugger_regno): Check for PAC register.
 (arm_unwind_emit_sequence): Print .save directive with ra_auth_code
 register.
 (arm_unwind_emit_set): Add entry for IP_REGNUM in switch case.
 (arm_unwind_emit): Update REG_CFA_REGISTER case._
 * config/arm/arm.h (FIRST_PSEUDO_REGISTER): Modify.
 (DWARF_PAC_REGNUM): Define.
 (IS_PAC_REGNUM): Likewise.
 (enum reg_class): Add PAC_REG entry.
 (machine_function): Add pacbti_needed state to structure.
 * config/arm/arm.md (RA_AUTH_CODE): Define.

gcc/testsuite/ChangeLog:

2023-01-18  Srinath Parvathaneni  

 * g++.target/arm/pac-1.C: New test.
 * gcc.target/arm/pac-15.c: Likewise.


OK.

R.


Re: [GCC][PATCH 13/15, v5] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2023-01-18 Thread Richard Earnshaw via Gcc-patches




On 13/01/2023 17:44, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

This patch teaches the DWARF support in gcc about RA_AUTH_CODE pseudo 
hard-register and also
updates the ".save", ".cfi_register", ".cfi_offset", ".cfi_restore" directives 
accordingly.
This patch also adds support to emit ".pacspval" directive when "pac ip, lr, 
sp" instruction
in generated in the assembly.

RA_AUTH_CODE register number is 107 and it's dwarf register number is 143.

Applying this patch on top of PACBTI series posted here
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599658.html and when 
compiling the following
test.c with "-march=armv8.1-m.main+mve+pacbti -mbranch-protection=pac-ret 
-mthumb -mfloat-abi=hard
fasynchronous-unwind-tables -g -O0 -S" command line options, the assembly 
output after this patch
looks like below:

$cat test.c

void fun1(int a);
void fun(int a,...)
{
   fun1(a);
}

int main()
{
   fun (10);
   return 0;
}

$ arm-none-eabi-gcc -march=armv8.1-m.main+mve+pacbti 
-mbranch-protection=pac-ret -mthumb -mfloat-abi=hard
-fasynchronous-unwind-tables -g -O0 -S test.s

Assembly output:
...
fun:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -24
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...
main:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -8
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

2023-01-11  Srinath Parvathaneni  

 * config/arm/aout.h (ra_auth_code): Add entry in enum.
 (emit_multi_reg_push): Add RA_AUTH_CODE register to
 dwarf frame expression.
 (arm_emit_multi_reg_pop): Restore RA_AUTH_CODE register.
 (arm_expand_prologue): Update frame related information and reg notes
 for pac/pacbit insn.
 (arm_regno_class): Check for pac pseudo reigster.
 (arm_dbx_register_number): Assign ra_auth_code register number in 
dwarf.
 (arm_init_machine_status): Set pacspval_needed to zero.
 (arm_debugger_regno): Check for PAC register.
 (arm_unwind_emit_sequence): Print .save directive with ra_auth_code
 register.
 (arm_unwind_emit_set): Add entry for IP_REGNUM in switch case.
 (arm_unwind_emit): Update REG_CFA_REGISTER case._
 * config/arm/arm.h (FIRST_PSEUDO_REGISTER): Modify.
 (DWARF_PAC_REGNUM): Define.
 (IS_PAC_REGNUM): Likewise.
 (enum reg_class): Add PAC_REG entry.
 (machine_function): Add pacbti_needed state to structure.
 * config/arm/arm.md (RA_AUTH_CODE): Define.

gcc/testsuite/ChangeLog:

2023-01-11  Srinath Parvathaneni  

 * g++.target/arm/pac-1.C: New test.
 * gcc.target/arm/pac-15.c: Likewise.


Your attachments are still not being correctly detected.  Perhaps this 
is because of the filename you've chosen, which has no recognizable 
extension.  If you name your files .patch (or .diff, or even 
.txt) then the system should automatically pick the right mime type 
for encoding.


+ /* NOTE: Dwarf code emitter handle reg-reg copies correctly and in the
+following example reg-reg copy of SP to IP register is handled
+through .cfi_def_cfa_register directive and the .cfi_offset
+directive for IP register is skipped by dwarf code emitter.
+Example:
+   mov ip, sp
+   .cfi_def_cfa_register 12
+   push{fp, ip, lr, pc}
+   .cfi_offset 11, -16
+   .cfi_offset 13, -12
+   .cfi_offset 14, -8
+
+Where as Arm-specific .save directive reg-reg copy handling is
+buggy.  After the reg-reg copy, the copied registers need to be

It's not buggy (if it were you'd need to fix it :).  It just works in a 
different way to the dwarf tracker and doesn't need to handle reg->reg 
copies.  So please rephrase this.


+populated in .save directive register list but with the current
+implementation of .save directive original registers are getting
+populated in the register list.  So to avoid this issue for IP
+register when PACBTI is enabled we manually updated the .save
+directive register list to use "ra_auth_code" (pseduo register 143)
+instead of IP register as shown in following example.
+Example:
+   pacbti  ip, lr, sp
+   .cfi_register 143, 12
+   push{r3, r7, ip, lr}
+   .save {r3, r7, ra_auth_code, lr}
+ */

R.


Re: [PATCH][GCC] arm: fix __arm_vld1q_z* and __arm_vst1q_p* intrinsics.

2023-01-18 Thread Richard Earnshaw via Gcc-patches




On 22/12/2021 16:21, Murray Steele via Gcc-patches wrote:

Hi,

On 22/12/2021 16:04, Richard Earnshaw wrote:



Is there a PR in bugzilla for this?

R.




No, not at this time. It's something I came across whilst
making changes of my own.

For completeness, the ACLE specification I am referencing
has been added below [1].

[1]: https://github.com/ARM-software/acle/releases/tag/r2021Q3

Thanks,
Murray


Andre created one today and I've now pulled this patch in.  Thanks, and 
sorry for the delay getting it committed.


R.


Re: [PATCH] testsuite: Skip intrinsics test if arm

2023-01-17 Thread Richard Earnshaw via Gcc-patches




On 15/01/2023 17:06, Torbjorn SVENSSON via Gcc-patches wrote:



On 2023-01-12 16:03, Richard Earnshaw wrote:



On 19/09/2022 17:16, Torbjörn SVENSSON via Gcc-patches wrote:

In the test case, it's clearly written that intrinsics is not
implemented on arm*. A simple xfail does not help since there are
link error and that would cause an UNRESOLVED testcase rather than
XFAIL.
By chaning to dg-skip-if, the entire test case is omitted.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: Replace
dg-xfail-if with gd-skip-if.


Sorry for the delay reviewing this, I missed it at the time.

My problem with your suggested solution is that if these intrinsics 
are ever added this test will not automatically pick this up as it 
will have been disabled.  I presume from the comment (and the body of 
the test that contains an #ifdef for aarch64) that this is expected to 
be a temporary issue rather than something permanent.


So IMO I think it is correct to leave this as unresolved because the 
test cannot be built due to an issue with the compiler.


This patch has already been merged after Kyrill reviewed it back in 
September.


Without this change, the log would be filled with warnings about missing 
types. Maybe we could add some check that will enable the test only if 
the types are known?

Would that mitigate your concern?

Attached is the log from vld1x2.c on Cortex-A7 with -mfloat-abi=hard 
-mfpu=neon.


When I look at the result of a run, I only look at the test cases that 
are either FAIL (obviously), XPASS and UNRESOLVED. All other test cases 
are in a "good" state from what I can tell. If there are a lot of test 
cases in the UNRESOLVED state, that are not yet implemented year after 
year, it makes it harder to identify those test cases that are of 
interest. Right or wrong, that's why I suggested to remove it for the 
list of test cases that should be working.


Let me know what you think.


Ah, OK.  Somehow I'd misplaced v2 of the patch, which is the version 
that got approved.


R.



Kind regards,
Torbjörn



R.



Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c

index 92a139bc523..f933102be47 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
@@ -1,6 +1,6 @@
  /* We haven't implemented these intrinsics for arm yet.  */
-/* { dg-xfail-if "" { arm*-*-* } } */
  /* { dg-do run } */
+/* { dg-skip-if "unsupported" { arm*-*-* } } */
  /* { dg-options "-O3" } */
  #include 


Re: [GCC][PATCH 13/15, v5] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2023-01-13 Thread Richard Earnshaw via Gcc-patches

On 13/01/2023 22:25, Richard Earnshaw (lists) via Gcc-patches wrote:

On 13/01/2023 22:12, Jakub Jelinek wrote:

On Fri, Jan 13, 2023 at 09:58:26PM +, Richard Earnshaw (lists) wrote:
> I'm afraid increasing number of DWARF registers is ABI incompatible 
change.

> E.g. libgcc __frame_state_for function fills in:
> typedef struct frame_state
> {
>    void *cfa;
>    void *eh_ptr;
>    long cfa_offset;
>    long args_size;
>    long reg_or_offset[PRE_GCC3_DWARF_FRAME_REGISTERS+1];
>    unsigned short cfa_reg;
>    unsigned short retaddr_column;
>    char saved[PRE_GCC3_DWARF_FRAME_REGISTERS+1];
> } frame_state;
> > structure, where PRE_GCC3_DWARF_FRAME_REGISTERS defaults to
> __LIBGCC_DWARF_FRAME_REGISTERS__, which is defined to
> DWARF_FRAME_REGISTERS, which defaults to FIRST_PSEUDO_REGISTER.
> So, changing FIRST_PSEUDO_REGISTER is an ABI change unless you 
arrange for

> PRE_GCC3_DWARF_FRAME_REGISTERS to be defined to the old value.
> >  Jakub
>
So where's the red flag that warns about this?

I also note that Richard Sandiford made a similar type of change for 
AArch64
in r10-4195 (183bfdafc6f1f98711c5400498a7268cc1441096) and nothing 
was said

about that at the time.

It seems incredibly fragile to me to have some ABI based off the 
number of

machine registers.


It is.  The new unwinder fortunately doesn't suffer from this (at least I
think it doesn't), but in older gccs the unwinder could be split 
across different
objects, having e.g. parts of the unwinder in one shared library and 
another

part in another one, each built by different GCC version.

Guess targets which weren't supported in GCC 2.x are ok, while
__frame_state_for is in libgcc, nothing calls it, so while such changes
change the ABI, nothing likely cares.
But for older targets it is a problem.

And it is hard to catch this in the testsuite, one would either need to
hardcode the count for each target in the test, or test with mixing 
GCC 2.x

compiled code with current trunk.

Before the introduction of libgcc_eh.a etc., parts of the unwinder was 
e.g.

exported from glibc.
See e.g. 
https://gcc.gnu.org/legacy-ml/gcc-patches/2001-07/threads.html#00472 


for some details.

 Jakub



So:
1) GCC-2.* didn't support the EABI, which is all we support these days.
2) the Arm port updated FIRST_PSEUDO_REGISTER in 2019 in r10-4441 
(16155ccf588a403c033ccd7743329671bcfb27d5) and I didn't see any fallout 
from that.

In fact it's been changed in

 16155ccf588a
 cf16f980e527
 0be8bd1a1c89
 f1adb0a9f4d7
 9b66ebb1460d
 5a9335ef017c

All since 2003 (ie since gcc-3.0 was released).

3) The Arm port uses the unwinding mechanism defined by the ABI, not the 
dwarf2 based tables.


So I'm inclined to think this probably isn't going to be a problem in 
reality.


R.




Re: [GCC][PATCH 13/15, v5] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2023-01-13 Thread Richard Earnshaw via Gcc-patches

On 13/01/2023 21:58, Richard Earnshaw (lists) via Gcc-patches wrote:

On 13/01/2023 18:02, Jakub Jelinek via Gcc-patches wrote:
On Fri, Jan 13, 2023 at 05:44:15PM +, Srinath Parvathaneni via 
Gcc-patches wrote:

Hello,

This patch teaches the DWARF support in gcc about RA_AUTH_CODE pseudo 
hard-register and also
updates the ".save", ".cfi_register", ".cfi_offset", ".cfi_restore" 
directives accordingly.
This patch also adds support to emit ".pacspval" directive when "pac 
ip, lr, sp" instruction

in generated in the assembly.

RA_AUTH_CODE register number is 107 and it's dwarf register number is 
143.


I'm afraid increasing number of DWARF registers is ABI incompatible 
change.

E.g. libgcc __frame_state_for function fills in:
typedef struct frame_state
{
   void *cfa;
   void *eh_ptr;
   long cfa_offset;
   long args_size;
   long reg_or_offset[PRE_GCC3_DWARF_FRAME_REGISTERS+1];
   unsigned short cfa_reg;
   unsigned short retaddr_column;
   char saved[PRE_GCC3_DWARF_FRAME_REGISTERS+1];
} frame_state;

structure, where PRE_GCC3_DWARF_FRAME_REGISTERS defaults to
__LIBGCC_DWARF_FRAME_REGISTERS__, which is defined to
DWARF_FRAME_REGISTERS, which defaults to FIRST_PSEUDO_REGISTER.
So, changing FIRST_PSEUDO_REGISTER is an ABI change unless you arrange 
for

PRE_GCC3_DWARF_FRAME_REGISTERS to be defined to the old value.

Jakub



So where's the red flag that warns about this?

I also note that Richard Sandiford made a similar type of change for 
AArch64 in r10-4195 (183bfdafc6f1f98711c5400498a7268cc1441096) and 
nothing was said about that at the time.


It seems incredibly fragile to me to have some ABI based off the number 
of machine registers.


R.


Also, the Arm port does not use dwarf based unwinding, so is this really 
relevant?


R.


Re: [PATCH] arm: Split up MVE _Generic associations to prevent type clashes [PR107515]

2023-01-13 Thread Richard Earnshaw via Gcc-patches




On 01/12/2022 18:19, Stam Markianos-Wright via Gcc-patches wrote:

Hi all,

With these previous patches:
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606586.html
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606587.html
we enabled the MVE overloaded _Generic associations to handle more
scalar types, however at PR 107515 we found a new regression that
wasn't detected in our testing:

With glibc's `posix/types.h`:
```
typedef signed int __int32_t;
...
typedef __int32_t int32_t;
```
We would get a `error: '_Generic' specifies two compatible types`
from `__ARM_mve_coerce3` because of `type: param`, when `type` is
`int` and `int32_t: param` both being the same under the hood.

The same did not happen with Newlib's header `sys/_stdint.h`:
```
typedef long int __int32_t;
...
typedef __int32_t int32_t ;
```
which worked fine, because it uses `long int`.

The same could feasibly happen in `__ARM_mve_coerce2` between
`__fp16` and `float16_t`.

The solution here is to break the _Generic down, so that the similar
types don't appear at the same level, as is done in `__ARM_mve_typeid`.

Ok for trunk?

Thanks,
Stam Markianos-Wright

gcc/ChangeLog:
     PR target/96795
     PR target/107515
     * config/arm/arm_mve.h (__ARM_mve_coerce2): Split types.
     (__ARM_mve_coerce3): Likewise.

gcc/testsuite/ChangeLog:
     PR target/96795
     PR target/107515
     * 
gcc.target/arm/mve/intrinsics/mve_intrinsic_type_overloads-fp.c: New test.
     * 
gcc.target/arm/mve/intrinsics/mve_intrinsic_type_overloads-int.c: New test.


Please fix the missing new lines at the end of the tests.

Otherwise OK.

R.




=== Inline Ctrl+C, Ctrl+V or patch ===

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 
09167ec118ed3310c5077145e119196f29d83cac..70003653db65736fcfd019e83d9f18153be650dc 100644

--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -35659,9 +35659,9 @@ extern void *__ARM_undef;
  #define __ARM_mve_coerce1(param, type) \
  _Generic(param, type: param, const type: param, default: *(type 
*)__ARM_undef)

  #define __ARM_mve_coerce2(param, type) \
-    _Generic(param, type: param, float16_t: param, float32_t: param, 
default: *(type *)__ARM_undef)
+    _Generic(param, type: param, __fp16: param, default: _Generic 
(param, _Float16: param, float16_t: param, float32_t: param, default: 
*(type *)__ARM_undef))

  #define __ARM_mve_coerce3(param, type) \
-    _Generic(param, type: param, int8_t: param, int16_t: param, 
int32_t: param, int64_t: param, uint8_t: param, uint16_t: param, 
uint32_t: param, uint64_t: param, default: *(type *)__ARM_undef)
+    _Generic(param, type: param, default: _Generic (param, int8_t: 
param, int16_t: param, int32_t: param, int64_t: param, uint8_t: param, 
uint16_t: param, uint32_t: param, uint64_t: param, default: *(type 
*)__ARM_undef))


  #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */

diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_intrinsic_type_overloads-fp.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_intrinsic_type_overloads-fp.c

new file mode 100644
index 
..427dcacb5ff59b53d5eab1f1582ef6460da3f2f3

--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_intrinsic_type_overloads-fp.c

@@ -0,0 +1,65 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2 -Wno-pedantic -Wno-long-long" } */
+#include "arm_mve.h"
+
+float f1;
+double f2;
+float16_t f3;
+float32_t f4;
+__fp16 f5;
+_Float16 f6;
+
+int i1;
+short i2;
+long i3;
+long long i4;
+int8_t i5;
+int16_t i6;
+int32_t i7;
+int64_t i8;
+
+const int ci1;
+const short ci2;
+const long ci3;
+const long long ci4;
+const int8_t ci5;
+const int16_t ci6;
+const int32_t ci7;
+const int64_t ci8;
+
+float16x8_t floatvec;
+int16x8_t intvec;
+
+void test(void)
+{
+    /* Test a few different supported ways of passing an int value.  The
+    intrinsic vmulq was chosen arbitrarily, but it is representative of
+    all intrinsics that take a non-const scalar value.  */
+    intvec = vmulq(intvec, 2);
+    intvec = vmulq(intvec, (int32_t) 2);
+    intvec = vmulq(intvec, (short) 2);
+    intvec = vmulq(intvec, i1);
+    intvec = vmulq(intvec, i2);
+    intvec = vmulq(intvec, i3);
+    intvec = vmulq(intvec, i4);
+    intvec = vmulq(intvec, i5);
+    intvec = vmulq(intvec, i6);
+    intvec = vmulq(intvec, i7);
+    intvec = vmulq(intvec, i8);
+
+    /* Test a few different supported ways of passing a float value.  */
+    floatvec = vmulq(floatvec, 0.5);
+    floatvec = vmulq(floatvec, 0.5f);
+    floatvec = vmulq(floatvec, (__fp16) 0.5);
+    floatvec = vmulq(floatvec, f1);
+    floatvec = vmulq(floatvec, f2);
+    floatvec = vmulq(floatvec, f3);
+    floatvec = vmulq(floatvec, f4);
+    floatvec = vmulq(floatvec, f5);
+    floatvec = vmulq(floatvec, f6);
+    floatvec = vmulq(floatvec, 

Re: [PATCH] arm: unified syntax for libgcc clear_cache

2023-01-13 Thread Richard Earnshaw via Gcc-patches
I've just noticed that this was never committed.  Presumably that's 
because the patch did not apply cleanly.  I've cleaned it up and pushed 
it now.


R.

On 30/09/2022 16:30, Seija Kijin via Gcc-patches wrote:

Yes, please!

On Tue, Sep 6, 2022 at 10:48 AM Kyrylo Tkachov  wrote:


Hi Seija,


-Original Message-
From: Gcc-patches  On Behalf Of Seija Kijin via
Gcc-patches
Sent: Thursday, August 11, 2022 2:36 PM
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] arm: unified syntax for libgcc clear_cache

The patch to convert all thumb1 code in libgcc to unified syntax
omitted changing all swi instructions to the current name: svc.

This patch fixes this case.


This is ok, thanks.
Do you need someone to commit this for you?

Kyrill



---
  libgcc/config/arm/lib1funcs.S | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 8c39c9f20a2b..19fa1462ccf3 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -1522,7 +1522,7 @@ LSYM(Lover12):
   add r7, r7, #2
  #endif
   mov r2, #0
- swi 0
+ svc 0
   do_pop {r7}
   RET
   FUNC_END clear_cache


Re: [GCC][PATCH v2] arm: Add cde feature support for Cortex-M55 CPU.

2023-01-13 Thread Richard Earnshaw via Gcc-patches




On 31/10/2022 12:38, Srinath Parvathaneni via Gcc-patches wrote:

Hi,


-Original Message-
From: Christophe Lyon 
Sent: Monday, October 17, 2022 2:30 PM
To: Srinath Parvathaneni ; gcc-
patc...@gcc.gnu.org
Cc: Richard Earnshaw 
Subject: Re: [GCC][PATCH] arm: Add cde feature support for Cortex-M55
CPU.

Hi Srinath,


On 10/10/22 10:20, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds cde feature (optional) support for Cortex-M55 CPU,
please refer [1] for more details. To use this feature we need to
specify +cdecpN (e.g. -mcpu=cortex-m55+cdecp), where N is the

coprocessor number 0 to 7.


Bootstrapped for arm-none-linux-gnueabihf target, regression tested on
arm-none-eabi target and found no regressions.

[1] https://developer.arm.com/documentation/101051/0101/?lang=en

(version: r1p1).


Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-10-07  Srinath Parvathaneni  

  * common/config/arm/arm-common.cc (arm_canon_arch_option_1):

Ignore cde

  options for mlibarch.
  * config/arm/arm-cpus.in (begin cpu cortex-m55): Add cde options.
  * doc/invoke.texi (CDE): Document options for Cortex-M55 CPU.

gcc/testsuite/ChangeLog:

2022-10-07  Srinath Parvathaneni  

  * gcc.target/arm/multilib.exp: Add multilib tests for Cortex-M55 CPU.


### Attachment also inlined for ease of reply

###



diff --git a/gcc/common/config/arm/arm-common.cc
b/gcc/common/config/arm/arm-common.cc
index


c38812f1ea6a690cd19b0dc74d963c4f5ae155ca..b6f955b3c012475f398382e72
c9a

3966412991ec 100644
--- a/gcc/common/config/arm/arm-common.cc
+++ b/gcc/common/config/arm/arm-common.cc
@@ -753,6 +753,15 @@ arm_canon_arch_option_1 (int argc, const char

**argv, bool arch_for_multilib)

 arm_initialize_isa (target_isa, selected_cpu->common.isa_bits);
 arm_parse_option_features (target_isa, _cpu->common,
 strchr (cpu, '+'));
+  if (arch_for_multilib)
+   {
+ const enum isa_feature removable_bits[] =

{ISA_IGNORE_FOR_MULTILIB,

+isa_nobit};
+ sbitmap isa_bits = sbitmap_alloc (isa_num_bits);
+ arm_initialize_isa (isa_bits, removable_bits);
+ bitmap_and_compl (target_isa, target_isa, isa_bits);
+   }
+


I can see the piece of code you add here is exactly the same as the one a few
lines above when handling "if (arch)". Can this be moved below and thus be
common to the two cases, or does it have to be performed before
bitmap_ior of fpu_isa?


Thanks for pointing out this, I have moved the common code below the arch and 
cpu
if blocks in the attached patch.
  

Also, IIUC, CDE was already optional for other CPUs (M33, M35P, star-mc1),
so the hunk above fixes a latent bug when handling multilibs for these CPUs
too? If so, maybe worth splitting the patch into two parts since the above is
not strictly related to M55?


Even though CDE is optional for the mentioned CPUs as per the specs, the code to
enable CDE as optional feature is missing in current compiler.
Current GCC compiler supports CDE as optional feature only with -march options 
and
this pass adds CDE as optional for M55 and so this is not a fix bug.


But I'm not a maintainer ;-)

Thanks,

Christophe


 if (fpu && strcmp (fpu, "auto") != 0)
{
  /* The easiest and safest way to remove the default fpu diff
--git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index


5a63bc548e54dbfdce5d1df425bd615d81895d80..aa02c04c4924662f3ddd58e
69673

92ba3f4b4a87 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -1633,6 +1633,14 @@ begin cpu cortex-m55
option nomve remove mve mve_float
option nofp remove ALL_FP mve_float
option nodsp remove MVE mve_float
+ option cdecp0 add cdecp0
+ option cdecp1 add cdecp1
+ option cdecp2 add cdecp2
+ option cdecp3 add cdecp3
+ option cdecp4 add cdecp4
+ option cdecp5 add cdecp5
+ option cdecp6 add cdecp6
+ option cdecp7 add cdecp7
isa quirk_no_asmcpu quirk_vlldm
costs v7m
vendor 41
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index


aa5655764a0360959f9c1061749d2cc9ebd23489..26857f7a90e42d925bc69086
86ac

78138a53c4ad 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21698,6 +21698,10 @@ floating-point instructions on @samp{cortex-

m55}.

   Disable the M-Profile Vector Extension (MVE) single precision floating-

point

   instructions on @samp{cortex-m55}.

+@item +cdecp0, +cdecp1, ... , +cdecp7 Enable the Custom Datapath
+Extension (CDE) on selected coprocessors according to the numbers
+given in the options in the range 0 to 7 on @samp{cortex-m55}.
+
   @item  +nofp
   Disables the floating-point instructions on @samp{arm9e},
   @samp{arm946e-s}, @samp{arm966e-s}, @samp{arm968e-s},

@samp{arm10e},

diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp
b/gcc/testsuite/gcc.target/arm/multilib.exp
index



Re: [PATCH 3/9] arm: Don't add crtfastmath.o for -shared

2023-01-13 Thread Richard Earnshaw via Gcc-patches




On 13/01/2023 08:00, Richard Biener via Gcc-patches wrote:

Don't add crtfastmath.o for -shared to avoid altering the FP
environment when loading a shared library.

PR target/55522
* config/arm/linux-eabi.h (ENDFILE_SPEC): Don't add
crtfastmath.o for -shared.
* config/arm/unknown-elf.h (STARTFILE_SPEC): Likewise.


OK.

R.


---
  gcc/config/arm/linux-eabi.h  | 2 +-
  gcc/config/arm/unknown-elf.h | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
index 57f830f0176..a119875599d 100644
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -121,7 +121,7 @@
  
  #undef	ENDFILE_SPEC

  #define ENDFILE_SPEC \
-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} "  \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}} " 
  \
LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
  
  /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we

diff --git a/gcc/config/arm/unknown-elf.h b/gcc/config/arm/unknown-elf.h
index 464d38b6cc6..397ac3f68b9 100644
--- a/gcc/config/arm/unknown-elf.h
+++ b/gcc/config/arm/unknown-elf.h
@@ -33,7 +33,7 @@
  
  #undef  STARTFILE_SPEC

  #define STARTFILE_SPEC\
-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} "  \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}} " 
  \
UNKNOWN_ELF_STARTFILE_SPEC
  
  #define UNKNOWN_ELF_ENDFILE_SPEC	"crtend%O%s crtn%O%s"


Re: [PATCH 1/9] aarch64: Don't add crtfastmath.o for -shared

2023-01-13 Thread Richard Earnshaw via Gcc-patches




On 13/01/2023 07:59, Richard Biener via Gcc-patches wrote:

Don't add crtfastmath.o for -shared to avoid altering the FP
environment when loading a shared library.

PR target/55522
* config/aarch64/aarch64-elf-raw.h (ENDFILE_SPEC): Don't add
crtfastmath.o for -shared.
* config/aarch64/aarch64-freebsd.h (GNU_USER_TARGET_MATHFILE_SPEC):
Likewise.
* config/aarch64/aarch64-linux.h (GNU_USER_TARGET_MATHFILE_SPEC):
Likewise.
---
  gcc/config/aarch64/aarch64-elf-raw.h | 2 +-
  gcc/config/aarch64/aarch64-freebsd.h | 2 +-
  gcc/config/aarch64/aarch64-linux.h   | 2 +-
  3 files changed, 3 insertions(+), 3 deletions(-)



OK.

R.


diff --git a/gcc/config/aarch64/aarch64-elf-raw.h 
b/gcc/config/aarch64/aarch64-elf-raw.h
index d4d820a9d54..fa5b4527ea0 100644
--- a/gcc/config/aarch64/aarch64-elf-raw.h
+++ b/gcc/config/aarch64/aarch64-elf-raw.h
@@ -25,7 +25,7 @@
  #define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
  #define ENDFILE_SPEC \
" crtend%O%s crtn%O%s " \
-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
  
  #ifndef LINK_SPEC

  #define LINK_SPEC "%{h*} \
diff --git a/gcc/config/aarch64/aarch64-freebsd.h 
b/gcc/config/aarch64/aarch64-freebsd.h
index 13beb3781b6..2cf9cf6f046 100644
--- a/gcc/config/aarch64/aarch64-freebsd.h
+++ b/gcc/config/aarch64/aarch64-freebsd.h
@@ -50,7 +50,7 @@
  #define LINK_SPEC FBSD_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
  
  #define GNU_USER_TARGET_MATHFILE_SPEC \

-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
  
  #undef ENDFILE_SPEC

  #define ENDFILE_SPEC \
diff --git a/gcc/config/aarch64/aarch64-linux.h 
b/gcc/config/aarch64/aarch64-linux.h
index 5e4553d79f5..61ed4067fc5 100644
--- a/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc/config/aarch64/aarch64-linux.h
@@ -50,7 +50,7 @@
  #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
  
  #define GNU_USER_TARGET_MATHFILE_SPEC \

-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
  
  #undef ENDFILE_SPEC

  #define ENDFILE_SPEC   \


Re: [PATCH] testsuite: Skip intrinsics test if arm

2023-01-12 Thread Richard Earnshaw via Gcc-patches




On 19/09/2022 17:16, Torbjörn SVENSSON via Gcc-patches wrote:

In the test case, it's clearly written that intrinsics is not
implemented on arm*. A simple xfail does not help since there are
link error and that would cause an UNRESOLVED testcase rather than
XFAIL.
By chaning to dg-skip-if, the entire test case is omitted.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/advsimd-intrinsics/vld1x2.c: Replace
dg-xfail-if with gd-skip-if.


Sorry for the delay reviewing this, I missed it at the time.

My problem with your suggested solution is that if these intrinsics are 
ever added this test will not automatically pick this up as it will have 
been disabled.  I presume from the comment (and the body of the test 
that contains an #ifdef for aarch64) that this is expected to be a 
temporary issue rather than something permanent.


So IMO I think it is correct to leave this as unresolved because the 
test cannot be built due to an issue with the compiler.


R.



Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
  gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
index 92a139bc523..f933102be47 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c
@@ -1,6 +1,6 @@
  /* We haven't implemented these intrinsics for arm yet.  */
-/* { dg-xfail-if "" { arm*-*-* } } */
  /* { dg-do run } */
+/* { dg-skip-if "unsupported" { arm*-*-* } } */
  /* { dg-options "-O3" } */
  
  #include 


Re: [PATCH] [PR42093] [arm] [thumb2] disable tree-dce for test

2023-01-12 Thread Richard Earnshaw via Gcc-patches




On 02/12/2022 09:26, Alexandre Oliva via Gcc-patches wrote:


CD-DCE introduces blocks to share common PHI nodes, which replaces a
backwards branch that used to prevent the thumb2 jump table shortening
that PR42093 tested for.  In order to keep on testing that the
backward branch prevents the jumptable shortening, disable tree-dce.

Regstraped on x86_64-linux-gnu, also tested with crosses to riscv64-elf
and arm-eabi.  Ok to install?

 > for  gcc/testsuite/ChangeLog

PR target/42093
* gcc.target/arm/pr42093.c: Disable tree-dce.


OK.

R.


---
  gcc/testsuite/gcc.target/arm/pr42093.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr42093.c 
b/gcc/testsuite/gcc.target/arm/pr42093.c
index 7ba2f933eef81..69b1470607c7f 100644
--- a/gcc/testsuite/gcc.target/arm/pr42093.c
+++ b/gcc/testsuite/gcc.target/arm/pr42093.c
@@ -1,4 +1,4 @@
-/* { dg-options "-mthumb -O2 -fno-reorder-blocks" }  */
+/* { dg-options "-mthumb -O2 -fno-reorder-blocks -fno-tree-dce" }  */
  /* { dg-require-effective-target arm_thumb2_ok } */
  /* { dg-final { scan-assembler-not "tbb" } } */
  /* { dg-final { scan-assembler-not "tbh" } } */



Re: [PATCH] [PR40457] [arm] expand SI-aligned movdi into pair of movsi

2023-01-11 Thread Richard Earnshaw via Gcc-patches




On 02/12/2022 09:29, Alexandre Oliva via Gcc-patches wrote:


When expanding a misaligned DImode move, emit aligned SImode moves if
the parts are sufficiently aligned.  This enables neighboring stores
to be peephole-combined into stm, as expected by the PR40457 testcase,
even after SLP vectorizes the originally aligned SImode stores into a
misaligned DImode store.

Regstraped on x86_64-linux-gnu, also tested with crosses to riscv64-elf
and arm-eabi (tms570).  Ok to install?


for  gcc/ChangeLog

PR target/40457
* config/arm/arm.md (movmisaligndi): Prefer aligned SImode
moves.


OK.

R.


---
  gcc/config/arm/arm.md |   12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 69bf343fb0ed6..a9eb0299aa761 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12783,8 +12783,16 @@ (define_expand "movmisaligndi"
rtx hi_op0 = gen_highpart_mode (SImode, DImode, operands[0]);
rtx hi_op1 = gen_highpart_mode (SImode, DImode, operands[1]);
  
-  emit_insn (gen_movmisalignsi (lo_op0, lo_op1));

-  emit_insn (gen_movmisalignsi (hi_op0, hi_op1));
+  if (aligned_operand (lo_op0, SImode) && aligned_operand (lo_op1, SImode))
+{
+  emit_move_insn (lo_op0, lo_op1);
+  emit_move_insn (hi_op0, hi_op1);
+}
+  else
+{
+  emit_insn (gen_movmisalignsi (lo_op0, lo_op1));
+  emit_insn (gen_movmisalignsi (hi_op0, hi_op1));
+}
DONE;
  })
  



Re: [PATCH 12/15 V5] arm: implement bti injection

2023-01-11 Thread Richard Earnshaw via Gcc-patches




On 22/12/2022 17:13, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 14/12/2022 17:00, Richard Earnshaw via Gcc-patches wrote:

On 14/12/2022 16:40, Andrea Corallo via Gcc-patches wrote:

Hi Richard,

thanks for reviewing.

Richard Earnshaw  writes:


On 28/10/2022 17:40, Andrea Corallo via Gcc-patches wrote:

Hi all,
please find attached the third iteration of this patch addresing
review
comments.
Thanks
     Andrea



@@ -23374,12 +23374,6 @@ output_probe_stack_range (rtx reg1, rtx reg2)
     return "";
   }

-static bool
-aarch_bti_enabled ()
-{
-  return false;
-}
-
   /* Generate the prologue instructions for entry into an ARM or Thumb-2
  function.  */
   void
@@ -32992,6 +32986,61 @@ arm_current_function_pac_enabled_p (void)
     && !crtl->is_leaf));
   }

+/* Return TRUE if Branch Target Identification Mechanism is
enabled.  */
+bool
+aarch_bti_enabled (void)
+{
+  return aarch_enable_bti == 1;
+}

See comment in earlier patch about the location of this function
moving.   Can aarch_enable_bti take values other than 0 and 1?


Yes default is 2.

It shouldn't be by this point, because, hopefully you've gone
through the equivalent of this hunk (from aarch64) somewhere in
arm_override_options:
     if (aarch_enable_bti == 2)
   {
   #ifdef TARGET_ENABLE_BTI
     aarch_enable_bti = 1;
   #else
     aarch_enable_bti = 0;
   #endif
   }
And after this point the '2' should never be seen again.  We use
this trick to permit the user to force a default that differs from
the configuration.
However, I don't see a hunk to do this in patch 3, so perhaps that
needs updating to fix this.


I've just remembered that the above is to support a configure-time
option of the compiler to enable branch protection.  But perhaps we
don't want to have that in AArch32, in which case it would be better
not to have the default be 2 anyway, just default to off (0).

R.


Done in 1/15 (needs approval again now).




[...]


+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) ==
UNSPEC_BTI_NOP;

I'm not sure where this crept in, but UNSPEC and UNSPEC_VOLATILE have
separate enums in the backend, so UNSPEC_BIT_NOP should really be
VUNSPEC_BTI_NOP and defined in the enum "unspecv".


Done


+aarch_pac_insn_p (rtx x)
+{
+  if (!x || !INSN_P (x))
+    return false;
+
+  rtx pat = PATTERN (x);
+
+  if (GET_CODE (pat) == SET)
+    {
+  rtx tmp = XEXP (pat, 1);
+  if (tmp
+  && GET_CODE (tmp) == UNSPEC
+  && (XINT (tmp, 1) == UNSPEC_PAC_NOP
+  || XINT (tmp, 1) == UNSPEC_PACBTI_NOP))
+    return true;
+    }
+

This will also need updating (see review on earlier patch) because
PACBTI needs to be unspec_volatile, while PAC doesn't.


Done


+/* The following two functions are for code compatibility with aarch64
+   code, this even if in arm we have only one bti instruction.  */
+

I'd just write
   /* Target specific mapping for aarch_gen_bti_c and
   aarch_gen_bti_j. For Arm, both of these map to a simple BTI
instruction.  */


Done



@@ -162,6 +162,7 @@ (define_c_enum "unspec" [
     UNSPEC_PAC_NOP    ; Represents PAC signing LR
     UNSPEC_PACBTI_NOP    ; Represents PAC signing LR + valid landing pad
     UNSPEC_AUT_NOP    ; Represents PAC verifying LR
+  UNSPEC_BTI_NOP    ; Represent BTI
   ])

BTI is an unspec volatile, so this should be in the "vunspec" enum and
renamed accordingly (see above).


Done.

Please find attached the updated version of this patch.

BR

    Andrea


Apart from that, this is OK.
R.


Cool, attached the updated patch.

Also I added some error handling not to run the bti pass if the march
selected does not support bti.

BR

   Andrea




OK.

R.


Re: [PATCH 1/15 V2] arm: Make mbranch-protection opts parsing common to AArch32/64

2023-01-11 Thread Richard Earnshaw via Gcc-patches




On 22/12/2022 17:04, Andrea Corallo via Gcc-patches wrote:

Hi all,

respinning this as a rebase was necessary, also now is setting
'aarch_enable_bti' to zero as default for arm as suggested during the
review of 12/15.

Best Regards

   Andrea




gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc: Include aarch-common.h.
(all_architectures): Fix comment.
(aarch64_parse_extension): Rename return type, enum value names.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Rename
factored out aarch_ra_sign_scope and aarch_ra_sign_key variables.
Also rename corresponding enum values.
* config/aarch64/aarch64-opts.h (aarch64_function_type): Factor
out aarch64_function_type and move it to common code as
aarch_function_type in aarch-common.h.
* config/aarch64/aarch64-protos.h: Include common types header,
move out types aarch64_parse_opt_result and aarch64_key_type to
aarch-common.h
* config/aarch64/aarch64.cc: Move mbranch-protection parsing types
and functions out into aarch-common.h and aarch-common.cc.  Fix up
all the name changes resulting from the move.
* config/aarch64/aarch64.md: Fix up aarch64_ra_sign_key type name change
and enum value.
* config/aarch64/aarch64.opt: Include aarch-common.h to import
type move.  Fix up name changes from factoring out common code and
data.
* config/arm/aarch-common-protos.h: Export factored out routines to both
backends.
* config/arm/aarch-common.cc: Include newly factored out types.
Move all mbranch-protection code and data structures from
aarch64.cc.
* config/arm/aarch-common.h: New header that declares types shared
between aarch32 and aarch64 backends.
* config/arm/arm-protos.h: Declare types and variables that are
made common to aarch64 and aarch32 backends - aarch_ra_sign_key,
aarch_ra_sign_scope and aarch_enable_bti.

I don't see an entry for config/arm/arm.opt.  Please make sure you 
patches pass "git gcc-verify".


Otherwise, this is OK.

R.


Re: [PATCH 10/15 V7] arm: Implement cortex-M return signing address codegen

2023-01-11 Thread Richard Earnshaw via Gcc-patches




On 11/01/2023 09:58, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:

[...]



Otherwise ok with that change.

R.


Minor respin of this patch addressing the suggestion to have
'use_return_insn' return zero when PAC is enabled.

BR

   Andrea



+  /* Never use a return instruction when return address signing
+ mechanism is enabled.  */
+  if (arm_current_function_pac_enabled_p ())
+return 0;
+

I can see what it does.  It would be better to explain why it does: ie 
that return address authentication needs more than one instruction.


OK with that change.


Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2023-01-09 Thread Richard Earnshaw via Gcc-patches




On 09/01/2023 16:48, Richard Earnshaw via Gcc-patches wrote:



On 09/01/2023 14:58, Andrea Corallo via Gcc-patches wrote:

Andrea Corallo via Gcc-patches  writes:


Richard Earnshaw  writes:


On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:



-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg 
when

popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



  From what I can tell from similar functions this is correct, 
but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, than it is the current CFA 
register.

If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence we will be detected by an assertion in the dwarf emission
machinery.

Thanks,  the patch is ok
Kyrill



Best Regards

    Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer?


Hi Richard,

not sure I understand, isn't any pop updating SP by definition?



Back on this,

compiling:

===
int i;

void foo (int);

int bar()
{
   foo (i);
   return 0;
}
===

With -march=armv8.1-m.main+fp -mbranch-protection=pac-ret+leaf -mthumb 
-O0 -g


Produces the following asm for bar.

bar:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
pac    ip, lr, sp
push    {r3, r7, ip, lr}
add    r7, sp, #0
ldr    r3, .L3
ldr    r3, [r3]
mov    r0, r3
bl    foo
movs    r3, #0
mov    r0, r3
pop    {r3, r7, ip, lr}
aut    ip, lr, sp
bx    lr

The offending instruction causing the ICE (without this patch) when
emitting dwarf is "pop {r3, r7, ip, lr}".

The current CFA reg when emitting the multipop is R7 (the frame
pointer).  If is not the multipop that has the duty to restore SP as
current CFA here which other instruction should do it?



Digging a bit deeper, I'm now even more confused.  arm_expand_epilogue 
contains (parphrasing the code):


  if frame_pointer_needed
    {
  if arm
    {}
  else
    {
  if adjust
    r7 += adjust
  mov sp, r7    // Reset CFA to SP
    }
     }

so there should always be a move of r7 into SP, even if this is strictly 
redundant.  I don't understand why this doesn't happen for your 
testcase.  Can you dig a bit deeper?  I wonder if we've (probably 
incorrectly) assumed that this function doesn't need an epilogue but can 
use a simple return?  I don't think we should do that when 
authentication is needed: a simple return should really be one instruction.




So I strongly suspect the real problem here is that use_return_insn () 
in arm.cc needs to be updated to return false when using pointer 
authentication.  The specification for this function says that a return 
can be done in one instruction; and clearly when we need authentication 
more than one is needed.


R.


Best Regards

   Andrea


R.


Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2023-01-09 Thread Richard Earnshaw via Gcc-patches




On 09/01/2023 14:58, Andrea Corallo via Gcc-patches wrote:

Andrea Corallo via Gcc-patches  writes:


Richard Earnshaw  writes:


On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:



-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when
popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



  From what I can tell from similar functions this is correct, but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, than it is the current CFA register.
If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence we will be detected by an assertion in the dwarf emission
machinery.

Thanks,  the patch is ok
Kyrill



Best Regards

Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer?


Hi Richard,

not sure I understand, isn't any pop updating SP by definition?



Back on this,

compiling:

===
int i;

void foo (int);

int bar()
{
   foo (i);
   return 0;
}
===

With -march=armv8.1-m.main+fp -mbranch-protection=pac-ret+leaf -mthumb -O0 -g

Produces the following asm for bar.

bar:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
pac ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
ldr r3, .L3
ldr r3, [r3]
mov r0, r3
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

The offending instruction causing the ICE (without this patch) when
emitting dwarf is "pop {r3, r7, ip, lr}".

The current CFA reg when emitting the multipop is R7 (the frame
pointer).  If is not the multipop that has the duty to restore SP as
current CFA here which other instruction should do it?



Digging a bit deeper, I'm now even more confused.  arm_expand_epilogue 
contains (parphrasing the code):


 if frame_pointer_needed
   {
 if arm
   {}
 else
   {
 if adjust
   r7 += adjust
 mov sp, r7 // Reset CFA to SP
   }
}

so there should always be a move of r7 into SP, even if this is strictly 
redundant.  I don't understand why this doesn't happen for your 
testcase.  Can you dig a bit deeper?  I wonder if we've (probably 
incorrectly) assumed that this function doesn't need an epilogue but can 
use a simple return?  I don't think we should do that when 
authentication is needed: a simple return should really be one instruction.



Best Regards

   Andrea


R.


Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2023-01-09 Thread Richard Earnshaw via Gcc-patches




On 09/01/2023 14:58, Andrea Corallo via Gcc-patches wrote:

Andrea Corallo via Gcc-patches  writes:


Richard Earnshaw  writes:


On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:



-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when
popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



  From what I can tell from similar functions this is correct, but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, than it is the current CFA register.
If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence we will be detected by an assertion in the dwarf emission
machinery.

Thanks,  the patch is ok
Kyrill



Best Regards

Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer?


Hi Richard,

not sure I understand, isn't any pop updating SP by definition?



Back on this,

compiling:

===
int i;

void foo (int);

int bar()
{
   foo (i);
   return 0;
}
===

With -march=armv8.1-m.main+fp -mbranch-protection=pac-ret+leaf -mthumb -O0 -g

Produces the following asm for bar.

bar:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
pac ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
ldr r3, .L3
ldr r3, [r3]
mov r0, r3
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

The offending instruction causing the ICE (without this patch) when
emitting dwarf is "pop {r3, r7, ip, lr}".

The current CFA reg when emitting the multipop is R7 (the frame
pointer).  If is not the multipop that has the duty to restore SP as
current CFA here which other instruction should do it?


Ah, OK.  I think this is a special case, though, because in this 
specific case the frame pointer (r7) and the stack pointer point to the 
same place.  This means that in the epilogue we don't start by restoring 
SP from FP (at which point we tell the dwarf code that the frame is back 
in SP again).


For example, if I have:


int i;

void foo (int, int*);

int bar()
{
  int j[10];
  foo (i,j);
  return 0;
}


then the epilogue sequence starts with:

addsr7, r7, #40
.cfi_def_cfa_offset 8
mov sp, r7
.cfi_def_cfa_register 13

And then the pop works correctly as-is.

But I'm not convinced that this is enough anyway, you cause the compiler 
to output a directive that changes the CFA pointer back to r13, but you 
don't output anything that changes the CFA offset.  So I think this 
means that the CFA state machine ends up pointing to the wrong location, 
but it's hard to tell as you haven't shown the CFA directives in your 
example above.




Best Regards

   Andrea


R.


[committed] arm: correctly define __ARM_FEATURE_CLZ

2022-12-19 Thread Richard Earnshaw via Gcc-patches

The ACLE requires that __ARM_FEATURE_CLZ be defined if the hardware
supports it; it's also clear that this doesn't mean the current ISA,
so we must define this even when compiling for Thumb1 if the target
supports CLZ in A32.

This brings GCC into alignment with Clang.

gcc/ChangeLog:

* config/arm/arm-c.cc (__ARM_FEATURE_CLZ): Fix definition of
preprocessor macro when target has CLZ in another ISA.
---
 gcc/config/arm/arm-c.cc | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-c.cc b/gcc/config/arm/arm-c.cc
index 86c56bf2680..202898fa041 100644
--- a/gcc/config/arm/arm-c.cc
+++ b/gcc/config/arm/arm-c.cc
@@ -238,8 +238,12 @@ arm_cpu_builtins (struct cpp_reader* pfile)
 builtin_define_with_int_value ("__ARM_FEATURE_LDREX",
    TARGET_ARM_FEATURE_LDREX);
 
+  /* ACLE says that __ARM_FEATURE_CLZ is defined if the hardware
+ supports it; it's also clear that this doesn't mean the current
+ ISA, so we define this even when compiling for Thumb1 if the
+ target supports CLZ in A32.  */
   def_or_undef_macro (pfile, "__ARM_FEATURE_CLZ",
-		  ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB)
+		  ((TARGET_ARM_ARCH >= 5 && arm_arch_notm)
 		   || TARGET_ARM_ARCH_ISA_THUMB >=2));
 
   def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN",


Re: [PATCH 12/15 V4] arm: implement bti injection

2022-12-14 Thread Richard Earnshaw via Gcc-patches




On 14/12/2022 17:00, Richard Earnshaw via Gcc-patches wrote:



On 14/12/2022 16:40, Andrea Corallo via Gcc-patches wrote:

Hi Richard,

thanks for reviewing.

Richard Earnshaw  writes:


On 28/10/2022 17:40, Andrea Corallo via Gcc-patches wrote:

Hi all,
please find attached the third iteration of this patch addresing
review
comments.
Thanks
    Andrea



@@ -23374,12 +23374,6 @@ output_probe_stack_range (rtx reg1, rtx reg2)
    return "";
  }

-static bool
-aarch_bti_enabled ()
-{
-  return false;
-}
-
  /* Generate the prologue instructions for entry into an ARM or Thumb-2
 function.  */
  void
@@ -32992,6 +32986,61 @@ arm_current_function_pac_enabled_p (void)
    && !crtl->is_leaf));
  }

+/* Return TRUE if Branch Target Identification Mechanism is 
enabled.  */

+bool
+aarch_bti_enabled (void)
+{
+  return aarch_enable_bti == 1;
+}

See comment in earlier patch about the location of this function
moving.   Can aarch_enable_bti take values other than 0 and 1?


Yes default is 2.


It shouldn't be by this point, because, hopefully you've gone through 
the equivalent of this hunk (from aarch64) somewhere in 
arm_override_options:



    if (aarch_enable_bti == 2)
  {
  #ifdef TARGET_ENABLE_BTI
    aarch_enable_bti = 1;
  #else
    aarch_enable_bti = 0;
  #endif
  }

And after this point the '2' should never be seen again.  We use this 
trick to permit the user to force a default that differs from the 
configuration.


However, I don't see a hunk to do this in patch 3, so perhaps that needs 
updating to fix this.


I've just remembered that the above is to support a configure-time 
option of the compiler to enable branch protection.  But perhaps we 
don't want to have that in AArch32, in which case it would be better not 
to have the default be 2 anyway, just default to off (0).


R.






[...]


+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) ==
UNSPEC_BTI_NOP;

I'm not sure where this crept in, but UNSPEC and UNSPEC_VOLATILE have
separate enums in the backend, so UNSPEC_BIT_NOP should really be
VUNSPEC_BTI_NOP and defined in the enum "unspecv".


Done


+aarch_pac_insn_p (rtx x)
+{
+  if (!x || !INSN_P (x))
+    return false;
+
+  rtx pat = PATTERN (x);
+
+  if (GET_CODE (pat) == SET)
+    {
+  rtx tmp = XEXP (pat, 1);
+  if (tmp
+  && GET_CODE (tmp) == UNSPEC
+  && (XINT (tmp, 1) == UNSPEC_PAC_NOP
+  || XINT (tmp, 1) == UNSPEC_PACBTI_NOP))
+    return true;
+    }
+

This will also need updating (see review on earlier patch) because
PACBTI needs to be unspec_volatile, while PAC doesn't.


Done


+/* The following two functions are for code compatibility with aarch64
+   code, this even if in arm we have only one bti instruction.  */
+

I'd just write
  /* Target specific mapping for aarch_gen_bti_c and
  aarch_gen_bti_j. For Arm, both of these map to a simple BTI
instruction.  */


Done



@@ -162,6 +162,7 @@ (define_c_enum "unspec" [
    UNSPEC_PAC_NOP    ; Represents PAC signing LR
    UNSPEC_PACBTI_NOP    ; Represents PAC signing LR + valid landing pad
    UNSPEC_AUT_NOP    ; Represents PAC verifying LR
+  UNSPEC_BTI_NOP    ; Represent BTI
  ])

BTI is an unspec volatile, so this should be in the "vunspec" enum and
renamed accordingly (see above).


Done.

Please find attached the updated version of this patch.

BR

   Andrea



Apart from that, this is OK.

R.


Re: [PATCH 12/15 V4] arm: implement bti injection

2022-12-14 Thread Richard Earnshaw via Gcc-patches




On 14/12/2022 16:40, Andrea Corallo via Gcc-patches wrote:

Hi Richard,

thanks for reviewing.

Richard Earnshaw  writes:


On 28/10/2022 17:40, Andrea Corallo via Gcc-patches wrote:

Hi all,
please find attached the third iteration of this patch addresing
review
comments.
Thanks
Andrea



@@ -23374,12 +23374,6 @@ output_probe_stack_range (rtx reg1, rtx reg2)
return "";
  }

-static bool
-aarch_bti_enabled ()
-{
-  return false;
-}
-
  /* Generate the prologue instructions for entry into an ARM or Thumb-2
 function.  */
  void
@@ -32992,6 +32986,61 @@ arm_current_function_pac_enabled_p (void)
&& !crtl->is_leaf));
  }

+/* Return TRUE if Branch Target Identification Mechanism is enabled.  */
+bool
+aarch_bti_enabled (void)
+{
+  return aarch_enable_bti == 1;
+}

See comment in earlier patch about the location of this function
moving.   Can aarch_enable_bti take values other than 0 and 1?


Yes default is 2.


It shouldn't be by this point, because, hopefully you've gone through 
the equivalent of this hunk (from aarch64) somewhere in 
arm_override_options:



   if (aarch_enable_bti == 2)
 {
 #ifdef TARGET_ENABLE_BTI
   aarch_enable_bti = 1;
 #else
   aarch_enable_bti = 0;
 #endif
 }

And after this point the '2' should never be seen again.  We use this 
trick to permit the user to force a default that differs from the 
configuration.


However, I don't see a hunk to do this in patch 3, so perhaps that needs 
updating to fix this.





[...]


+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) ==
UNSPEC_BTI_NOP;

I'm not sure where this crept in, but UNSPEC and UNSPEC_VOLATILE have
separate enums in the backend, so UNSPEC_BIT_NOP should really be
VUNSPEC_BTI_NOP and defined in the enum "unspecv".


Done


+aarch_pac_insn_p (rtx x)
+{
+  if (!x || !INSN_P (x))
+return false;
+
+  rtx pat = PATTERN (x);
+
+  if (GET_CODE (pat) == SET)
+{
+  rtx tmp = XEXP (pat, 1);
+  if (tmp
+ && GET_CODE (tmp) == UNSPEC
+ && (XINT (tmp, 1) == UNSPEC_PAC_NOP
+ || XINT (tmp, 1) == UNSPEC_PACBTI_NOP))
+   return true;
+}
+

This will also need updating (see review on earlier patch) because
PACBTI needs to be unspec_volatile, while PAC doesn't.


Done


+/* The following two functions are for code compatibility with aarch64
+   code, this even if in arm we have only one bti instruction.  */
+

I'd just write
  /* Target specific mapping for aarch_gen_bti_c and
  aarch_gen_bti_j. For Arm, both of these map to a simple BTI
instruction.  */


Done



@@ -162,6 +162,7 @@ (define_c_enum "unspec" [
UNSPEC_PAC_NOP  ; Represents PAC signing LR
UNSPEC_PACBTI_NOP   ; Represents PAC signing LR + valid landing pad
UNSPEC_AUT_NOP  ; Represents PAC verifying LR
+  UNSPEC_BTI_NOP   ; Represent BTI
  ])

BTI is an unspec volatile, so this should be in the "vunspec" enum and
renamed accordingly (see above).


Done.

Please find attached the updated version of this patch.

BR

   Andrea



Apart from that, this is OK.

R.


Re: [PATCH 10/15 V6] arm: Implement cortex-M return signing address codegen

2022-12-14 Thread Richard Earnshaw via Gcc-patches




On 14/12/2022 16:35, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:

[...]



+  if (TARGET_TPCS_FRAME)
+error ("Return address signing and %<-mtpcs-frame%> are
incompatible.");

So really this is 'not implemented' rather than not compatible - I
don't see why we couldn't implement this if we really wanted to.  It's
not worth implementing it because tpcs-frames are very much legacy
these days.

So the message should use sorry() and say 'is not supported' rather
than 'are incompatible'.

+(define_insn "pacbti_nop"
+  [(set (reg:SI IP_REGNUM)
+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  VUNSPEC_PACBTI_NOP))]

No, this needs to be unspec_volatile, not unspec.

+(define_insn "aut_nop"
+  [(unspec:SI [(reg:SI IP_REGNUM) (reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+ VUNSPEC_AUT_NOP)]

Similarly.

R.



Hi Richard & all,

please find attached the updated patch implementing suggestions.

BR

   Andrea


+   (unspec_volatile:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  VUNSPEC_PACBTI_NOP))]

Please fix the indentation of the VUNSPEC_...

+  [(unspec_volatile:SI [(reg:SI IP_REGNUM) (reg:SI SP_REGNUM) (reg:SI 
LR_REGNUM)]

+ VUNSPEC_AUT_NOP)]

And here.

Otherwise ok with that change.

R.


Re: [PATCH 10/15 V5] arm: Implement cortex-M return signing address codegen

2022-12-12 Thread Richard Earnshaw via Gcc-patches




On 09/12/2022 14:16, Andrea Corallo via Gcc-patches wrote:

Hi Richard,

thanks for reviewing.

Richard Earnshaw  writes:


On 07/11/2022 08:57, Andrea Corallo via Gcc-patches wrote:

Hi all,
please find attached the lastest version of this patch incorporating
some
more improvents.  Feel free to ignore V3.
Best Regards
Andrea




As part of previous upstream suggestions a test for varargs has been
added and '-mtpcs-frame' is deemed being incompatible with this return
signing address feature being introduced.


I don't see any check for the tpcs-frame incompatibility?  What
happens if a user does combine the options?


Check added.


gcc/Changelog

2021-11-03  Andrea Corallo  

* config/arm/arm.h (arm_arch8m_main): Declare it.
* config/arm/arm.cc (arm_arch8m_main): Define it.
(arm_option_reconfigure_globals): Set arm_arch8m_main.
(arm_compute_frame_layout, arm_expand_prologue)
(thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_current_function_pac_enabled_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

You're missing an entry for aarch_bti_enabled () - yes I realize
that's just a placeholder at present and will be fully defined in
patch 12.


Fixed


+static bool
+aarch_bti_enabled ()
+{
+  return false;
+}
+

No comment on this function (and in patch 12 it moves to a different
location).  It would be best to have it in the right place at this
point in time.

+  clobber_ip = (IS_NESTED (func_type)
+&& (((TARGET_APCS_FRAME && frame_pointer_needed &&
TARGET_ARM)
+ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+  || flag_stack_clash_protection)
+ && !df_regs_ever_live_p (LR_REGNUM)
+ && arm_r3_live_at_start_p ()))
+|| (arm_current_function_pac_enabled_p (;

Redundant parenthesis around arm_current_function_pac_enabled_p () call.


Fixed


+ gcc_assert(arm_compute_static_chain_stack_bytes() == 4
+ || arm_current_function_pac_enabled_p ());

I wonder if this assert is now really serving a useful purpose.  I'd
consider removing it.


Removed


@@ -27309,7 +27340,7 @@ thumb2_expand_return (bool simple_return)
 to assert it for now to ensure that future code changes do not silently
 change this behavior.  */
gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
-  if (num_regs == 1)
+  if (num_regs == 1 && !arm_current_function_pac_enabled_p ())
  {
rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
@@ -27324,10 +27355,20 @@ thumb2_expand_return (bool simple_return)
  }
else
  {
-  saved_regs_mask &= ~ (1 << LR_REGNUM);
-  saved_regs_mask |=   (1 << PC_REGNUM);
-  arm_emit_multi_reg_pop (saved_regs_mask);
-}
+ if (arm_current_function_pac_enabled_p ())
+   {
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ emit_insn (gen_aut_nop ());
+ emit_jump_insn (simple_return_rtx);
+   }
+ else
+   {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |=   (1 << PC_REGNUM);
+ arm_emit_multi_reg_pop (saved_regs_mask);
+   }
+   }
  }
else

The logic for these blocks would, I think, be better expressed as

if (pac_enabled)
...
else if (num_regs == 1)
  ...  // existing code
else
  ...  // existing code


Done


Also, I think (out of an abundance of caution) we really need a
scheduling barrier placed before calls to gen_aut_nop() pattern is
emitted, to ensure that the scheduler never tries to move this
instruction away from the position we place it.  Use gen_blockage()
for that (see TARGET_SCHED_PROLOG).  Alternatively, we could make the
UNSPEC_PAC_NOP an unspec_volatile, which has the same effect (IIRC)
without needing an additional insn - if you use this approach, then
please make sure this is explained in a comment.

+(define_insn "pacbti_nop"
+  [(set (reg:SI IP_REGNUM)
+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  UNSPEC_PACBTI_NOP))]
+  "arm_arch8m_main"
+  "pacbti\t%|ip, %|lr, %|sp"
+  [(set_attr "conds" "unconditional")])

The additional side-effect of this being a BTI landing pad means that
we mustn't move any other instruction before it.  So I think this
needs to be an unspec_volatile as well.


Done


On the tests, they are OK as they stand, but we lack anything that
will be tested when suitable hardware is unavailable 

Re: [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988]

2022-12-08 Thread Richard Earnshaw via Gcc-patches




On 08/12/2022 16:39, Tamar Christina via Gcc-patches wrote:

Hi All,

At -O0 (as opposed to e.g. volatile) we can get into the situation where the
in0 and result RTL arguments passed to the division function are memory
locations instead of registers.  I think we could reject these early on by
checking that the gimple values are GIMPLE registers, but I think it's better to
handle it.

As such I force them to registers and emit a move to the memory locations and
leave it up to reload to handle.  This fixes the ICE and still allows the
optimization in these cases,  which improves the code quality a lot.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar



gcc/ChangeLog:

PR target/107988
* config/aarch64/aarch64.cc
(aarch64_vectorize_can_special_div_by_constant): Ensure input and output
RTL are registers.

gcc/testsuite/ChangeLog:

PR target/107988
* gcc.target/aarch64/pr107988-1.c: New test.

--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fe90e1b241fcb3aa97025225
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24337,12 +24337,27 @@ aarch64_vectorize_can_special_div_by_constant (enum 
tree_code code,
if (!VECTOR_TYPE_P (vectype))
 return false;
  
+  if (!REG_P (in0))

+in0 = force_reg (GET_MODE (in0), in0);
+
gcc_assert (output);
  
-  if (!*output)

-*output = gen_reg_rtx (TYPE_MODE (vectype));
+  rtx res =  NULL_RTX;
+
+  /* Once e get to this point we cannot reject the RTL,  if it's not a reg then
+ Create a new reg and write the result to the output afterwards.  */
+  if (!*output || !REG_P (*output))
+res = gen_reg_rtx (TYPE_MODE (vectype));
+  else
+res = *output;


Why not write
  rtx res = *output
  if (!res || !REG_P (res))
res = gen_reg_rtx...

then you don't need either the else clause or the dead NULL_RTX assignment.



+
+  emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), res, in0, in1));
+
+  if (*output && res != *output)
+emit_move_insn (*output, res);
+  else
+*output = res;
  
-  emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1));

return true;
  }
  
diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c

new file mode 100644
index 
..c4fd290271b738345173b569bdc58c092fba7fe9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O0" } */
+typedef unsigned short __attribute__((__vector_size__ (16))) V;
+
+V
+foo (V v)
+{
+  v /= 255;
+  return v;
+}






Otherwise OK.

R.


Re: [GCC][PATCH 13/15, v4] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2022-12-08 Thread Richard Earnshaw via Gcc-patches




On 09/11/2022 14:32, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

This patch teaches the DWARF support in gcc about RA_AUTH_CODE pseudo 
hard-register and also
updates the ".save", ".cfi_register", ".cfi_offset", ".cfi_restore" directives 
accordingly.
This patch also adds support to emit ".pacspval" directive when "pac ip, lr, 
sp" instruction
in generated in the assembly.

RA_AUTH_CODE register number is 107 and it's dwarf register number is 143.

Applying this patch on top of PACBTI series posted here
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599658.html and when 
compiling the following
test.c with "-march=armv8.1-m.main+mve+pacbti -mbranch-protection=pac-ret 
-mthumb -mfloat-abi=hard
fasynchronous-unwind-tables -g -O0 -S" command line options, the assembly 
output after this patch
looks like below:

$cat test.c

void fun1(int a);
void fun(int a,...)
{
   fun1(a);
}

int main()
{
   fun (10);
   return 0;
}

$ arm-none-eabi-gcc -march=armv8.1-m.main+mve+pacbti 
-mbranch-protection=pac-ret -mthumb -mfloat-abi=hard
-fasynchronous-unwind-tables -g -O0 -S test.s

Assembly output:
...
fun:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -24
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...
main:
...
 .pacspval
 pac ip, lr, sp
 .cfi_register 143, 12
 push{r3, r7, ip, lr}
 .save {r3, r7, ra_auth_code, lr}
...
 .cfi_offset 143, -8
...
 .cfi_restore 143
...
 aut ip, lr, sp
 bx  lr
...

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/testsuite/ChangeLog:

2022-11-04  Srinath Parvathaneni  

 * g++.target/arm/pac-1.C: New test.
 * gcc.target/arm/pac-9.c: New test.


2022-11-04  Srinath Parvathaneni  

 * config/arm/aout.h (ra_auth_code): Add entry in enum.
 * config/arm/arm.cc (pac_emit): Declare new global boolean variable.
 (emit_multi_reg_push): Add RA_AUTH_CODE register to
 dwarf frame expression.
 (arm_emit_multi_reg_pop): Restore RA_AUTH_CODE register.
 (arm_expand_prologue): Update frame related infomration and reg notes
 for pac/pacbit insn.
 (arm_regno_class): Check for pac pseudo reigster.
 (arm_dbx_register_number): Assign ra_auth_code register number in 
dwarf.
 (arm_unwind_emit_sequence): Print .save directive with ra_auth_code
 register.
 (arm_unwind_emit_set): Add entry for IP_REGNUM in switch case.
 (arm_unwind_emit): Update REG_CFA_REGISTER case._
 (arm_conditional_register_usage): Mark ra_auth_code in fixed reigsters.
 * config/arm/arm.h (FIRST_PSEUDO_REGISTER): Modify.
 (IS_PAC_PSEUDO_REGNUM): Define.
 (enum reg_class): Add PAC_REG entry.
 * config/arm/arm.md (RA_AUTH_CODE): Define.

gcc/testsuite/ChangeLog:

2022-11-04  Srinath Parvathaneni  

 * g++.target/arm/pac-1.C: New test.
 * gcc.target/arm/pac-9.c: Likewise.


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 
b918ad3782fbee82320febb8b6e72ad615780261..ffeed45a678f17c63d5b42c21f020ca416cbf23f
 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -74,7 +74,8 @@
"wr8",   "wr9",   "wr10",  "wr11",  \
"wr12",  "wr13",  "wr14",  "wr15",  \
"wcgr0", "wcgr1", "wcgr2", "wcgr3", \
-  "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0"   \
+  "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0",  \
+  "ra_auth_code" \
  }
  #endif
  
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

index 
a2dc3fc145c52d8381c54634687376089a47e704..91c400f12568156ed29bf5d5e59460bf887fbefb
 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -820,7 +820,8 @@ extern const int arm_arch_cde_coproc_bits[];
s16-s31   S VFP variable (aka d8-d15).
vfpcc   Not a real register.  Represents the VFP condition
code flags.
-   vpr Used to represent MVE VPR predication.  */
+   vpr Used to represent MVE VPR predication.
+   ra_auth_codePseudo register to save PAC.  */
  
  /* The stack backtrace structure is as follows:

fp points to here:  |  save code pointer  |  [fp]
@@ -861,7 +862,7 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1,\
1,1,1,1,\
/* Specials.  */\
-  1,1,1,1,1,1,1\
+  1,1,1,1,1,1,1,1  \
  }
  
  /* 1 for registers not available across function calls.

@@ -891,7 +892,7 @@ 

Re: [GCC][PATCH v2] arm: Add pacbti related multilib support for armv8.1-m.main.

2022-12-06 Thread Richard Earnshaw via Gcc-patches




On 31/10/2022 15:36, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds the support for pacbti multlilib linking by making
"-mbranch-protection=none" as default in the command line for all M-profile
targets and uses "-mbranch-protection=none" for multilib matching. If any
valid value is passed to "-mbranch-protection" in the command line, this
new value overwrites the default value in the command line and uses
"-mbranch-protection=standard" for multilib matching.

Eg 1.

If the passed command line flags are:
a) -march=armv8.1-m.main+mve -mfloat-abi=hard -mfpu=auto
b) -mcpu=cortex-m85+nopacbti -mfloat-abi=hard -mfpu=auto

After this patch the command line flags the compiler receives will be:
a) -march=armv8.1-m.main+mve -mfloat-abi=hard -mfpu=auto 
-mbranch-protection=none
b) -mcpu=cortex-m85+nopacbti -mfloat-abi=hard -mfpu=auto 
-mbranch-protection=none

"-mbranch-protection=none" will be used in the multilib matching.

Eg 2.

If the passed command line flags are:
a) -march=armv8.1-m.main+mve+pacbti -mfloat-abi=hard -mfpu=auto  
-mbranch-protection=pac-ret
b) -mcpu=cortex-m85 -mfloat-abi=hard -mfpu=auto  -mbranch-protection=pac-ret+bti

After this patch the command line flags the compiler receives will be:
a) -march=armv8.1-m.main+mve+pacbti -mfloat-abi=hard -mfpu=auto 
-mbranch-protection=pac-ret
b) -mcpu=cortex-m85 -mfloat-abi=hard -mfpu=auto -mbranch-protection=pac-ret+bti

"-mbranch-protection=standard" will be used in the multilib matching.

Eg 3.

For A-profile target, if the passed command line flags are:
-march=armv8-a+simd -mfloat-abi=hard -mfpu=auto

Even after this patch the command line flags compiler receives will remain the 
same:
-march=armv8-a+simd -mfloat-abi=hard -mfpu=auto

Regression tested on arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-10-28  Srinath Parvathaneni  

 * common/config/arm/arm-common.cc
 (arm_canon_branch_protection_option): Define new function.
 * config/arm/arm-cpus.in (armv8.1-m.main): Move dsp option below pacbti
 option.
 * config/arm/arm.h (arm_canon_branch_protection_option): Define 
function
 prototype.
 (CANON_BRANCH_PROTECTION_SPEC_FUNCTION): Define macro.
 (MBRANCH_PROTECTION_SPECS): Likewise.
 * config/arm/t-rmprofile (MULTI_ARCH_OPTS_RM): Add new options.
 (MULTI_ARCH_DIRS_RM): Add new directories.
 (MULTILIB_REQUIRED): Add new option.
 (MULTILIB_REUSE): Reuse existing multlibs.
 (MULTILIB_MATCHES): Match multilib strings.

gcc/testsuite/ChangeLog:

2022-10-28  Srinath Parvathaneni  

 * gcc.target/arm/multilib.exp (multilib_config "rmprofile"): Update
 tests.
 * gcc.target/arm/pac-10.c: New test.
 * gcc.target/arm/pac-11.c: Likewise.
 * gcc.target/arm/pac-12.c: Likewise.


Your attachment this time is gzipped, which is almost as bad as 
octet-stream.  Please use text/plain attachments.


--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -746,8 +746,8 @@ begin arch armv8.1-m.main
  profile M
  isa ARMv8_1m_main
 # fp => FPv5-sp-d16; fp.dp => FPv5-d16
- option dsp add armv7em
  option pacbti add pacbti
+ option dsp add armv7em

Why is this needed?  It looks completely unnecessary.

+/* Automatically add -mbranch-protection=none for M-profile targets if
+   -mbranch-protection value isn't specified via the command line.  */
+#define MBRANCH_PROTECTION_SPECS   \
+  "%{!mbranch-protection=*:%:canon_branch_protection(%{march=*:arch %*;" \
+  "mcpu=*:cpu %*;:})}"
+

This doesn't canonicalize the branch-protection option, it provides a 
default if none was specified.  So if we really need this operation (see 
below) it should be renamed accordingly.


 MULTI_ARCH_OPTS_RM += mbranch-protection=standard
-MULTI_ARCH_DIRS_RM += mbranch-protection
+MULTI_ARCH_DIRS_RM += branch_protection_on
+MULTI_ARCH_OPTS_RM += mbranch-protection=none
+MULTI_ARCH_DIRS_RM += branch_protection_off

These options are related (you'll never need both), so it should be 
written as


 MULTI_ARCH_OPTS_RM += mbranch-protection=standard/mbranch-protection=none
and then add
MULTI_ARCH_DIRS_RM  += mbranch_protection_on mbranch_protection_off

as a single line.  But I think it would be better to rename the 
directory names as "bp" and "bp_off".


Except that...

Why do we need a separate bp_off multilib at all? That's the default and 
the multilib framework can be told how to handle that ...


Firstly, create a new header, lets call it arm-mlib.h, containing

#define MULTILIB_DEFAULTS { "mbranch-protection=none" }

And then arrange to add this new header when the multilib framework is 
enabled via config.gcc.  You'll need to add this to ${tm_file} when we 
add the extra multilibs (search for "aprofile|rmprofile" in config.gcc).


Now you no longer need to handle this case at all in the 

Re: [PATCH 10/15 V4] arm: Implement cortex-M return signing address codegen

2022-12-06 Thread Richard Earnshaw via Gcc-patches




On 06/12/2022 15:46, Andrea Corallo wrote:

Hi Richard,

thanks for reviewing.

Just one clarification before I complete the respin of this patch.

Richard Earnshaw  writes:

[...]


Also, I think (out of an abundance of caution) we really need a
scheduling barrier placed before calls to gen_aut_nop() pattern is
emitted, to ensure that the scheduler never tries to move this
instruction away from the position we place it.  Use gen_blockage()
for that (see TARGET_SCHED_PROLOG).  Alternatively, we could make the
UNSPEC_PAC_NOP an unspec_volatile, which has the same effect (IIRC)
without needing an additional insn - if you use this approach, then
please make sure this is explained in a comment.

+(define_insn "pacbti_nop"
+  [(set (reg:SI IP_REGNUM)
+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  UNSPEC_PACBTI_NOP))]
+  "arm_arch8m_main"
+  "pacbti\t%|ip, %|lr, %|sp"
+  [(set_attr "conds" "unconditional")])

The additional side-effect of this being a BTI landing pad means that
we mustn't move any other instruction before it.  So I think this
needs to be an unspec_volatile as well.


IIUC from this we want to make all the three (UNSPEC_PAC_NOP,
UNSPEC_PACBTI_NOP, UNSPEC_AUT_NOP) unspec volatile, correct?


UNSPEC_PAC_NOP doesn't need to be volatile. The register constraints 
will be enough to ensure it is run before any instruction that consumes 
the result it produces.


UNSPEC_PAC_BTI_NOP needs to be volatile, as it's essential that when we 
have an instruction (for example ldr r3, [r3]) early in the program that 
doesn't interact with the prologue then it cannot be migrated before the 
BTI as the BTI is a landing pad and must be the first instruction in the 
function.  This is why UNSPEC_BTI_NOP is volatile.


UNSPEC_AUT_NOP must be volatile because we want to ensure that no 
instruction is moved after this one and before the return as that might 
expose a ROP gadget to hackers.


R.



IIUC correctly the scheduler should not reorder them as we have
expressed which register they consume and produce but is for double
caution correct?


On the tests, they are OK as they stand, but we lack anything that
will be tested when suitable hardware is unavailable (all tests are
"dg-do run").  Can we please have some compile-only tests as well?


Ack.

BR

   Andrea


Re: [PATCH 12/15 V3] arm: implement bti injection

2022-12-05 Thread Richard Earnshaw via Gcc-patches




On 28/10/2022 17:40, Andrea Corallo via Gcc-patches wrote:

Hi all,

please find attached the third iteration of this patch addresing review
comments.

Thanks

   Andrea



@@ -23374,12 +23374,6 @@ output_probe_stack_range (rtx reg1, rtx reg2)
   return "";
 }

-static bool
-aarch_bti_enabled ()
-{
-  return false;
-}
-
 /* Generate the prologue instructions for entry into an ARM or Thumb-2
function.  */
 void
@@ -32992,6 +32986,61 @@ arm_current_function_pac_enabled_p (void)
   && !crtl->is_leaf));
 }

+/* Return TRUE if Branch Target Identification Mechanism is enabled.  */
+bool
+aarch_bti_enabled (void)
+{
+  return aarch_enable_bti == 1;
+}

See comment in earlier patch about the location of this function moving. 
 Can aarch_enable_bti take values other than 0 and 1?  If not, then 
writing aarch_enable_bti != 0 is slightly more robust, but perhaps this 
should be replaced by a macro anyway, much like a number of other 
predicates used by the backend.


+  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 
UNSPEC_BTI_NOP;


I'm not sure where this crept in, but UNSPEC and UNSPEC_VOLATILE have 
separate enums in the backend, so UNSPEC_BIT_NOP should really be 
VUNSPEC_BTI_NOP and defined in the enum "unspecv".


+aarch_pac_insn_p (rtx x)
+{
+  if (!x || !INSN_P (x))
+return false;
+
+  rtx pat = PATTERN (x);
+
+  if (GET_CODE (pat) == SET)
+{
+  rtx tmp = XEXP (pat, 1);
+  if (tmp
+ && GET_CODE (tmp) == UNSPEC
+ && (XINT (tmp, 1) == UNSPEC_PAC_NOP
+ || XINT (tmp, 1) == UNSPEC_PACBTI_NOP))
+   return true;
+}
+

This will also need updating (see review on earlier patch) because 
PACBTI needs to be unspec_volatile, while PAC doesn't.


+/* The following two functions are for code compatibility with aarch64
+   code, this even if in arm we have only one bti instruction.  */
+

I'd just write
 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j. 
For Arm, both of these map to a simple BTI instruction.  */



@@ -162,6 +162,7 @@ (define_c_enum "unspec" [
   UNSPEC_PAC_NOP   ; Represents PAC signing LR
   UNSPEC_PACBTI_NOP; Represents PAC signing LR + valid landing pad
   UNSPEC_AUT_NOP   ; Represents PAC verifying LR
+  UNSPEC_BTI_NOP   ; Represent BTI
 ])

BTI is an unspec volatile, so this should be in the "vunspec" enum and 
renamed accordingly (see above).


R.


Re: [PATCH 10/15 V4] arm: Implement cortex-M return signing address codegen

2022-12-05 Thread Richard Earnshaw via Gcc-patches




On 07/11/2022 08:57, Andrea Corallo via Gcc-patches wrote:

Hi all,

please find attached the lastest version of this patch incorporating some
more improvents.  Feel free to ignore V3.

Best Regards

   Andrea



> As part of previous upstream suggestions a test for varargs has been
> added and '-mtpcs-frame' is deemed being incompatible with this return
> signing address feature being introduced.

I don't see any check for the tpcs-frame incompatibility?  What happens 
if a user does combine the options?


gcc/Changelog

2021-11-03  Andrea Corallo  

* config/arm/arm.h (arm_arch8m_main): Declare it.
* config/arm/arm.cc (arm_arch8m_main): Define it.
(arm_option_reconfigure_globals): Set arm_arch8m_main.
(arm_compute_frame_layout, arm_expand_prologue)
(thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_current_function_pac_enabled_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

You're missing an entry for aarch_bti_enabled () - yes I realize that's 
just a placeholder at present and will be fully defined in patch 12.


+static bool
+aarch_bti_enabled ()
+{
+  return false;
+}
+

No comment on this function (and in patch 12 it moves to a different 
location).  It would be best to have it in the right place at this point 
in time.


+  clobber_ip = (IS_NESTED (func_type)
+&& (((TARGET_APCS_FRAME && frame_pointer_needed && 
TARGET_ARM)

+ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+  || flag_stack_clash_protection)
+ && !df_regs_ever_live_p (LR_REGNUM)
+ && arm_r3_live_at_start_p ()))
+|| (arm_current_function_pac_enabled_p (;

Redundant parenthesis around arm_current_function_pac_enabled_p () call.

+ gcc_assert(arm_compute_static_chain_stack_bytes() == 4
+ || arm_current_function_pac_enabled_p ());

I wonder if this assert is now really serving a useful purpose.  I'd 
consider removing it.


@@ -27309,7 +27340,7 @@ thumb2_expand_return (bool simple_return)
 to assert it for now to ensure that future code changes do not silently
 change this behavior.  */
   gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
-  if (num_regs == 1)
+  if (num_regs == 1 && !arm_current_function_pac_enabled_p ())
 {
   rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
   rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
@@ -27324,10 +27355,20 @@ thumb2_expand_return (bool simple_return)
 }
   else
 {
-  saved_regs_mask &= ~ (1 << LR_REGNUM);
-  saved_regs_mask |=   (1 << PC_REGNUM);
-  arm_emit_multi_reg_pop (saved_regs_mask);
-}
+ if (arm_current_function_pac_enabled_p ())
+   {
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ emit_insn (gen_aut_nop ());
+ emit_jump_insn (simple_return_rtx);
+   }
+ else
+   {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |=   (1 << PC_REGNUM);
+ arm_emit_multi_reg_pop (saved_regs_mask);
+   }
+   }
 }
   else

The logic for these blocks would, I think, be better expressed as

   if (pac_enabled)
   ...
   else if (num_regs == 1)
 ...  // existing code
   else
 ...  // existing code

Also, I think (out of an abundance of caution) we really need a 
scheduling barrier placed before calls to gen_aut_nop() pattern is 
emitted, to ensure that the scheduler never tries to move this 
instruction away from the position we place it.  Use gen_blockage() for 
that (see TARGET_SCHED_PROLOG).  Alternatively, we could make the 
UNSPEC_PAC_NOP an unspec_volatile, which has the same effect (IIRC) 
without needing an additional insn - if you use this approach, then 
please make sure this is explained in a comment.


+(define_insn "pacbti_nop"
+  [(set (reg:SI IP_REGNUM)
+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+  UNSPEC_PACBTI_NOP))]
+  "arm_arch8m_main"
+  "pacbti\t%|ip, %|lr, %|sp"
+  [(set_attr "conds" "unconditional")])

The additional side-effect of this being a BTI landing pad means that we 
mustn't move any other instruction before it.  So I think this needs to 
be an unspec_volatile as well.


On the tests, they are OK as they stand, but we lack anything that will 
be tested when suitable hardware is unavailable (all tests are "dg-do 
run").  Can we please have some compile-only tests as well?


R.



[PATCH] sync libsframe toplevel from binutils-gdb

2022-11-25 Thread Richard Earnshaw via Gcc-patches

This pulls in the toplevel portion of this binutils-gdb commit:
   19e559f1c91bfaedbd2f91d85ee161f3f03fda3c libsframe: add the SFrame library

ChangeLog:
* Makefile.def: Add libsframe as new module with its dependencies.
* Makefile.in: Regenerated.
* configure.ac: Add libsframe to host_libs.
* configure: Regenerated.
---
 Makefile.def |2 +
 Makefile.in  | 1288 +-
 configure|2 +-
 configure.ac |2 +-
 4 files changed, 1287 insertions(+), 7 deletions(-)

diff --git a/Makefile.def b/Makefile.def
index 02e63c57177..83ae77586ad 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -145,6 +145,7 @@ host_modules= { module= lto-plugin; bootstrap=true;
 host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
 host_modules= { module= gotools; };
 host_modules= { module= libctf; bootstrap=true; };
+host_modules= { module= libsframe; bootstrap=true; };
 
 target_modules = { module= libstdc++-v3;
 		   bootstrap=true;
@@ -470,6 +471,7 @@ dependencies = { module=all-binutils; on=all-intl; };
 dependencies = { module=all-binutils; on=all-gas; };
 dependencies = { module=all-binutils; on=all-libctf; };
 dependencies = { module=all-ld; on=all-libctf; };
+dependencies = { module=all-binutils; on=all-libsframe; };
 
 // We put install-opcodes before install-binutils because the installed
 // binutils might be on PATH, and they might need the shared opcodes
diff --git a/Makefile.in b/Makefile.in
index 6ffa9660c25..e5bed1bea3a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1097,7 +1097,8 @@ configure-host:  \
 maybe-configure-lto-plugin \
 maybe-configure-libcc1 \
 maybe-configure-gotools \
-maybe-configure-libctf
+maybe-configure-libctf \
+maybe-configure-libsframe
 .PHONY: configure-target
 configure-target:  \
 maybe-configure-target-libstdc++-v3 \
@@ -1273,6 +1274,9 @@ all-host: maybe-all-gotools
 @if libctf-no-bootstrap
 all-host: maybe-all-libctf
 @endif libctf-no-bootstrap
+@if libsframe-no-bootstrap
+all-host: maybe-all-libsframe
+@endif libsframe-no-bootstrap
 
 .PHONY: all-target
 
@@ -1384,6 +1388,7 @@ info-host: maybe-info-lto-plugin
 info-host: maybe-info-libcc1
 info-host: maybe-info-gotools
 info-host: maybe-info-libctf
+info-host: maybe-info-libsframe
 
 .PHONY: info-target
 
@@ -1472,6 +1477,7 @@ dvi-host: maybe-dvi-lto-plugin
 dvi-host: maybe-dvi-libcc1
 dvi-host: maybe-dvi-gotools
 dvi-host: maybe-dvi-libctf
+dvi-host: maybe-dvi-libsframe
 
 .PHONY: dvi-target
 
@@ -1560,6 +1566,7 @@ pdf-host: maybe-pdf-lto-plugin
 pdf-host: maybe-pdf-libcc1
 pdf-host: maybe-pdf-gotools
 pdf-host: maybe-pdf-libctf
+pdf-host: maybe-pdf-libsframe
 
 .PHONY: pdf-target
 
@@ -1648,6 +1655,7 @@ html-host: maybe-html-lto-plugin
 html-host: maybe-html-libcc1
 html-host: maybe-html-gotools
 html-host: maybe-html-libctf
+html-host: maybe-html-libsframe
 
 .PHONY: html-target
 
@@ -1736,6 +1744,7 @@ TAGS-host: maybe-TAGS-lto-plugin
 TAGS-host: maybe-TAGS-libcc1
 TAGS-host: maybe-TAGS-gotools
 TAGS-host: maybe-TAGS-libctf
+TAGS-host: maybe-TAGS-libsframe
 
 .PHONY: TAGS-target
 
@@ -1824,6 +1833,7 @@ install-info-host: maybe-install-info-lto-plugin
 install-info-host: maybe-install-info-libcc1
 install-info-host: maybe-install-info-gotools
 install-info-host: maybe-install-info-libctf
+install-info-host: maybe-install-info-libsframe
 
 .PHONY: install-info-target
 
@@ -1912,6 +1922,7 @@ install-dvi-host: maybe-install-dvi-lto-plugin
 install-dvi-host: maybe-install-dvi-libcc1
 install-dvi-host: maybe-install-dvi-gotools
 install-dvi-host: maybe-install-dvi-libctf
+install-dvi-host: maybe-install-dvi-libsframe
 
 .PHONY: install-dvi-target
 
@@ -2000,6 +2011,7 @@ install-pdf-host: maybe-install-pdf-lto-plugin
 install-pdf-host: maybe-install-pdf-libcc1
 install-pdf-host: maybe-install-pdf-gotools
 install-pdf-host: maybe-install-pdf-libctf
+install-pdf-host: maybe-install-pdf-libsframe
 
 .PHONY: install-pdf-target
 
@@ -2088,6 +2100,7 @@ install-html-host: maybe-install-html-lto-plugin
 install-html-host: maybe-install-html-libcc1
 install-html-host: maybe-install-html-gotools
 install-html-host: maybe-install-html-libctf
+install-html-host: maybe-install-html-libsframe
 
 .PHONY: install-html-target
 
@@ -2176,6 +2189,7 @@ installcheck-host: maybe-installcheck-lto-plugin
 installcheck-host: maybe-installcheck-libcc1
 installcheck-host: maybe-installcheck-gotools
 installcheck-host: maybe-installcheck-libctf
+installcheck-host: maybe-installcheck-libsframe
 
 .PHONY: installcheck-target
 
@@ -2264,6 +2278,7 @@ mostlyclean-host: maybe-mostlyclean-lto-plugin
 mostlyclean-host: maybe-mostlyclean-libcc1
 mostlyclean-host: maybe-mostlyclean-gotools
 mostlyclean-host: maybe-mostlyclean-libctf
+mostlyclean-host: maybe-mostlyclean-libsframe
 
 .PHONY: mostlyclean-target
 
@@ -2352,6 +2367,7 @@ clean-host: maybe-clean-lto-plugin
 clean-host: maybe-clean-libcc1
 clean-host: maybe-clean-gotools
 clean-host: 

Re: [Patch Arm] Fix PR 92999

2022-11-24 Thread Richard Earnshaw via Gcc-patches




On 11/11/2022 21:50, Ramana Radhakrishnan via Gcc-patches wrote:

On Thu, Nov 10, 2022 at 7:46 PM Ramana Radhakrishnan
 wrote:


On Thu, Nov 10, 2022 at 6:03 PM Richard Earnshaw
 wrote:




On 10/11/2022 17:21, Richard Earnshaw via Gcc-patches wrote:



On 08/11/2022 18:20, Ramana Radhakrishnan via Gcc-patches wrote:

PR92999 is a case where the VFP calling convention does not allocate
enough FP registers for a homogenous aggregate containing FP16 values.
I believe this is the complete fix but would appreciate another set of
eyes on this.

Could I get a hand with a regression test run on an armhf environment
while I fix my environment ?

gcc/ChangeLog:

PR target/92999
*  config/arm/arm.c (aapcs_vfp_allocate_return_reg): Adjust to handle
aggregates with elements smaller than SFmode.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr92999.c: New test.


Thanks,
Ramana

Signed-off-by: Ramana Radhakrishnan 


I'm not sure about this.  The AAPCS does not mention a base type of a
half-precision FP type as an appropriate homogeneous aggregate for using
VFP registers for either calling or returning.


Ooh interesting, thanks for taking a look and poking at the AAPCS and
that's a good catch. BF16 should also have the same behaviour as FP16
, I suspect ?


I suspect I got caught out by the definition of the Homogenous
aggregate from Section 5.3.5
((https://github.com/ARM-software/abi-aa/blob/2982a9f3b512a5bfdc9e3fea5d3b298f9165c36b/aapcs32/aapcs32.rst#homogeneous-aggregates)
which simply suggests it's an aggregate of fundamental types which
lists half precision floating point .


A homogeneous aggregate is any aggregate that fits the general 
definition, but only HAs of specific types are of interest for the VFP 
PCS rules.


The problem we have is that when we added HFmode (and later BF16mode) 
support we didn't notice that the base types are VFP candidates, but the 
nested types (eg in records or arrays) are not.


The problems started around SVN r236269 (git:1b81a1c1bd53) when we added 
FP16 support.





FTR, ideally I should have read 7.1.2.1
https://github.com/ARM-software/abi-aa/blob/2982a9f3b512a5bfdc9e3fea5d3b298f9165c36b/aapcs32/aapcs32.rst#procedure-calling)
:)







So perhaps the bug is that we try to treat this as a homogeneous
aggregate at all.


Yep I agree - I'll take a look again tomorrow and see if I can get a fix.

(And thanks Alex for the test run, I might trouble you again while I
still (slowly) get some of my boards back up)



and as promised take 2. I'd really prefer another review on this one
to see if I've not missed anything in the cases below.


I think I'd prefer to try and fix this at the point where we accept the 
base types, ie around:


case REAL_TYPE:
  mode = TYPE_MODE (type);
  if (mode != DFmode && mode != SFmode && mode != HFmode && mode != 
BFmode)

return -1;

by changing this to something like

/* HFmode and BFmode can be passed in registers, but are not valid
   base types for an HFA, so only accept these if we are at the top
   level.  */
if (!(mode == DFmode || mode == SFmode
  || (depth == 0
  && (mode == HFmode || mode == BFmode)))
   return -1;

and we then pass depth into the recursion calls as an extra parameter, 
starting at 0 for the top level and incrementing it by 1 each time 
aapcs_vfp_sub_candidate recurses.


For the test, would it be possible to rewrite it in the style of 
gcc.target/arm/aapcs/* and put it there? That would ensure that not only 
are the caller and callee compatible, but that the values are passed in 
the correct location.


R.



Re: [PATCH] aarch64: Fix test_dfp_17.c for big-endian [PR 107604]

2022-11-22 Thread Richard Earnshaw via Gcc-patches




On 22/11/2022 13:09, Christophe Lyon wrote:



On 11/22/22 12:33, Richard Earnshaw wrote:



On 22/11/2022 11:21, Richard Sandiford wrote:

Richard Earnshaw via Gcc-patches  writes:

On 22/11/2022 09:01, Christophe Lyon via Gcc-patches wrote:
gcc.target/aarch64/aapcs64/test_dfp_17.c has been failing on 
big-endian, because the _Decimal32 on-stack argument is not

padded in the same direction depending on endianness.

This patch fixes the testcase so that it expects the argument
in the right stack location, similarly to what other tests do
in the same directory.

gcc/testsuite/ChangeLog:

PR target/107604 * gcc.target/aarch64/aapcs64/test_dfp_17.c:
Fix for big-endian. --- 
gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c | 4

 1 file changed, 4 insertions(+)

diff --git
a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c index

22dc462bf7c..3c45f715cf7 100644 ---
a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c +++
b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c @@
-32,6 +32,10 @@ struct z b = { 9.0dd, 10.0dd, 11.0dd, 12.0dd
}; ANON(struct z, a, D1) ANON(struct z, b, STACK) ANON(int , 5,
W0) +#ifndef __AAPCS64_BIG_ENDIAN__ ANON(_Decimal32, f1,
STACK+32) /* Note: no promotion to _Decimal64.  */ +#else +
ANON(_Decimal32, f1, STACK+36) /* Note: no promotion to 
_Decimal64.  */ +#endif LAST_ANON(_Decimal64, 0.5dd, STACK+40) #endif


Why would a Decimal32 change stack placement based on the
endianness? Isn't it a 4-byte object?


Yes, but PARM_BOUNDARY (64) sets a minimum alignment for all stack
 arguments.

Richard


Ah, OK.
Indeed, it was not immediately obvious to me either, when looking at 
aarch64_layout_arg. aarch64_function_arg_padding comes into play, too.




I wonder if we should have a new macro in the tests, something like 
ANON_PADDED to describe this case and that works things out more 
automagically for big-endian.

Maybe. There are many other tests under aapcs64/ which have a similar
#ifndef __AAPCS64_BIG_ENDIAN__



Yes, it could be used to clean all those up as well.




I notice the new ANON definition is not correctly indented.

It looks OK on my side (2 spaces).


Never mind then, it must be a quirk of how the diff is displayed.


Thanks,

Christophe



R.


Re: [PATCH] aarch64: Fix test_dfp_17.c for big-endian [PR 107604]

2022-11-22 Thread Richard Earnshaw via Gcc-patches




On 22/11/2022 11:21, Richard Sandiford wrote:

Richard Earnshaw via Gcc-patches  writes:

On 22/11/2022 09:01, Christophe Lyon via Gcc-patches wrote:

gcc.target/aarch64/aapcs64/test_dfp_17.c has been failing on
big-endian, because the _Decimal32 on-stack argument is not padded in
the same direction depending on endianness.

This patch fixes the testcase so that it expects the argument in the
right stack location, similarly to what other tests do in the same
directory.

gcc/testsuite/ChangeLog:

PR target/107604
* gcc.target/aarch64/aapcs64/test_dfp_17.c: Fix for big-endian.
---
   gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c | 4 
   1 file changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
index 22dc462bf7c..3c45f715cf7 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
@@ -32,6 +32,10 @@ struct z b = { 9.0dd, 10.0dd, 11.0dd, 12.0dd };
 ANON(struct z, a, D1)
 ANON(struct z, b, STACK)
 ANON(int , 5, W0)
+#ifndef __AAPCS64_BIG_ENDIAN__
 ANON(_Decimal32, f1, STACK+32) /* Note: no promotion to _Decimal64.  */
+#else
+  ANON(_Decimal32, f1, STACK+36) /* Note: no promotion to _Decimal64.  */
+#endif
 LAST_ANON(_Decimal64, 0.5dd, STACK+40)
   #endif


Why would a Decimal32 change stack placement based on the endianness?
Isn't it a 4-byte object?


Yes, but PARM_BOUNDARY (64) sets a minimum alignment for all stack arguments.

Richard


Ah, OK.

I wonder if we should have a new macro in the tests, something like 
ANON_PADDED to describe this case and that works things out more 
automagically for big-endian.


I notice the new ANON definition is not correctly indented.

R.


Re: [PATCH] aarch64: Fix test_dfp_17.c for big-endian [PR 107604]

2022-11-22 Thread Richard Earnshaw via Gcc-patches




On 22/11/2022 09:01, Christophe Lyon via Gcc-patches wrote:

gcc.target/aarch64/aapcs64/test_dfp_17.c has been failing on
big-endian, because the _Decimal32 on-stack argument is not padded in
the same direction depending on endianness.

This patch fixes the testcase so that it expects the argument in the
right stack location, similarly to what other tests do in the same
directory.

gcc/testsuite/ChangeLog:

PR target/107604
* gcc.target/aarch64/aapcs64/test_dfp_17.c: Fix for big-endian.
---
  gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
index 22dc462bf7c..3c45f715cf7 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_dfp_17.c
@@ -32,6 +32,10 @@ struct z b = { 9.0dd, 10.0dd, 11.0dd, 12.0dd };
ANON(struct z, a, D1)
ANON(struct z, b, STACK)
ANON(int , 5, W0)
+#ifndef __AAPCS64_BIG_ENDIAN__
ANON(_Decimal32, f1, STACK+32) /* Note: no promotion to _Decimal64.  */
+#else
+  ANON(_Decimal32, f1, STACK+36) /* Note: no promotion to _Decimal64.  */
+#endif
LAST_ANON(_Decimal64, 0.5dd, STACK+40)
  #endif


Why would a Decimal32 change stack placement based on the endianness? 
Isn't it a 4-byte object?


Re: [Patch Arm] Fix PR 92999

2022-11-10 Thread Richard Earnshaw via Gcc-patches




On 10/11/2022 17:21, Richard Earnshaw via Gcc-patches wrote:



On 08/11/2022 18:20, Ramana Radhakrishnan via Gcc-patches wrote:

PR92999 is a case where the VFP calling convention does not allocate
enough FP registers for a homogenous aggregate containing FP16 values.
I believe this is the complete fix but would appreciate another set of
eyes on this.

Could I get a hand with a regression test run on an armhf environment
while I fix my environment ?

gcc/ChangeLog:

PR target/92999
*  config/arm/arm.c (aapcs_vfp_allocate_return_reg): Adjust to handle
aggregates with elements smaller than SFmode.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr92999.c: New test.


Thanks,
Ramana

Signed-off-by: Ramana Radhakrishnan 


I'm not sure about this.  The AAPCS does not mention a base type of a 
half-precision FP type as an appropriate homogeneous aggregate for using 
VFP registers for either calling or returning.


So perhaps the bug is that we try to treat this as a homogeneous 
aggregate at all.


R.


And clang seems to agree with my opinion: https://godbolt.org/z/ncaYfzebM

R.


Re: [Patch Arm] Fix PR 92999

2022-11-10 Thread Richard Earnshaw via Gcc-patches




On 08/11/2022 18:20, Ramana Radhakrishnan via Gcc-patches wrote:

PR92999 is a case where the VFP calling convention does not allocate
enough FP registers for a homogenous aggregate containing FP16 values.
I believe this is the complete fix but would appreciate another set of
eyes on this.

Could I get a hand with a regression test run on an armhf environment
while I fix my environment ?

gcc/ChangeLog:

PR target/92999
*  config/arm/arm.c (aapcs_vfp_allocate_return_reg): Adjust to handle
aggregates with elements smaller than SFmode.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr92999.c: New test.


Thanks,
Ramana

Signed-off-by: Ramana Radhakrishnan 


I'm not sure about this.  The AAPCS does not mention a base type of a 
half-precision FP type as an appropriate homogeneous aggregate for using 
VFP registers for either calling or returning.


So perhaps the bug is that we try to treat this as a homogeneous 
aggregate at all.


R.


Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2022-11-08 Thread Richard Earnshaw via Gcc-patches




On 26/10/2022 09:49, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:



-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when
popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



  From what I can tell from similar functions this is correct, but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, than it is the current CFA register.
If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence we will be detected by an assertion in the dwarf emission
machinery.

Thanks,  the patch is ok
Kyrill



Best Regards

Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer?


Hi Richard,

not sure I understand, isn't any pop updating SP by definition?


Yes, but the SP must already be the CFA before this instruction, since 
SP must be the base of the pop. So the reg note changing the CFA to SP 
can't be right.  I'm thinking there must be some earlier restore of SP 
that's missing a frame-related note.


R.



BR

   Andrea


Re: [PING][PATCH 0/15] arm: Enables return address verification and branch target identification on Cortex-M

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 21/09/2022 09:07, Andrea Corallo via Gcc-patches wrote:

Hi all,

ping^2 for patches 9/15 7/15 11/15 12/15 and 10/15 V2 of this series.

   Andrea


Subject says xx/15, but I only see 1-12 from you.

R.


Re: [PATCH 13/15] arm: Add pacbti related multilib support for armv8.1-m.main.

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 12/08/2022 18:10, Srinath Parvathaneni via Gcc-patches wrote:

  Hi,

This patch supports following -march/-mbranch-protection combination by linking 
them
to existing pacbti multilibs.

$ -march=armv8.1-m.main+pacbti+fp.dp+mve.fp -mbranch-protection=standard 
-mfloat-abi=hard -mthumb
$ -march=armv8.1-m.main+pacbti+fp.dp+mve -mbranch-protection=standard 
-mfloat-abi=hard -mthumb
$ -march=armv8.1-m.main+dsp+pacbti+fp.dp -mbranch-protection=standard 
-mfloat-abi=hard -mthumb

Regression tested on arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-08-12  Srinath Parvathaneni  

 * config/arm/t-rmprofile: Add pacbti multililb variants.

gcc/testsuite/ChangeLog:

2022-08-12  Srinath Parvathaneni  

 * gcc.target/arm/pac-10.c: New test.
 * gcc.target/arm/pac-11.c: Likewise.
 * gcc.target/arm/pac-12.c: Likewise.


Please resend with a correctly attached patch.  You've used octet-stream 
rather than a text format.


R.


Re: [PATCH 10/15 V2] arm: Implement cortex-M return signing address codegen

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 14/09/2022 15:20, Andrea Corallo via Gcc-patches wrote:

Hi all,

this patch enables address return signature and verification based on
Armv8.1-M Pointer Authentication [1].

To sign the return address, we use the PAC R12, LR, SP instruction
upon function entry.  This is signing LR using SP and storing the
result in R12.  R12 will be pushed into the stack.

During function epilogue R12 will be popped and AUT R12, LR, SP will
be used to verify that the content of LR is still valid before return.

Here an example of PAC instrumented function prologue and epilogue:

void foo (void);

int main()
{
   foo ();
   return 0;
}

Compiled with '-march=armv8.1-m.main -mbranch-protection=pac-ret
-mthumb' translates into:

main:
pac ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

The patch also takes care of generating a PACBTI instruction in place
of the sequence BTI+PAC when Branch Target Identification is enabled
contextually.

Ex. the previous example compiled with '-march=armv8.1-m.main
-mbranch-protection=pac-ret+bti -mthumb' translates into:

main:
pacbti  ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

As part of previous upstream suggestions a test for varargs has been
added and '-mtpcs-frame' is deemed being incompatible with this return
signing address feature being introduced.

[1] 


gcc/Changelog

2021-11-03  Andrea Corallo  

* config/arm/arm.c: (arm_compute_frame_layout)
(arm_expand_prologue, thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_current_function_pac_enabled_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

gcc/testsuite/Changelog

2021-11-03  Andrea Corallo  

* gcc.target/arm/pac.h : New file.
* gcc.target/arm/pac-1.c : New test case.
* gcc.target/arm/pac-2.c : Likewise.
* gcc.target/arm/pac-3.c : Likewise.
* gcc.target/arm/pac-4.c : Likewise.
* gcc.target/arm/pac-5.c : Likewise.
* gcc.target/arm/pac-6.c : Likewise.
* gcc.target/arm/pac-7.c : Likewise.
* gcc.target/arm/pac-8.c : Likewise.



+  if (arm_current_function_pac_enabled_p () && !(arm_arch7 && 
arm_arch_cmse))
+error ("This architecture does not support branch protection 
instructions");


This test feels wrong.  What does having cmse give us?  I suspect you 
want a test that ensures we have at least v8-m.main so that the NOP 
instructions are correctly defined as NOPs (or, in this case, PACBTI 
instructions) rather than unpredictable; but if that's the case then I 
think you really want to write the test that way here (perhaps in a 
macro) and then move this test into that so that it becomes 
self-documenting - but don't we have a v8-m.main test anyway?



+ if (arm_current_function_pac_enabled_p ())
+   {
+  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ emit_insn (gen_aut_nop ());
+ emit_jump_insn (simple_return_rtx);
+   }

The assert is using indents that are just spaces, but the other lines 
use tabs.  Please use tabs everywhere rather than mixing like this.


+/* Return TRUE if return address signing mechanism is enabled.  */
+bool
+arm_current_function_pac_enabled_p (void)
+{
+  return aarch_ra_sign_scope == AARCH_FUNCTION_ALL
+|| (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
+   && !crtl->is_leaf);
+}

This is a case where you should use parenthesis around the expression so 
that the continuation lines are correctly indented.


@@ -11518,7 +11518,7 @@ (define_expand "prologue"
  arm_expand_prologue ();
else
  thumb1_expand_prologue ();
-  DONE;
+   DONE;
   "
 )

Although this is a trivial cleanup, it has nothing to do with this 
patch.  Please remove.


+  "arm_arch7 && arm_arch_cmse"

See my comments earlier about this test; the same applies here.

+   (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
+   UNSPEC_PAC_NOP))]
+
Again you have a mix of lines indented with tabs and lines indented with 
just spaces.  Similarly with pacbti_nop and aut_nop.


Do you have a test for the nested functions case (I can't see it, but 
perhaps I've missed it somewhere)?


R.


Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when popping if necessary

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 27/09/2022 16:24, Kyrylo Tkachov via Gcc-patches wrote:




-Original Message-
From: Andrea Corallo 
Sent: Tuesday, September 27, 2022 11:06 AM
To: Kyrylo Tkachov 
Cc: Andrea Corallo via Gcc-patches ; Richard
Earnshaw ; nd 
Subject: Re: [PATCH 9/15] arm: Set again stack pointer as CFA reg when
popping if necessary

Kyrylo Tkachov  writes:


Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:34 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 9/15] arm: Set again stack pointer as CFA reg when

popping

if necessary

Hi all,

this patch enables 'arm_emit_multi_reg_pop' to set again the stack
pointer as CFA reg when popping if this is necessary.



 From what I can tell from similar functions this is correct, but could you

elaborate on why this change is needed for my understanding please?

Thanks,
Kyrill


Hi Kyrill,

sure, if the frame pointer was set, than it is the current CFA register.
If we request to adjust the current CFA register offset indicating it
being SP (while it's actually FP) that is indeed not correct and the
incoherence we will be detected by an assertion in the dwarf emission
machinery.


Thanks,  the patch is ok
Kyrill



Best Regards

   Andrea


Hmm, wait.  Why would a multi-reg pop be updating the stack pointer? 
Please can you show a code sequence where this is needed.


R.


Re: [PATCH 7/15] arm: Emit build attributes for PACBTI target feature

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 12/08/2022 16:30, Andrea Corallo via Gcc-patches wrote:

This patch emits assembler directives for PACBTI build attributes as
defined by the
ABI.



gcc/ChangeLog:

* config/arm/arm.c (arm_file_start): Emit EABI attributes for
Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use, TAG_PACRET_use.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-3: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.

Co-Authored-By: Tejas Belagod  



OK.

R.


Re: [PATCH 1/2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-21 Thread Richard Earnshaw via Gcc-patches




On 20/10/2022 18:37, Andrew Pinski via Gcc-patches wrote:

On Thu, Oct 20, 2022 at 10:28 AM Segher Boessenkool
 wrote:


On Thu, Oct 20, 2022 at 01:44:15AM +, Jiang, Haochen wrote:

Maybe the testcase change cause some misunderstanding and concern.

Actually, the patch did not disrupt the previous builtins, as the 
builtin_prefetch
uses vargs. I set the default value of the new parameter as data prefetch, which
means that if we are not using the fourth parameter, just like how we use
prefetch previously, it is still what it is.


I still think it is a mistake to have one builtin do two very distinct
operations, only very superficially related.  Instruction fetch and data
demand loads are almosty entirely unrelated, and so is the prefetch
machinery for them, on all machines I am familiar with.


On aarch64 (armv8), it is actually the same instruction: PRFM. It
might be the only one which is that way though.
It even allows to specify the level for the instruction prefetch too
(which is actually useful for say OcteonTX2 which has an interesting
cache hierarchy).



Just because the encodings are similar doesn't mean that the 
instructions are the same, although it's true that once you reach 
unification in the cache hierarchy the end behaviour /might/ be 
indistinguishable.


Really, Segher's point seems to be 'why overload the existing builtin 
for this'?  It's not like the new parameter is something that users 
would really need to pass in as a run-time choice; and that wouldn't 
work anyway because in the end we do need distinct instructions.


R.


Though I agree it is a mistake to have one builtin which handles both
data and instruction prefetch.

Thanks,
Andrew



Which makes
sense anyway, since instruction prefetch and data prefetch have
completely different performance characteristics and considerations.
Maybe if you start with the mistake of having unified L1 caches it
seems natural, but thankfully most machines do not do that.


Segher


Re: [PATCH 7/15] arm: Emit build attributes for PACBTI target feature

2022-10-20 Thread Richard Earnshaw via Gcc-patches




On 20/10/2022 15:47, Kyrylo Tkachov via Gcc-patches wrote:

Hi Andrea,


-Original Message-
From: Gcc-patches  On Behalf Of Andrea
Corallo via Gcc-patches
Sent: Friday, August 12, 2022 4:31 PM
To: Andrea Corallo via Gcc-patches 
Cc: Richard Earnshaw ; nd 
Subject: [PATCH 7/15] arm: Emit build attributes for PACBTI target feature

This patch emits assembler directives for PACBTI build attributes as
defined by the
ABI.



gcc/ChangeLog:

* config/arm/arm.c (arm_file_start): Emit EABI attributes for
Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use,
TAG_PACRET_use.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-3: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.

Co-Authored-By: Tejas Belagod  


diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 0068817b0f2..ceec14f84b6 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -28349,6 +28349,8 @@ static void
  arm_file_start (void)
  {
int val;
+  bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
+  bool bti = (aarch_enable_bti == 1);
  
arm_print_asm_arch_directives

  (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
@@ -28419,6 +28421,22 @@ arm_file_start (void)
arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
 (int) arm_fp16_format);
  
+  if (TARGET_HAVE_PACBTI)

+   {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
+   }
+  else if (pac || bti)
+   {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
+   }

This hunk will set both Tag_PAC_extension and Tag_BTI_extension if only one of 
pac or bti is on. Is that intended?
Would it makes sense to instead set the two Tag_*_extension tags individually 
as in the hunk below?


That's because they are one feature in armv8-m and these tags describe 
the presence of the feature in the architecture.



+
+  if (bti)
+arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
+  if (pac)
+   arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
+


But this describes /use/ by the code of each feature.

R.


if (arm_lang_output_object_attributes_hook)
arm_lang_output_object_attributes_hook();
  }

Thanks,
Kyrill



Re: [PATCH 1/2] Add a parameter for the builtin function of prefetch to align with LLVM

2022-10-17 Thread Richard Earnshaw via Gcc-patches




On 14/10/2022 09:34, Haochen Jiang via Gcc-patches wrote:

gcc/ChangeLog:

* builtins.cc (expand_builtin_prefetch): Handle the fourth parameter in
expand function.
* config/aarch64/aarch64-sve.md: Add default parameter value.
* config/aarch64/aarch64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/alpha/alpha.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/arc/arc.md: Add default parameter value.
* config/arm/arm.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/frv/frv.md: Ditto.
* config/i386/i386.md: Ditto.
* config/ia64/ia64.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/mips/mips.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/pa/pa.md: Ditto.
* config/rs6000/rs6000.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/s390/s390.cc (s390_expand_cpymem): Generate fourth parameter 
for
gen_prefetch call.
(s390_expand_setmem): Ditto.
(s390_expand_cmpmem): Ditto.
* config/s390/s390.md (prefetch): New define_expand.
(*prefetch): Add default parameter value.
* config/sh/sh.md: Ditto.
* config/sparc/sparc.md: Ditto.
* doc/rtl.texi: Document cache variable for prefetch.
* rtl.def (PREFETCH): Change prefetch DEF_RTL_EXPR to add fourth 
parameter.
* rtlanal.cc (setup_reg_subrtx_bounds): Change gcc_checking_assert for
fourth parameter.
* target-insns.def (prefetch): Add fourth rtx for prefetch.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/builtin-prefetch-1.c: Add fourth parameter for
testcases.
* gcc.c-torture/execute/builtin-prefetch-2.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-3.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-4.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-5.c: Ditto.
* gcc.c-torture/execute/builtin-prefetch-6.c: Ditto.
* gcc.dg/builtin-prefetch-1.c: Ditto.
* gcc.misc-tests/i386-pf-3dnow-1.c: Ditto.
* gcc.misc-tests/i386-pf-athlon-1.c: Ditto.
* gcc.misc-tests/i386-pf-none-1.c: Ditto.
* gcc.misc-tests/i386-pf-sse-1.c: Ditto.
* gcc.target/i386/avx-1.c: Change prefetch macro define to variable 
args.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/aarch64/prefetchi-1.c: New test.
* gcc.target/alpha/prefetchi-1.c: Ditto.
* gcc.target/arc/prefetchi-1.c: Ditto.
* gcc.target/arm/prefetchi-1.c: Ditto.
* gcc.target/hppa/prefetchi-1.c: Ditto.
* gcc.target/i386/prefetchi-1.c: Ditto.
* gcc.target/ia64/prefetchi-1.c: Ditto.
* gcc.target/mips/prefetchi-1.c: Ditto.
* gcc.target/powerpc/prefetchi-1.c: Ditto.
* gcc.target/s390/prefetchi-1.c: Ditto.
* gcc.target/sh/prefetchi-1.c: Ditto.
* gcc.target/sparc/prefetchi-1.c: Ditto.
---
  gcc/builtins.cc   |  34 --
  gcc/config/aarch64/aarch64-sve.md |  15 ++-
  gcc/config/aarch64/aarch64.md |  19 +++-
  gcc/config/alpha/alpha.md |  19 +++-
  gcc/config/arc/arc.md |  20 +++-
  gcc/config/arm/arm.md |  19 +++-
  gcc/config/frv/frv.md |   6 +-
  gcc/config/i386/i386.md   |  17 ++-
  gcc/config/ia64/ia64.md   |  19 +++-
  gcc/config/mips/mips.md   |  22 +++-
  gcc/config/pa/pa.md   |  12 +-
  gcc/config/rs6000/rs6000.md   |  19 +++-
  gcc/config/s390/s390.cc   |  10 +-
  gcc/config/s390/s390.md   |  19 +++-
  gcc/config/sh/sh.md   |  15 ++-
  gcc/config/sparc/sparc.md |  15 ++-
  gcc/doc/rtl.texi  |   6 +-
  gcc/rtl.def   |   5 +-
  gcc/rtlanal.cc|   2 +-
  gcc/target-insns.def  |   2 +-
  .../execute/builtin-prefetch-1.c  |  45 
  .../execute/builtin-prefetch-2.c  | 106 +-
  .../execute/builtin-prefetch-3.c  |  92 +++
  .../execute/builtin-prefetch-4.c  |  44 
  .../execute/builtin-prefetch-5.c  |  12 +-
  .../execute/builtin-prefetch-6.c  |   4 +-
  gcc/testsuite/gcc.dg/builtin-prefetch-1.c |   5 +-
  .../gcc.misc-tests/i386-pf-3dnow-1.c  |  16 +--
  .../gcc.misc-tests/i386-pf-athlon-1.c |  16 +--
  gcc/testsuite/gcc.misc-tests/i386-pf-none-1.c |  16 +--
  

Re: [PATCH] [testsuite][arm] Fix cmse-15.c expected output

2022-10-03 Thread Richard Earnshaw via Gcc-patches




On 23/09/2022 09:43, Torbjörn SVENSSON via Gcc-patches wrote:

The cmse-15.c testcase fails at -Os because ICF means that we
generate
secure3:
 b   secure1

which is OK, but does not match the currently expected
secure3:
...
 bx  r[0-3]

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/cmse-15.c: Align with -Os improvements.


OK.

R.


Co-Authored-By: Yvan ROUX  
Signed-off-by: Torbjörn SVENSSON  
---
  gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c 
b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
index b0fefe561a1..5188f1d697f 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
@@ -144,6 +144,8 @@ int secure2 (s_bar_ptr s_bar_p)
  **bx  r[0-3]
  ** |
  **blx r[0-3]
+** |
+** b   secure1
  ** )
  **...
  */


Re: [PATCH] arm: Add missing early clobber to MVE vrev64q_m patterns

2022-10-03 Thread Richard Earnshaw via Gcc-patches




On 03/10/2022 11:43, Christophe Lyon via Gcc-patches wrote:

Like the non-predicated vrev64q patterns, mve_vrev64q_m_
and mve_vrev64q_m_f need an early clobber constraint, otherwise
we can generate an unpredictable instruction:

Warning: 64-bit element size and same destination and source operands makes 
instruction UNPREDICTABLE
when calling vrevq64_m* with the same first and second arguments.

Regression-tested on arm-none-eabi, bootstap in progress on
armv8l-unknown-linux-gnueabihf.

OK for trunk?


OK.

R.


Thanks,

Christophe

gcc/ChangeLog:

* config/arm/mve.md: (mve_vrev64q_m_): Add early
   clobber.
  (mve_vrev64q_m_f): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/intrinsics/vrev64q_m_s16-clobber.c: New test.
---
  gcc/config/arm/mve.md   |  4 ++--
  .../arm/mve/intrinsics/vrev64q_m_s16-clobber.c  | 17 +
  2 files changed, 19 insertions(+), 2 deletions(-)
  create mode 100644 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_m_s16-clobber.c

diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 714178609f7..62186f124da 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3503,7 +3503,7 @@ (define_insn "mve_vqshlq_m_r_"
  ;;
  (define_insn "mve_vrev64q_m_"
[
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+   (set (match_operand:MVE_2 0 "s_register_operand" "=")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
   (match_operand:MVE_2 2 "s_register_operand" "w")
   (match_operand: 3 "vpr_register_operand" 
"Up")]
@@ -4598,7 +4598,7 @@ (define_insn "mve_vrev32q_m_"
  ;;
  (define_insn "mve_vrev64q_m_f"
[
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+   (set (match_operand:MVE_0 0 "s_register_operand" "=")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
   (match_operand:MVE_0 2 "s_register_operand" "w")
   (match_operand: 3 "vpr_register_operand" 
"Up")]
diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_m_s16-clobber.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_m_s16-clobber.c
new file mode 100644
index 000..6464c96181d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_m_s16-clobber.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t a, mve_pred16_t p)
+{
+  return vrev64q_m_s16 (a, a, p);
+}
+
+float16x8_t
+foo2 (float16x8_t a, mve_pred16_t p)
+{
+  return vrev64q_m_f16 (a, a, p);
+}


Re: [PATCH v2] testsuite: [arm] Relax expected register names in MVE tests

2022-09-30 Thread Richard Earnshaw via Gcc-patches




On 30/09/2022 12:19, Christophe Lyon via Gcc-patches wrote:

These two tests have hardcoded q0 as destination/source of load/store
instructions, but this register is actually used only under
-mfloat-abi=hard. When using -mfloat-abi=softfp, other registers
(eg. q3) can be used to transfer function arguments from core
registers to MVE registers, making the expected regexp fail.

This small patch replaces q0 with q[0-7] to accept any 'q' register.
In several places where we had q[0-9]+, replace it with q[0-7] as MVE
only has q0-q7 registers.

OK for trunk?

gcc/testsuite/ChangeLog:

* gcc.target/arm/mve/mve_load_memory_modes.c: Update expected
registers.
* gcc.target/arm/mve/mve_store_memory_modes.c: Likewise.


OK.

R.

---
  .../arm/mve/mve_load_memory_modes.c   | 58 +--
  .../arm/mve/mve_store_memory_modes.c  | 58 +--
  2 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c 
b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
index e35eb1108aa..816980d1203 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
@@ -7,7 +7,7 @@
  /*
  **off_load8_0:
  **...
-** vldrb.8 q0, \[r0, #16\]
+** vldrb.8 q[0-7], \[r0, #16\]
  **...
  */
  int8x16_t off_load8_0 (int8_t * a)
@@ -18,7 +18,7 @@ int8x16_t off_load8_0 (int8_t * a)
  /*
  **off_load8_1:
  **...
-** vldrb.u16   q0, \[r0, #1\]
+** vldrb.u16   q[0-7], \[r0, #1\]
  **...
  */
  uint16x8_t off_load8_1 (uint8_t * a)
@@ -29,7 +29,7 @@ uint16x8_t off_load8_1 (uint8_t * a)
  /*
  **off_load8_2:
  **...
-** vldrb.s32   q0, \[r0, #127\]
+** vldrb.s32   q[0-7], \[r0, #127\]
  **...
  */
  int32x4_t off_load8_2 (int8_t * a)
@@ -40,7 +40,7 @@ int32x4_t off_load8_2 (int8_t * a)
  /*
  **off_load8_3:
  **...
-** vldrb.8 q0, \[r0, #-127\]
+** vldrb.8 q[0-7], \[r0, #-127\]
  **...
  */
  uint8x16_t off_load8_3 (uint8_t * a)
@@ -51,7 +51,7 @@ uint8x16_t off_load8_3 (uint8_t * a)
  /*
  **not_off_load8_0:
  **...
-** vldrb.8 q0, \[r[0-9]+\]
+** vldrb.8 q[0-7], \[r[0-7]+\]
  **...
  */
  int8x16_t not_off_load8_0 (int8_t * a)
@@ -62,7 +62,7 @@ int8x16_t not_off_load8_0 (int8_t * a)
  /*
  **off_loadfp16_0:
  **...
-** vldrh.16q0, \[r0, #-244\]
+** vldrh.16q[0-7], \[r0, #-244\]
  **...
  */
  float16x8_t off_loadfp16_0 (float16_t *a)
@@ -73,7 +73,7 @@ float16x8_t off_loadfp16_0 (float16_t *a)
  /*
  **off_load16_0:
  **...
-** vldrh.16q0, \[r0, #-2\]
+** vldrh.16q[0-7], \[r0, #-2\]
  **...
  */
  uint16x8_t off_load16_0 (uint16_t * a)
@@ -84,7 +84,7 @@ uint16x8_t off_load16_0 (uint16_t * a)
  /*
  **off_load16_1:
  **...
-** vldrh.u32   q0, \[r0, #254\]
+** vldrh.u32   q[0-7], \[r0, #254\]
  **...
  */
  uint32x4_t off_load16_1 (uint16_t * a)
@@ -95,7 +95,7 @@ uint32x4_t off_load16_1 (uint16_t * a)
  /*
  **not_off_load16_0:
  **...
-** vldrh.16q0, \[r[0-9]+\]
+** vldrh.16q[0-7], \[r[0-7]+\]
  **...
  */
  int16x8_t not_off_load16_0 (int8_t * a)
@@ -106,7 +106,7 @@ int16x8_t not_off_load16_0 (int8_t * a)
  /*
  **not_off_load16_1:
  **...
-** vldrh.u32   q0, \[r[0-9]+\]
+** vldrh.u32   q[0-7], \[r[0-7]+\]
  **...
  */
  uint32x4_t not_off_load16_1 (uint16_t * a)
@@ -117,7 +117,7 @@ uint32x4_t not_off_load16_1 (uint16_t * a)
  /*
  **off_loadfp32_0:
  **...
-** vldrw.32q0, \[r0, #24\]
+** vldrw.32q[0-7], \[r0, #24\]
  **...
  */
  float32x4_t off_loadfp32_0 (float32_t *a)
@@ -128,7 +128,7 @@ float32x4_t off_loadfp32_0 (float32_t *a)
  /*
  **off_load32_0:
  **...
-** vldrw.32q0, \[r0, #4\]
+** vldrw.32q[0-7], \[r0, #4\]
  **...
  */
  uint32x4_t off_load32_0 (uint32_t * a)
@@ -139,7 +139,7 @@ uint32x4_t off_load32_0 (uint32_t * a)
  /*
  **off_load32_1:
  **...
-** vldrw.32q0, \[r0, #-508\]
+** vldrw.32q[0-7], \[r0, #-508\]
  **...
  */
  int32x4_t off_load32_1 (int32_t * a)
@@ -149,7 +149,7 @@ int32x4_t off_load32_1 (int32_t * a)
  /*
  **pre_load8_0:
  **...
-** vldrb.8 q[0-9]+, \[r0, #16\]!
+** vldrb.8 q[0-7], \[r0, #16\]!
  **...
  */
  int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
@@ -162,7 +162,7 @@ int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
  /*
  **pre_load8_1:
  **...
-** vldrb.u16   q[0-9]+, \[r0, #4\]!
+** vldrb.u16   q[0-7], \[r0, #4\]!
  **...
  */
  uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
@@ -175,7 +175,7 @@ uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
  /*
  **pre_loadfp16_0:
  **...
-** vldrh.16q[0-9]+, \[r0, #128\]!
+** vldrh.16q[0-7], \[r0, #128\]!
  **...
  */
  float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t 

Re: [PATCH][committed] aarch64: Suggest an -mcpu option when user passes CPU name to -march

2022-09-05 Thread Richard Earnshaw via Gcc-patches




On 05/09/2022 14:35, Kyrylo Tkachov via Gcc-patches wrote:

Hi all,

This small patch helps users who confuse -march and -mcpu on AArch64.
Sometimes users pass -march with a CPU name, where they most likely wanted to
use -mcpu, which would select the right architecture features *and* tune for
their desired CPU. Currently we'll just error out with an unkown architecture
message and list the valid architecture options.
With this patch we check if their string matches a known CPU and suggest they
use an -mcpu option instead.

So compiling with -march=neoverse-n1 will now give the error:
cc1: error: unknown value 'neoverse-n1' for '-march'
cc1: note: valid arguments are: armv8-a armv8.1-a armv8.2-a armv8.3-a armv8.4-a 
armv8.5-a armv8.6-a armv8.7-a armv8.8-a armv8-r armv9-a
cc1: note: did you mean '-mcpu=neoverse-n1'?

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_validate_march): Check if invalid 
arch
string is a valid -mcpu string and emit hint.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/spellcheck_10.c: New test.


What about the reverse case, passing an architecture to -mcpu?

R.


Re: [GCC 13/15][PATCH v3] arm: Add support for dwarf debug directives and pseudo hard-register for PAC feature.

2022-08-19 Thread Richard Earnshaw via Gcc-patches




On 19/08/2022 11:04, Srinath Parvathaneni via Gcc-patches wrote:

Hello,

This patch teaches the DWARF support in gcc about RA_AUTH_CODE pseudo 
hard-register and also
.save {ra_auth_code} and .cfi_offset ra_auth_code  dwarf directives for 
the PAC feature
in Armv8.1-M architecture.

RA_AUTH_CODE register number is 107 and it's dwarf register number is 143.

When compiled with " -march=armv8.1-m.main -mbranch-protection=pac-ret+leaf+bti 
-mthumb
-mfloat-abi=soft -fasynchronous-unwind-tables -g -O2 -S" command line options, 
the assembly
output after this patch looks like below:

 ...
 .cfi_startproc
 pacbti  ip, lr, sp
 movsr1, #40
 push{ip, lr}
 .save {ra_auth_code, lr}
 .cfi_def_cfa_offset 8
 .cfi_offset 143, -8
 .cfi_offset 14, -4
 ...
 pop {ip, lr}
 .cfi_restore 14
 .cfi_restore 143
 .cfi_def_cfa_offset 0
 movsr0, #0
 aut ip, lr, sp
 bx  lr
 .cfi_endproc
 ...

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-08-17  Srinath Parvathaneni  

 * config/arm/aout.h (ra_auth_code): Add to enum.
 * config/arm/arm.cc (emit_multi_reg_push): Add RA_AUTH_CODE register to
 dwarf frame expression.
 (arm_emit_multi_reg_pop): Restore RA_AUTH_CODE register.
 (arm_expand_prologue): Mark as frame related insn.
 (arm_regno_class): Check for pac pseudo reigster.
 (arm_dbx_register_number): Assign ra_auth_code register number in 
dwarf.
 (arm_unwind_emit_sequence): Print .save directive with ra_auth_code
 register.
 (arm_conditional_register_usage): Mark ra_auth_code in fixed reigsters.
 * config/arm/arm.h (FIRST_PSEUDO_REGISTER): Modify.
 (IS_PAC_Pseudo_REGNUM): Define.
 (enum reg_class): Add PAC_REG entry.
 * config/arm/arm.md (RA_AUTH_CODE): Define.

gcc/testsuite/ChangeLog:

2022-08-17  Srinath Parvathaneni  

 * g++.target/arm/pac-1.C: New test.
 * gcc.target/arm/pac-9.c: Likewise.

The general boiler-plate to add the RA register is OK, but the code that 
tweaks the generation of the push instructions is fixing the wrong 
problem.  The dwarf code already knows how to to track reg->reg copies 
and put out the right frame information, but this isn't working because 
you've not augmented the PAC instruction correctly.  What you need is a 
frame-related augmentation to that and that essentially does


(set (IP_REGNUM) (RA_AUTH_CODE))

The generic dwarf code should then handle all the rest for emitting CFI 
directives.


The code in arm_unwind_emit_sequence (and technically in 
arm_unwind_emit_set as well, but we probably never reach that code as of 
today) then needs updating to handle the special cases when IP appears 
in the list of registers and PAC is enabled.  That's a bit of a hack, 
but I can't immediately think of a better way of handling it.


R.



### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 
b918ad3782fbee82320febb8b6e72ad615780261..ffeed45a678f17c63d5b42c21f020ca416cbf23f
 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -74,7 +74,8 @@
"wr8",   "wr9",   "wr10",  "wr11",  \
"wr12",  "wr13",  "wr14",  "wr15",  \
"wcgr0", "wcgr1", "wcgr2", "wcgr3", \
-  "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0"   \
+  "cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0",  \
+  "ra_auth_code" \
  }
  #endif
  
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h

index 
3495ab857eac38ecdf37e55f1d201b1c35cbde0b..c7067819f6785e44d30d8e5365505ab98682
 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -816,7 +816,8 @@ extern const int arm_arch_cde_coproc_bits[];
s16-s31   S VFP variable (aka d8-d15).
vfpcc   Not a real register.  Represents the VFP condition
code flags.
-   vpr Used to represent MVE VPR predication.  */
+   vpr Used to represent MVE VPR predication.
+   ra_auth_codePseudo register to save PAC.  */
  
  /* The stack backtrace structure is as follows:

fp points to here:  |  save code pointer  |  [fp]
@@ -857,7 +858,7 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1,\
1,1,1,1,\
/* Specials.  */\
-  1,1,1,1,1,1,1\
+  1,1,1,1,1,1,1,1  \
  }
  
  /* 1 for registers not available across function calls.

@@ -887,7 +888,7 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1,\
1,1,1,1,\
/* 

Re: [GCC][PATCH v2] arm: Add support for Arm Cortex-M85 CPU.

2022-08-18 Thread Richard Earnshaw via Gcc-patches




On 12/08/2022 18:20, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds the -mcpu support for the Arm Cortex-M85 CPU which is an
Armv8.1-M Mainline CPU supporting MVE and PACBTI by default.

-mpcu=cortex-m85 switch by default matches to 
-march=armv8.1-m.main+pacbti+mve.fp+fp.dp.

Also following options are provided to disable default features.
+nomve.fp (disables MVE Floating point)
+nomve (disables MVE Integer and MVE Floating point)
+nodsp (disables dsp, MVE Integer and MVE Floating point)
+nopacbti (disables pacbti)
+nofp (disables floating point and MVE floating point)

Regression tested on arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-08-12  Srinath Parvathaneni  

 * config/arm/arm-cpus.in (cortex-m85): Define new CPU.
 * config/arm/arm-tables.opt: Regenerate.
 * config/arm/arm-tune.md: Likewise.
 * doc/invoke.texi (Arm Options): Document -mcpu=cortex-m85.
 * (-mfix-cmse-cve-2021-35465): Likewise.

gcc/testsuite/ChangeLog:

2022-08-12  Srinath Parvathaneni  

 * gcc.target/arm/multilib.exp: Add tests for cortex-m85.


OK, but in future, please don't send patches as octet-stream 
attachments; they should be plain text.


R.


Re: [PATCH, GCC, AARCH64, 5/6] Enable BTI : Add new pass for BTI.

2022-08-18 Thread Richard Earnshaw via Gcc-patches




On 18/08/2022 01:00, Andrew Pinski via Gcc-patches wrote:

On Fri, Nov 2, 2018 at 11:39 AM Sudakshina Das  wrote:


Hi

This patch is part of a series that enables ARMv8.5-A in GCC and
adds Branch Target Identification Mechanism.
(https://developer.arm.com/products/architecture/cpu-architecture/a-profile/exploration-tools)

This patch adds a new pass called "bti" which is triggered by the
command line argument -mbranch-protection whenever "bti" is turned on.

The pass iterates through the instructions and adds appropriated BTI
instructions based on the following:
 * Add a new "BTI C" at the beginning of a function, unless its already
   protected by a "PACIASP/PACIBSP". We exempt the functions that are
   only called directly.


Coming back to this because the check only_called_directly_p does not
work if the linker will insert a veneer as the compiler does not know
about that.
This is recorded as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671 .


I think the linker has to make sure to insert a veneer that ends with a 
branch in that case.


R.



Thanks,
Andrew Pinski



 * Add a new "BTI J" for every target of an indirect jump, jump table
   targets, non-local goto targets or labels that might be referenced
   by variables, constant pools, etc (NOTE_INSN_DELETED_LABEL)

Since we have already changed the use of indirect tail calls to only x16
and x17, we do not have to use "BTI JC".
(check patch 3/6).

Bootstrapped and regression tested with aarch64-none-linux-gnu. Added
new tests.
Is this ok for trunk?

Thanks
Sudi

*** gcc/ChangeLog ***

2018-xx-xx  Sudakshina Das  
 Ramana Radhakrishnan  

 * config.gcc (aarch64*-*-*): Add aarch64-bti-insert.o.
 * gcc/config/aarch64/aarch64.h: Update comment for
 TRAMPOLINE_SIZE.
 * config/aarch64/aarch64.c (aarch64_asm_trampoline_template):
 Update if bti is enabled.
 * config/aarch64/aarch64-bti-insert.c: New file.
 * config/aarch64/aarch64-passes.def (INSERT_PASS_BEFORE): Insert
 bti pass.
 * config/aarch64/aarch64-protos.h (make_pass_insert_bti):
 Declare the new bti pass.
 * config/aarch64/aarch64.md (bti_nop): Define.
 * config/aarch64/t-aarch64: Add rule for aarch64-bti-insert.o.

*** gcc/testsuite/ChangeLog ***

2018-xx-xx  Sudakshina Das  

 * gcc.target/aarch64/bti-1.c: New test.
 * gcc.target/aarch64/bti-2.c: New test.
 * lib/target-supports.exp
 (check_effective_target_aarch64_bti_hw): Add new check for
 BTI hw.



Re: [PATCH] arm: Define with_float to hard when target name ends with hf

2022-08-17 Thread Richard Earnshaw via Gcc-patches




On 17/08/2022 09:35, Christophe Lyon via Gcc-patches wrote:

On arm, the --with-float= configure option is used to define include
files search path (among other things).  However, when targeting
arm-linux-gnueabihf, one would expect to automatically default to the
hard-float ABI, but this is not the case. As a consequence, GCC
bootstrap fails on an arm-linux-gnueabihf target if --with-float=hard
is not used.

This patch checks if the target name ends with 'hf' and defines
with_float to hard if not already defined.  This is achieved in
gcc/config.gcc, just before selecting the default CPU depending on the
$with_float value.

2022-08-17  Christophe Lyon  

gcc/
* config.gcc (arm): Define with_float to hard if target name ends
with 'hf'.
---
  gcc/config.gcc | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 4e3b15bb5e9..02f58970db0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1314,6 +1314,13 @@ arm*-*-linux-* | arm*-*-uclinuxfdpiceabi)
tm_file="$tm_file arm/uclinuxfdpiceabi.h"
;;
esac
+   # Define with_float to "hard" if not already defined and
+   # target name ends with "hf"
+   case $target:$with_float in
+   arm*-*-*hf:)
+   with_float=hard
+   ;;
+   esac
# Generation of floating-point instructions requires at least ARMv5te.
if [ "$with_float" = "hard" -o "$with_float" = "softfp" ] ; then
target_cpu_cname="arm10e"


OK.

R.


Re: [GCC][PATCH] arm: Add support for Arm Cortex-M85 CPU.

2022-08-05 Thread Richard Earnshaw via Gcc-patches




On 05/08/2022 16:20, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

This patch adds the -mcpu support for the Arm Cortex-M85 CPU which is an
Armv8.1-M Mainline CPU supporting MVE and PACBTI by default.

-mpcu=cortex-m85 switch by default matches to 
-march=armv8.1-m.main+pacbti+mve.fp+fp.dp.

Also following options are provided to disable default features.
+nomve.fp (disables MVE Floating point)
+nomve (disables MVE Integer and MVE Floating point)
+nodsp (disables dsp, MVE Integer and MVE Floating point)
+nopacbti (disables pacbti)
+nofp (disables floating point and MVE floating point)

Regression tested on arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2022-08-05  Srinath Parvathaneni  

 * config/arm/arm-cpus.in (cortex-m85): Define new cpu.


CPU is an acronym, so: s/new cpu/new CPU/


 * config/arm/arm-tables.opt: Regenerate.
 * config/arm/arm-tune.md: Likewise.
 * config/arm/t-rmprofile: Re-use multilibs.
 * doc/invoke.texi (Arm Options): Document -mcpu=cortex-m85.
 * (-mfix-cmse-cve-2021-35465): Likewise.

gcc/testsuite/ChangeLog:

2022-08-05  Srinath Parvathaneni  

 * gcc.target/arm/multilib.exp: Add tests for cortex-m85.


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 
9502a34fa974744f02ded4f32c03de6169950120..a6f364309f8728d6d2264b4e60feb75d51b87b64
 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -1643,6 +1643,21 @@ begin cpu cortex-m55
   vendor 41
  end cpu cortex-m55
  
+begin cpu cortex-m85

+ cname cortexm85
+ tune flags LDSCHED
+ architecture armv8.1-m.main+pacbti+mve.fp+fp.dp
+ option nopacbti remove pacbti
+ option nomve.fp remove mve_float
+ option nomve remove mve mve_float
+ option nofp remove ALL_FP mve_float
+ option nodsp remove MVE mve_float
+ isa quirk_no_asmcpu quirk_vlldm
+ costs v7m
+ part 0xd23
+ vendor 41
+end cpu cortex-m85
+
  # V8 R-profile implementations.
  begin cpu cortex-r52
   cname cortexr52
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index 
ef0cc5ef0c87ce37958fc0ac9b1623078b890187..54f87da7852b3e495da9fd08106d9f6bd7c99716
 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -282,6 +282,9 @@ Enum(processor_type) String(cortex-m35p) Value( 
TARGET_CPU_cortexm35p)
  EnumValue
  Enum(processor_type) String(cortex-m55) Value( TARGET_CPU_cortexm55)
  
+EnumValue

+Enum(processor_type) String(cortex-m85) Value( TARGET_CPU_cortexm85)
+
  EnumValue
  Enum(processor_type) String(cortex-r52) Value( TARGET_CPU_cortexr52)
  
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md

index 
3422553604245035089e4f52b3feb9db4c51b2b5..27cafe9b4caf9270cb5f537c988d57715495a207
 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -49,6 +49,6 @@
cortexa710,cortexx1,neoversen1,
cortexa75cortexa55,cortexa76cortexa55,neoversev1,
neoversen2,cortexm23,cortexm33,
-   cortexm35p,cortexm55,cortexr52,
+   cortexm35p,cortexm55,cortexm85,cortexr52,
cortexr52plus"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/t-rmprofile b/gcc/config/arm/t-rmprofile
index 
fe46a1efa1a8b212e6f4051283573debfc386ff8..77e248e47feeddb2328a82aec6b485ff0f6fd62e
 100644
--- a/gcc/config/arm/t-rmprofile
+++ b/gcc/config/arm/t-rmprofile
@@ -97,6 +97,13 @@ MULTILIB_MATCHES += $(foreach FP, $(v8_1m_sp_variants), \
  MULTILIB_MATCHES += $(foreach FP, $(v8_1m_dp_variants), \
 
march?armv8-m.main+fp.dp=mlibarch?armv8.1-m.main$(FP))
  
+MULTILIB_MATCHES	+= march?armv8.1-m.main+pacbti+fp.dp=march?armv8.1-m.main+pacbti+fp.dp+mve.fp

+MULTILIB_MATCHES   += 
march?armv8.1-m.main+pacbti+fp.dp=mlibarch?armv8.1-m.main+pacbti+fp.dp+mve.fp
+MULTILIB_MATCHES   += 
march?armv8.1-m.main+pacbti+fp.dp=march?armv8.1-m.main+pacbti+fp.dp+mve
+MULTILIB_MATCHES   += 
march?armv8.1-m.main+pacbti+fp.dp=mlibarch?armv8.1-m.main+pacbti+fp.dp+mve
+MULTILIB_MATCHES   += 
march?armv8.1-m.main+pacbti+fp.dp=march?armv8.1-m.main+dsp+pacbti+fp.dp
+MULTILIB_MATCHES   += 
march?armv8.1-m.main+pacbti+fp.dp=mlibarch?armv8.1-m.main+dsp+pacbti+fp.dp
+


This seems like generic pac/bti support in armv8.1-m.main, so should be 
a separate patch from the cortex-m85 support.  There should also be 
tests for these cases when -march= is used rather than -mcpu in multilib.exp



  # Map all mbranch-protection values other than 'none' to 'standard'.
  MULTILIB_MATCHES  += mbranch-protection?standard=mbranch-protection?bti
  MULTILIB_MATCHES  += 
mbranch-protection?standard=mbranch-protection?pac-ret
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
a2be3446594822f0b13dc7ba92ada4213a3a965c..47988ca6bedccc8efd1801a784d4f313959e8bb4
 100644
--- a/gcc/doc/invoke.texi
+++ 

Re: [RFA configure parts] aarch64: Make cc1 handle --with options

2022-08-05 Thread Richard Earnshaw via Gcc-patches




On 05/08/2022 14:53, Richard Sandiford via Gcc-patches wrote:

Richard Earnshaw  writes:

On 13/06/2022 15:33, Richard Sandiford via Gcc-patches wrote:

On aarch64, --with-arch, --with-cpu and --with-tune only have an
effect on the driver, so “./xgcc -B./ -O3” can give significantly
different results from “./cc1 -O3”.  --with-arch did have a limited
effect on ./cc1 in previous releases, although it didn't work
entirely correctly.

Being of a lazy persuasion, I've got used to ./cc1 selecting SVE for
--with-arch=armv8.2-a+sve without having to supply an explicit -march,
so this patch makes ./cc1 emulate the relevant OPTION_DEFAULT_SPECS.
It relies on Wilco's earlier clean-ups.

The patch makes config.gcc define WITH_FOO_STRING macros for each
supported --with-foo option.  This could be done only in aarch64-
specific code, but I thought it could be useful on other targets
too (and can be safely ignored otherwise).  There didn't seem to
be any existing and potentially clashing uses of macros with this
style of name.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK for the configure
bits?

Richard


gcc/
* config.gcc: Define WITH_FOO_STRING macros for each supported
--with-foo option.
* config/aarch64/aarch64.cc (aarch64_override_options): Emulate
OPTION_DEFAULT_SPECS.
* config/aarch64/aarch64.h (OPTION_DEFAULT_SPECS): Reference the above.
---
   gcc/config.gcc| 14 ++
   gcc/config/aarch64/aarch64.cc |  8 
   gcc/config/aarch64/aarch64.h  |  5 -
   3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cdbefb5b4f5..e039230431c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5865,6 +5865,20 @@ else
configure_default_options="{ ${t} }"
   fi
   
+for option in $supported_defaults

+do
+   lc_option=`echo $option | sed s/-/_/g`
+   uc_option=`echo $lc_option | tr a-z A-Z`
+   eval "val=\$with_$lc_option"
+   if test -n "$val"
+   then
+   val="\\\"$val\\\""
+   else
+   val=nullptr
+   fi
+   tm_defines="$tm_defines WITH_${uc_option}_STRING=$val"
+done


This bit would really be best reviewed by a non-arm maintainer.  It
generally looks OK.  My only comment would be why define anything if the
corresponding --with-foo was not specified.  They you can use #ifdef to
test if the user specified a default.


Yeah, could do it that way instead, but:


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d21e041eccb..0bc700b81ad 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18109,6 +18109,14 @@ aarch64_override_options (void)
 if (aarch64_branch_protection_string)
   aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
   
+  /* Emulate OPTION_DEFAULT_SPECS.  */

+  if (!aarch64_arch_string && !aarch64_cpu_string)
+aarch64_arch_string = WITH_ARCH_STRING;
+  if (!aarch64_arch_string && !aarch64_cpu_string)
+aarch64_cpu_string = WITH_CPU_STRING;
+  if (!aarch64_cpu_string && !aarch64_tune_string)
+aarch64_tune_string = WITH_TUNE_STRING;


(without the preprocessor stuff) IMO reads better.  If a preprocessor
is/isn't present test turns out to be useful, perhaps we should add
macros like HAVE_WITH_TUNE/WITH_TUNE_PRESENT/... too?  I guess it
should only be done when something needs it though.


It's relatively easy to add

#ifndef WITH_TUNE_STRING
#define WITH_TUNE_STRING (nulptr)
#endif

in a header, but much harder to go the other way.  The case I was 
thinking of was something like:


#if !defined(WITH_ARCH_STRING) && !defined(WITH_CPU_STRING)
#define WITH_ARCH_STRING ""
#endif

which saves having to have yet another level of fallback if nothing has 
been specified, but this is next to impossible if the macros are 
unconditionally defined.


R.



Thanks,
Richard


+
 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
If either of -march or -mtune is given, they override their
respective component of -mcpu.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 80cfe4b7407..3122dbd7098 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1267,7 +1267,10 @@ extern enum aarch64_code_model aarch64_cmodel;
   /* Support for configure-time --with-arch, --with-cpu and --with-tune.
  --with-arch and --with-cpu are ignored if either -mcpu or -march is used.
  --with-tune is ignored if either -mtune or -mcpu is used (but is not
-   affected by -march).  */
+   affected by -march).
+
+   There is corresponding code in aarch64_override_options that emulates
+   this behavior when cc1  are invoked directly.  */
   #define OPTION_DEFAULT_SPECS \
 {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" },   \
 {"cpu",  "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" },   \


Re: [PATCH v2] cselib: add function to check if SET is redundant [PR106187]

2022-08-03 Thread Richard Earnshaw via Gcc-patches




On 03/08/2022 00:36, Jeff Law wrote:



On 8/2/2022 10:06 AM, Richard Earnshaw wrote:



On 01/08/2022 11:38, Richard Earnshaw via Gcc-patches wrote:



On 30/07/2022 20:57, Jeff Law via Gcc-patches wrote:



On 7/29/2022 7:52 AM, Richard Earnshaw via Gcc-patches wrote:
A SET operation that writes memory may have the same value as an 
earlier store but if the alias sets of the new and earlier store do 
not conflict then the set is not truly redundant.  This can happen, 
for example, if objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for
equality and if that is successful then finds the earlier store in the
value history and checks the alias sets.

The routine is used in two places elsewhere in the compiler. Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* alias.h (mems_same_for_tbaa_p): Declare.
* alias.cc (mems_same_for_tbaa_p): New function.
* dse.cc (record_store): Use it instead of open-coding
alias check.
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.
Seems quite reasonable.   The only question I would have would be 
whether or not you considered including the aliasing info into the 
hashing used by cselib.  You'd probably still need the bulk of this 
patch as well since we could presumably still get a hash conflict 
with two stores of the same value to the same location, but with 
different alias sets (it's just much less likely), so perhaps it 
doesn't really buy us anything.


I thought about this, but if the alias set were included in the hash, 
then surely you'd get every alias set in a different value.  Then 
you'd miss the cases where the alias sets do conflict even though 
they are not the same.  Anyway, the values /are/ the same so in some 
circumstances you might want to know that.




Ideally this would include a testcase.  You might be able to turn 
that non-executawble reduced case into something useful by scanning 
the post-reload dumps.


I considered this as well, but the testcase I have is far too 
fragile, I think.  The existing test only fails on Arm, only fails on 
11.2 (not 11.3 or gcc-12 onwards), relies on two objects with the 
same value being in distinct alias sets but still assigned to the 
same stack slot and for some copy dance to end up trying to write 
back the original value to the same slot but with a non-conflicting 
set.  And finally, the scheduler has to then try to move a load past 
the non-aliasing store.





To get anywhere close to this I think we'd need something akin to the 
gimple reader but for RTL so that we could set up all the conditions 
for the failure without the risk of an earlier transform blowing the 
test away.


I wasn't aware of the rtl reader already in the compiler.  But it 
doesn't really get me any closer as it is lacking in so many regards:


- It can't handle (const_double:SF ...) - it tries to handle the 
argument as an int.  This is a consequence, I think, of the reader 
being based on that for reading machine descriptions where FP 
const_double is simply never encountered.


- It doesn't seem to handle anything much more than very basic types, 
and in particular appears to have no way of ensuring that alias sets 
match up with the type system.




I even considered whether we could start with a gimple dump and 
bypassing all the tree/gimple transformations, but even that would be 
still at the mercy of the stack-slot allocation algorithm.


I spent a while trying to get some gimple out of the dumpers in a form 
that was usable, but that's pretty much a non-starter.  To make it 
work we'd need to add support for gimple clobbers on objects - without 
that there's no way to get the stack-slot sharing code to work. 
Furthermore, even feeding fully-optimized gimple directly into expand 
is such a long way from the postreload pass, that I can't believe the 
testcase would remain stable for long.


And the other major issue is that the original testcase is heavily 
templated C++ and neither of the parsers gimple or rtl is supported in 
cc1plus: converting the boilerplate to be C-friendly is probably going 
to be hard.


I can't afford to spend much more time on this, especially given the 
low-quality test we're going to get out of the end of the process.
Understood.  Let's just go with the patch as-is.  That's normal for 
cases where we can't produce a reasonable test.




Thanks, committed to trunk.  Will work on backports if it doesn't throw 
up any issues in the next few days.


R.


jeff


Re: [PATCH v2] cselib: add function to check if SET is redundant [PR106187]

2022-08-02 Thread Richard Earnshaw via Gcc-patches




On 01/08/2022 11:38, Richard Earnshaw via Gcc-patches wrote:



On 30/07/2022 20:57, Jeff Law via Gcc-patches wrote:



On 7/29/2022 7:52 AM, Richard Earnshaw via Gcc-patches wrote:
A SET operation that writes memory may have the same value as an 
earlier store but if the alias sets of the new and earlier store do 
not conflict then the set is not truly redundant.  This can happen, 
for example, if objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for
equality and if that is successful then finds the earlier store in the
value history and checks the alias sets.

The routine is used in two places elsewhere in the compiler. Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* alias.h (mems_same_for_tbaa_p): Declare.
* alias.cc (mems_same_for_tbaa_p): New function.
* dse.cc (record_store): Use it instead of open-coding
alias check.
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.
Seems quite reasonable.   The only question I would have would be 
whether or not you considered including the aliasing info into the 
hashing used by cselib.  You'd probably still need the bulk of this 
patch as well since we could presumably still get a hash conflict with 
two stores of the same value to the same location, but with different 
alias sets (it's just much less likely), so perhaps it doesn't really 
buy us anything.


I thought about this, but if the alias set were included in the hash, 
then surely you'd get every alias set in a different value.  Then you'd 
miss the cases where the alias sets do conflict even though they are not 
the same.  Anyway, the values /are/ the same so in some circumstances 
you might want to know that.




Ideally this would include a testcase.  You might be able to turn that 
non-executawble reduced case into something useful by scanning the 
post-reload dumps.


I considered this as well, but the testcase I have is far too fragile, I 
think.  The existing test only fails on Arm, only fails on 11.2 (not 
11.3 or gcc-12 onwards), relies on two objects with the same value being 
in distinct alias sets but still assigned to the same stack slot and for 
some copy dance to end up trying to write back the original value to the 
same slot but with a non-conflicting set.  And finally, the scheduler 
has to then try to move a load past the non-aliasing store.





To get anywhere close to this I think we'd need something akin to the 
gimple reader but for RTL so that we could set up all the conditions for 
the failure without the risk of an earlier transform blowing the test away.


I wasn't aware of the rtl reader already in the compiler.  But it 
doesn't really get me any closer as it is lacking in so many regards:


- It can't handle (const_double:SF ...) - it tries to handle the 
argument as an int.  This is a consequence, I think, of the reader being 
based on that for reading machine descriptions where FP const_double is 
simply never encountered.


- It doesn't seem to handle anything much more than very basic types, 
and in particular appears to have no way of ensuring that alias sets 
match up with the type system.




I even considered whether we could start with a gimple dump and 
bypassing all the tree/gimple transformations, but even that would be 
still at the mercy of the stack-slot allocation algorithm.


I spent a while trying to get some gimple out of the dumpers in a form 
that was usable, but that's pretty much a non-starter.  To make it work 
we'd need to add support for gimple clobbers on objects - without that 
there's no way to get the stack-slot sharing code to work.  Furthermore, 
even feeding fully-optimized gimple directly into expand is such a long 
way from the postreload pass, that I can't believe the testcase would 
remain stable for long.


And the other major issue is that the original testcase is heavily 
templated C++ and neither of the parsers gimple or rtl is supported in 
cc1plus: converting the boilerplate to be C-friendly is probably going 
to be hard.


I can't afford to spend much more time on this, especially given the 
low-quality test we're going to get out of the end of the process.






Jeff


R.


R.


Re: [RFA configure parts] aarch64: Make cc1 handle --with options

2022-08-02 Thread Richard Earnshaw via Gcc-patches




On 13/06/2022 15:33, Richard Sandiford via Gcc-patches wrote:

On aarch64, --with-arch, --with-cpu and --with-tune only have an
effect on the driver, so “./xgcc -B./ -O3” can give significantly
different results from “./cc1 -O3”.  --with-arch did have a limited
effect on ./cc1 in previous releases, although it didn't work
entirely correctly.

Being of a lazy persuasion, I've got used to ./cc1 selecting SVE for
--with-arch=armv8.2-a+sve without having to supply an explicit -march,
so this patch makes ./cc1 emulate the relevant OPTION_DEFAULT_SPECS.
It relies on Wilco's earlier clean-ups.

The patch makes config.gcc define WITH_FOO_STRING macros for each
supported --with-foo option.  This could be done only in aarch64-
specific code, but I thought it could be useful on other targets
too (and can be safely ignored otherwise).  There didn't seem to
be any existing and potentially clashing uses of macros with this
style of name.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK for the configure
bits?

Richard


gcc/
* config.gcc: Define WITH_FOO_STRING macros for each supported
--with-foo option.
* config/aarch64/aarch64.cc (aarch64_override_options): Emulate
OPTION_DEFAULT_SPECS.
* config/aarch64/aarch64.h (OPTION_DEFAULT_SPECS): Reference the above.
---
  gcc/config.gcc| 14 ++
  gcc/config/aarch64/aarch64.cc |  8 
  gcc/config/aarch64/aarch64.h  |  5 -
  3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cdbefb5b4f5..e039230431c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5865,6 +5865,20 @@ else
configure_default_options="{ ${t} }"
  fi
  
+for option in $supported_defaults

+do
+   lc_option=`echo $option | sed s/-/_/g`
+   uc_option=`echo $lc_option | tr a-z A-Z`
+   eval "val=\$with_$lc_option"
+   if test -n "$val"
+   then
+   val="\\\"$val\\\""
+   else
+   val=nullptr
+   fi
+   tm_defines="$tm_defines WITH_${uc_option}_STRING=$val"
+done


This bit would really be best reviewed by a non-arm maintainer.  It 
generally looks OK.  My only comment would be why define anything if the 
corresponding --with-foo was not specified.  They you can use #ifdef to 
test if the user specified a default.


R.


+
  if test "$target_cpu_default2" != ""
  then
if test "$target_cpu_default" != ""
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d21e041eccb..0bc700b81ad 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18109,6 +18109,14 @@ aarch64_override_options (void)
if (aarch64_branch_protection_string)
  aarch64_validate_mbranch_protection (aarch64_branch_protection_string);
  
+  /* Emulate OPTION_DEFAULT_SPECS.  */

+  if (!aarch64_arch_string && !aarch64_cpu_string)
+aarch64_arch_string = WITH_ARCH_STRING;
+  if (!aarch64_arch_string && !aarch64_cpu_string)
+aarch64_cpu_string = WITH_CPU_STRING;
+  if (!aarch64_cpu_string && !aarch64_tune_string)
+aarch64_tune_string = WITH_TUNE_STRING;
+
/* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
   If either of -march or -mtune is given, they override their
   respective component of -mcpu.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 80cfe4b7407..3122dbd7098 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1267,7 +1267,10 @@ extern enum aarch64_code_model aarch64_cmodel;
  /* Support for configure-time --with-arch, --with-cpu and --with-tune.
 --with-arch and --with-cpu are ignored if either -mcpu or -march is used.
 --with-tune is ignored if either -mtune or -mcpu is used (but is not
-   affected by -march).  */
+   affected by -march).
+
+   There is corresponding code in aarch64_override_options that emulates
+   this behavior when cc1  are invoked directly.  */
  #define OPTION_DEFAULT_SPECS  \
{"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" },\
{"cpu",  "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" },   \


Re: [PATCH v2] cselib: add function to check if SET is redundant [PR106187]

2022-08-01 Thread Richard Earnshaw via Gcc-patches




On 30/07/2022 20:57, Jeff Law via Gcc-patches wrote:



On 7/29/2022 7:52 AM, Richard Earnshaw via Gcc-patches wrote:
A SET operation that writes memory may have the same value as an 
earlier store but if the alias sets of the new and earlier store do 
not conflict then the set is not truly redundant.  This can happen, 
for example, if objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for
equality and if that is successful then finds the earlier store in the
value history and checks the alias sets.

The routine is used in two places elsewhere in the compiler. Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* alias.h (mems_same_for_tbaa_p): Declare.
* alias.cc (mems_same_for_tbaa_p): New function.
* dse.cc (record_store): Use it instead of open-coding
alias check.
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.
Seems quite reasonable.   The only question I would have would be 
whether or not you considered including the aliasing info into the 
hashing used by cselib.  You'd probably still need the bulk of this 
patch as well since we could presumably still get a hash conflict with 
two stores of the same value to the same location, but with different 
alias sets (it's just much less likely), so perhaps it doesn't really 
buy us anything.


I thought about this, but if the alias set were included in the hash, 
then surely you'd get every alias set in a different value.  Then you'd 
miss the cases where the alias sets do conflict even though they are not 
the same.  Anyway, the values /are/ the same so in some circumstances 
you might want to know that.




Ideally this would include a testcase.  You might be able to turn that 
non-executawble reduced case into something useful by scanning the 
post-reload dumps.


I considered this as well, but the testcase I have is far too fragile, I 
think.  The existing test only fails on Arm, only fails on 11.2 (not 
11.3 or gcc-12 onwards), relies on two objects with the same value being 
in distinct alias sets but still assigned to the same stack slot and for 
some copy dance to end up trying to write back the original value to the 
same slot but with a non-conflicting set.  And finally, the scheduler 
has to then try to move a load past the non-aliasing store.


To get anywhere close to this I think we'd need something akin to the 
gimple reader but for RTL so that we could set up all the conditions for 
the failure without the risk of an earlier transform blowing the test 
away.


I even considered whether we could start with a gimple dump and 
bypassing all the tree/gimple transformations, but even that would be 
still at the mercy of the stack-slot allocation algorithm.




Jeff


R.


[PATCH v2] cselib: add function to check if SET is redundant [PR106187]

2022-07-29 Thread Richard Earnshaw via Gcc-patches
A SET operation that writes memory may have the same value as an earlier 
store but if the alias sets of the new and earlier store do not conflict 
then the set is not truly redundant.  This can happen, for example, if 
objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for
equality and if that is successful then finds the earlier store in the
value history and checks the alias sets.

The routine is used in two places elsewhere in the compiler.  Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* alias.h (mems_same_for_tbaa_p): Declare.
* alias.cc (mems_same_for_tbaa_p): New function.
* dse.cc (record_store): Use it instead of open-coding
alias check.
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.diff --git a/gcc/alias.cc b/gcc/alias.cc
index 8c08452e0ac..d54feb15268 100644
--- a/gcc/alias.cc
+++ b/gcc/alias.cc
@@ -389,6 +389,20 @@ refs_same_for_tbaa_p (tree earlier, tree later)
 	  || alias_set_subset_of (later_base_set, earlier_base_set));
 }
 
+/* Similar to refs_same_for_tbaa_p() but for use on MEM rtxs.  */
+bool
+mems_same_for_tbaa_p (rtx earlier, rtx later)
+{
+  gcc_assert (MEM_P (earlier));
+  gcc_assert (MEM_P (later));
+
+  return ((MEM_ALIAS_SET (earlier) == MEM_ALIAS_SET (later)
+	   || alias_set_subset_of (MEM_ALIAS_SET (later),
+   MEM_ALIAS_SET (earlier)))
+	  && (!MEM_EXPR (earlier)
+	  || refs_same_for_tbaa_p (MEM_EXPR (earlier), MEM_EXPR (later;
+}
+
 /* Returns a pointer to the alias set entry for ALIAS_SET, if there is
such an entry, or NULL otherwise.  */
 
diff --git a/gcc/alias.h b/gcc/alias.h
index b2596518ac9..ee3db466763 100644
--- a/gcc/alias.h
+++ b/gcc/alias.h
@@ -40,6 +40,7 @@ tree reference_alias_ptr_type_1 (tree *);
 bool alias_ptr_types_compatible_p (tree, tree);
 int compare_base_decls (tree, tree);
 bool refs_same_for_tbaa_p (tree, tree);
+bool mems_same_for_tbaa_p (rtx, rtx);
 
 /* This alias set can be used to force a memory to conflict with all
other memories, creating a barrier across which no memory reference
diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc
index 18047da7b24..a8b0139bb4d 100644
--- a/gcc/cfgcleanup.cc
+++ b/gcc/cfgcleanup.cc
@@ -208,7 +208,7 @@ mark_effect (rtx exp, regset nonequal)
   return false;
 
 case SET:
-  if (rtx_equal_for_cselib_p (SET_DEST (exp), SET_SRC (exp)))
+  if (cselib_redundant_set_p (exp))
 	return false;
   dest = SET_DEST (exp);
   if (dest == pc_rtx)
diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index 6769beeeaf8..6a5609786fa 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dumpfile.h"
 #include "cselib.h"
 #include "function-abi.h"
+#include "alias.h"
 
 /* A list of cselib_val structures.  */
 struct elt_list
@@ -1157,6 +1158,75 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode memmode, int depth)
   return 1;
 }
 
+/* Wrapper for rtx_equal_for_cselib_p to determine whether a SET is
+   truly redundant, taking into account aliasing information.  */
+bool
+cselib_redundant_set_p (rtx set)
+{
+  gcc_assert (GET_CODE (set) == SET);
+  rtx dest = SET_DEST (set);
+  if (cselib_reg_set_mode (dest) != GET_MODE (dest))
+return false;
+
+  if (!rtx_equal_for_cselib_p (dest, SET_SRC (set)))
+return false;
+
+  while (GET_CODE (dest) == SUBREG
+	 || GET_CODE (dest) == ZERO_EXTRACT
+	 || GET_CODE (dest) == STRICT_LOW_PART)
+dest = XEXP (dest, 0);
+
+  if (!flag_strict_aliasing || !MEM_P (dest))
+return true;
+
+  /* For a store we need to check that suppressing it will not change
+ the effective alias set.  */
+  rtx dest_addr = XEXP (dest, 0);
+
+  /* Lookup the equivalents to the original dest (rather than just the
+ MEM).  */
+  cselib_val *src_val = cselib_lookup (SET_DEST (set),
+   GET_MODE (SET_DEST (set)),
+   0, VOIDmode);
+
+  if (src_val)
+{
+  /* Walk the list of source equivalents to find the MEM accessing
+	 the same location.  */
+  for (elt_loc_list *l = src_val->locs; l; l = l->next)
+	{
+	  rtx src_equiv = l->loc;
+	  while (GET_CODE (src_equiv) == SUBREG
+		 || GET_CODE (src_equiv) == ZERO_EXTRACT
+		 || GET_CODE (src_equiv) == STRICT_LOW_PART)
+	src_equiv = XEXP (src_equiv, 0);
+
+	  if (MEM_P (src_equiv))
+	{
+	  /* Match the MEMs by comparing the addresses.  We can
+		 only remove the later store if the earlier aliases at
+		 least all the accesses of the later one.  */
+	  if (rtx_equal_for_cselib_1 (dest_addr, XEXP (src_equiv, 0),
+	  GET_MODE (dest), 0))
+		return mems_same_for_tbaa_p (src_equiv, 

Re: cselib: add function to check if SET is redundant [PR106187]

2022-07-29 Thread Richard Earnshaw via Gcc-patches




On 29/07/2022 08:06, Richard Biener via Gcc-patches wrote:

On Thu, Jul 28, 2022 at 6:46 PM Richard Earnshaw
 wrote:


[resend with correct subject line]

A SET operation that writes memory may have the same value as an earlier
store but if the alias sets of the new and earlier store do not conflict
then the set is not truly redundant.  This can happen, for example, if
objects of different types share a stack slot.

To fix this we define a new function in cselib that first checks for
equality and if that is successful then finds the earlier store in the
value history and checks the alias sets.

The routine is used in two places elsewhere in the compiler.  Firstly
in cfgcleanup and secondly in postreload.


I can't comment on the stripping on SUBREGs and friends but it seems
to be conservative apart from

+  if (!flag_strict_aliasing || !MEM_P (dest))
+return true;

where if dest is not a MEM but were to contain one we'd miss it.
Double-checking
from more RTL literate people appreciated.


There are very few things that can wrap a MEM in a SET_DEST.  I'm pretty 
sure that's all of them.  It certainly matches the code in 
cselib_invalidate_rtx which has to deal with this sort of case.




+  /* Lookup the equivalents to the dest.  This is more likely to succeed
+ than looking up the equivalents to the source (for example, when the
+ src is some form of constant).  */

I think the comment is misleading - we _do_ have to lookup the MEM,
looking up equivalences of a reg or an expression on the RHS isn't
what we are interested in.


OK, I'll try to reword it.



+   return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+ MEM_ALIAS_SET (src_equiv));

that's not conservative enough - dse.cc has correct boilerplate, we have
to check both MEM_ALIAS_SET and MEM_EXPR here (the latter only
if the former load/store has a MEM_EXPR).  Note in particular
using alias_set_subset_of instead of alias_sets_conflict_p.

   /* We can only remove the later store if the earlier aliases
  at least all accesses the later one.  */
   && ((MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
|| alias_set_subset_of (MEM_ALIAS_SET (mem),
MEM_ALIAS_SET (s_info->mem)))
   && (!MEM_EXPR (s_info->mem)
   || refs_same_for_tbaa_p (MEM_EXPR (s_info->mem),
MEM_EXPR (mem)



OK, that's an easy enough change.


+  /* We failed to find a recorded value in the cselib history, so try the
+ source of this set.  */
+  rtx src = SET_SRC (set);
+  while (GET_CODE (src) == SUBREG)
+src = XEXP (src, 0);
+
+  if (MEM_P (src) && rtx_equal_for_cselib_1 (dest_addr, XEXP (src, 0),
+GET_MODE (dest), 0))
+return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+ MEM_ALIAS_SET (src));

this looks like an odd case to me - wouldn't that only catch things
like self-assignments, aka *p = *p?  So I'd simply drop this fallback.


It catches the case of *p = *q when p and q have the same value.  It did 
come up in testing on x86 (when previously I was aborting to make sure 
I'd caught everything).  We could leave it out as the fallback case in 
this instance is to record a conflict, but it's not a path that's likely 
to be performance critical and the probability of this being a redundant 
store is quite high.  I'll update the comment to make this clearer.



R.



Otherwise it looks OK to me.

Thanks,
Richard.


gcc/ChangeLog:
 * cselib.h (cselib_redundant_set_p): Declare.
 * cselib.cc: Include alias.h
 (cselib_redundant_set_p): New function.
 * cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
 of rtx_equal_for_cselib_p.
 * postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
 (reload_cse_noop_set_p): Delete.


cselib: add function to check if SET is redundant [PR106187]

2022-07-28 Thread Richard Earnshaw via Gcc-patches

[resend with correct subject line]

A SET operation that writes memory may have the same value as an earlier 
store but if the alias sets of the new and earlier store do not conflict 
then the set is not truly redundant.  This can happen, for example, if 
objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for 
equality and if that is successful then finds the earlier store in the 
value history and checks the alias sets.


The routine is used in two places elsewhere in the compiler.  Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.
diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc
index 18047da7b24..a8b0139bb4d 100644
--- a/gcc/cfgcleanup.cc
+++ b/gcc/cfgcleanup.cc
@@ -208,7 +208,7 @@ mark_effect (rtx exp, regset nonequal)
   return false;
 
 case SET:
-  if (rtx_equal_for_cselib_p (SET_DEST (exp), SET_SRC (exp)))
+  if (cselib_redundant_set_p (exp))
 	return false;
   dest = SET_DEST (exp);
   if (dest == pc_rtx)
diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index 6769beeeaf8..fcfd8340a4a 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dumpfile.h"
 #include "cselib.h"
 #include "function-abi.h"
+#include "alias.h"
 
 /* A list of cselib_val structures.  */
 struct elt_list
@@ -1157,6 +1158,75 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode memmode, int depth)
   return 1;
 }
 
+/* Wrapper for rtx_equal_for_cselib_p to determine whether a SET is
+   truly redundant, taking into account aliasing information.  */
+bool
+cselib_redundant_set_p (rtx set)
+{
+  gcc_assert (GET_CODE (set) == SET);
+  rtx dest = SET_DEST (set);
+  if (cselib_reg_set_mode (dest) != GET_MODE (dest))
+return false;
+
+  if (!rtx_equal_for_cselib_p (dest, SET_SRC (set)))
+return false;
+
+  while (GET_CODE (dest) == SUBREG
+	 || GET_CODE (dest) == ZERO_EXTRACT
+	 || GET_CODE (dest) == STRICT_LOW_PART)
+dest = XEXP (dest, 0);
+
+  if (!flag_strict_aliasing || !MEM_P (dest))
+return true;
+
+  /* For a store we need to check that suppressing it will not change
+ the effective alias set.  */
+  rtx dest_addr = XEXP (dest, 0);
+
+  /* Lookup the equivalents to the dest.  This is more likely to succeed
+ than looking up the equivalents to the source (for example, when the
+ src is some form of constant).  */
+  cselib_val *src_val = cselib_lookup (SET_DEST (set),
+   GET_MODE (SET_DEST (set)),
+   0, VOIDmode);
+
+  if (src_val)
+{
+  /* Walk the list of source equivalents to find the MEM accessing the same
+	 location.  */
+  for (elt_loc_list *l = src_val->locs; l; l = l->next)
+	{
+	  rtx src_equiv = l->loc;
+	  while (GET_CODE (src_equiv) == SUBREG
+		 || GET_CODE (src_equiv) == ZERO_EXTRACT
+		 || GET_CODE (src_equiv) == STRICT_LOW_PART)
+	src_equiv = XEXP (src_equiv, 0);
+
+	  if (MEM_P (src_equiv))
+	{
+	  /* Match the MEMs by comparing the addresses.  */
+	  if (rtx_equal_for_cselib_1 (dest_addr, XEXP (src_equiv, 0),
+	  GET_MODE (dest), 0))
+		return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+	  MEM_ALIAS_SET (src_equiv));
+	}
+	}
+}
+
+  /* We failed to find a recorded value in the cselib history, so try the
+ source of this set.  */
+  rtx src = SET_SRC (set);
+  while (GET_CODE (src) == SUBREG)
+src = XEXP (src, 0);
+
+  if (MEM_P (src) && rtx_equal_for_cselib_1 (dest_addr, XEXP (src, 0),
+	 GET_MODE (dest), 0))
+return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+  MEM_ALIAS_SET (src));
+
+  return false;
+}
+
 /* Helper function for cselib_hash_rtx.  Arguments like for cselib_hash_rtx,
except that it hashes (plus:P x c).  */
 
diff --git a/gcc/cselib.h b/gcc/cselib.h
index 9ae65e6459e..b0905053ea5 100644
--- a/gcc/cselib.h
+++ b/gcc/cselib.h
@@ -83,6 +83,7 @@ extern void cselib_process_insn (rtx_insn *);
 extern bool fp_setter_insn (rtx_insn *);
 extern machine_mode cselib_reg_set_mode (const_rtx);
 extern int rtx_equal_for_cselib_1 (rtx, rtx, machine_mode, int);
+extern bool cselib_redundant_set_p (rtx);
 extern int references_value_p (const_rtx, int);
 extern rtx cselib_expand_value_rtx (rtx, bitmap, int);
 typedef rtx (*cselib_expand_callback)(rtx, bitmap, int, void *);
diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index d1c99fe6dc9..41f61d32648 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -43,7 +43,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "function-abi.h"
 #include "rtl-iter.h"
 
-static int reload_cse_noop_set_p 

http://pdtlreviewboard.cambridge.arm.com/r/16099/

2022-07-28 Thread Richard Earnshaw via Gcc-patches
A SET operation that writes memory may have the same value as an earlier 
store but if the alias sets of the new and earlier store do not conflict 
then the set is not truly redundant.  This can happen, for example, if 
objects of different types share a stack slot.


To fix this we define a new function in cselib that first checks for 
equality and if that is successful then finds the earlier store in the 
value history and checks the alias sets.


The routine is used in two places elsewhere in the compiler.  Firstly
in cfgcleanup and secondly in postreload.

gcc/ChangeLog:
* cselib.h (cselib_redundant_set_p): Declare.
* cselib.cc: Include alias.h
(cselib_redundant_set_p): New function.
* cfgcleanup.cc: (mark_effect): Use cselib_redundant_set_p instead
of rtx_equal_for_cselib_p.
* postreload.c (reload_cse_simplify): Use cselib_redundant_set_p.
(reload_cse_noop_set_p): Delete.
diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc
index 18047da7b24..a8b0139bb4d 100644
--- a/gcc/cfgcleanup.cc
+++ b/gcc/cfgcleanup.cc
@@ -208,7 +208,7 @@ mark_effect (rtx exp, regset nonequal)
   return false;
 
 case SET:
-  if (rtx_equal_for_cselib_p (SET_DEST (exp), SET_SRC (exp)))
+  if (cselib_redundant_set_p (exp))
 	return false;
   dest = SET_DEST (exp);
   if (dest == pc_rtx)
diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index 6769beeeaf8..fcfd8340a4a 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dumpfile.h"
 #include "cselib.h"
 #include "function-abi.h"
+#include "alias.h"
 
 /* A list of cselib_val structures.  */
 struct elt_list
@@ -1157,6 +1158,75 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode memmode, int depth)
   return 1;
 }
 
+/* Wrapper for rtx_equal_for_cselib_p to determine whether a SET is
+   truly redundant, taking into account aliasing information.  */
+bool
+cselib_redundant_set_p (rtx set)
+{
+  gcc_assert (GET_CODE (set) == SET);
+  rtx dest = SET_DEST (set);
+  if (cselib_reg_set_mode (dest) != GET_MODE (dest))
+return false;
+
+  if (!rtx_equal_for_cselib_p (dest, SET_SRC (set)))
+return false;
+
+  while (GET_CODE (dest) == SUBREG
+	 || GET_CODE (dest) == ZERO_EXTRACT
+	 || GET_CODE (dest) == STRICT_LOW_PART)
+dest = XEXP (dest, 0);
+
+  if (!flag_strict_aliasing || !MEM_P (dest))
+return true;
+
+  /* For a store we need to check that suppressing it will not change
+ the effective alias set.  */
+  rtx dest_addr = XEXP (dest, 0);
+
+  /* Lookup the equivalents to the dest.  This is more likely to succeed
+ than looking up the equivalents to the source (for example, when the
+ src is some form of constant).  */
+  cselib_val *src_val = cselib_lookup (SET_DEST (set),
+   GET_MODE (SET_DEST (set)),
+   0, VOIDmode);
+
+  if (src_val)
+{
+  /* Walk the list of source equivalents to find the MEM accessing the same
+	 location.  */
+  for (elt_loc_list *l = src_val->locs; l; l = l->next)
+	{
+	  rtx src_equiv = l->loc;
+	  while (GET_CODE (src_equiv) == SUBREG
+		 || GET_CODE (src_equiv) == ZERO_EXTRACT
+		 || GET_CODE (src_equiv) == STRICT_LOW_PART)
+	src_equiv = XEXP (src_equiv, 0);
+
+	  if (MEM_P (src_equiv))
+	{
+	  /* Match the MEMs by comparing the addresses.  */
+	  if (rtx_equal_for_cselib_1 (dest_addr, XEXP (src_equiv, 0),
+	  GET_MODE (dest), 0))
+		return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+	  MEM_ALIAS_SET (src_equiv));
+	}
+	}
+}
+
+  /* We failed to find a recorded value in the cselib history, so try the
+ source of this set.  */
+  rtx src = SET_SRC (set);
+  while (GET_CODE (src) == SUBREG)
+src = XEXP (src, 0);
+
+  if (MEM_P (src) && rtx_equal_for_cselib_1 (dest_addr, XEXP (src, 0),
+	 GET_MODE (dest), 0))
+return alias_sets_conflict_p (MEM_ALIAS_SET (dest),
+  MEM_ALIAS_SET (src));
+
+  return false;
+}
+
 /* Helper function for cselib_hash_rtx.  Arguments like for cselib_hash_rtx,
except that it hashes (plus:P x c).  */
 
diff --git a/gcc/cselib.h b/gcc/cselib.h
index 9ae65e6459e..b0905053ea5 100644
--- a/gcc/cselib.h
+++ b/gcc/cselib.h
@@ -83,6 +83,7 @@ extern void cselib_process_insn (rtx_insn *);
 extern bool fp_setter_insn (rtx_insn *);
 extern machine_mode cselib_reg_set_mode (const_rtx);
 extern int rtx_equal_for_cselib_1 (rtx, rtx, machine_mode, int);
+extern bool cselib_redundant_set_p (rtx);
 extern int references_value_p (const_rtx, int);
 extern rtx cselib_expand_value_rtx (rtx, bitmap, int);
 typedef rtx (*cselib_expand_callback)(rtx, bitmap, int, void *);
diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index d1c99fe6dc9..41f61d32648 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -43,7 +43,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "function-abi.h"
 #include "rtl-iter.h"
 
-static int reload_cse_noop_set_p (rtx);
 static bool 

Re: [PATCH Rust front-end v1 3/4] Add Rust target hooks to ARM

2022-07-27 Thread Richard Earnshaw via Gcc-patches




On 27/07/2022 14:40, herron.philip--- via Gcc-patches wrote:

From: Philip Herron 

This adds the nessecary target hooks for the arm target.

gcc/ChangeLog:

 * config.gcc: add rust_target_objs for arm

gcc/config/arm/ChangeLog:

* arm-protos.h: define arm_rust_target_cpu_info
 * arm-rust.cc: new file to generate info
* arm.h: define TARGET_RUST_CPU_INFO
* bpabi.h: define TARGET_RUST_OS_INFO
* freebsd.h: likewise
* linux-eabi.h: likewise
* linux-elf.h: likewise
* netbsd-eabi.h: likewise
* netbsd-elf.h: likewise
* rtems.h: likewise
* symbian.h: likewise
* t-arm: compile arm-rust.cc
* uclinux-eabi.h: define TARGET_RUST_OS_INFO
* uclinux-elf.h: likewise
* vxworks.h: likewise

Co-authored-by: SimplyTheOther 
---
  gcc/config.gcc|   1 +
  gcc/config/arm/arm-protos.h   |   3 +
  gcc/config/arm/arm-rust.cc| 304 ++
  gcc/config/arm/arm.h  |   3 +
  gcc/config/arm/bpabi.h|  11 ++
  gcc/config/arm/freebsd.h  |   9 +
  gcc/config/arm/linux-eabi.h   |   8 +
  gcc/config/arm/linux-elf.h|   5 +
  gcc/config/arm/netbsd-eabi.h  |  10 ++
  gcc/config/arm/netbsd-elf.h   |   8 +
  gcc/config/arm/rtems.h|  14 ++
  gcc/config/arm/symbian.h  |  15 ++
  gcc/config/arm/t-arm  |   4 +
  gcc/config/arm/uclinux-eabi.h |  13 ++
  gcc/config/arm/uclinux-elf.h  |  12 ++
  gcc/config/arm/vxworks.h  |  14 ++
  16 files changed, 434 insertions(+)
  create mode 100644 gcc/config/arm/arm-rust.cc

diff --git a/gcc/config.gcc b/gcc/config.gcc
index cdd4fb4392a..9d686019b28 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -368,6 +368,7 @@ arm*-*-*)
c_target_objs="arm-c.o"
cxx_target_objs="arm-c.o"
d_target_objs="arm-d.o"
+rust_target_objs="arm-rust.o"
extra_options="${extra_options} arm/arm-tables.opt"
target_gtfiles="\$(srcdir)/config/arm/arm-builtins.cc 
\$(srcdir)/config/arm/arm-mve-builtins.h \$(srcdir)/config/arm/arm-mve-builtins.cc"
;;
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f8aabbdae37..9513f96fdbc 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -406,6 +406,9 @@ extern void arm_cpu_cpp_builtins (struct cpp_reader *);
  extern void arm_d_target_versions (void);
  extern void arm_d_register_target_info (void);
  
+/* Defined in arm-rust.c  */

+extern void arm_rust_target_cpu_info (void);
+
  extern bool arm_is_constant_pool_ref (rtx);
  
  /* The bits in this mask specify which instruction scheduling options should

diff --git a/gcc/config/arm/arm-rust.cc b/gcc/config/arm/arm-rust.cc
new file mode 100644
index 000..7c83e3fa3a6
--- /dev/null
+++ b/gcc/config/arm/arm-rust.cc
@@ -0,0 +1,304 @@
+/* Subroutines for the Rust front end on the ARM architecture.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tm_p.h"
+#include "rust/rust-target.h"
+#include "rust/rust-target-def.h"
+
+/* Implement TARGET_RUST_CPU_INFO for ARM targets.  */
+
+void arm_rust_target_cpu_info(void) {
+rust_add_target_info("target_arch", "arm");
+
+/* TODO: further research support for CLREX, acquire-release (lda/ldaex), 
slow-fp-brcc (slow FP
+ * compare and branch), perfmon, trustzone, fpao, fuse-aes, fuse-literals, 
read-tp-hard, zcz,
+ * prof-unpr, slow-vgetlni32, slow-vdup32, prefer-vmovsr, prefer-ishst, 
muxed-units, slow-odd-reg,
+ * slow-load-D-subreg, wide-stride-vfp, dont-widen-vmovs, splat-vfp-neon, 
expand-fp-mlx,
+ * vmlx-hazards, neon-fpmovs, neonfp (as in using neon for scalar fp), 
vldn-align,
+ * nonpipelined-vfp, slowfpvmlx, slowfpvfmx, vmlx-forwarding, 32bit 
(prefer 32-bit Thumb),
+ * loop-align, mve1beat, mve2beat, mve4beat, avoid-partial-cpsr, 
cheap-predictable-cpsr,
+ * avoid-movs-shop, ret-addr-stack, no-branch-predictor, virtualization, 
nacl-trap, execute-only,
+ * reserve-r9, no-movt, no-neg-immediates, use-misched, 
disable-postra-scheduler, lob (Low
+ * Overhead Branch), noarm, cde - can't find them. */
+/* TODO: figure out if gcc has an equivalent to "fpregs" (floating-point 
registers even if only
+ * used for integer - shared 

Re: [PATCH 9/12 V2] arm: Make libgcc bti compatible

2022-07-25 Thread Richard Earnshaw via Gcc-patches




On 22/07/2022 16:09, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 21/07/2022 10:17, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 28/04/2022 10:48, Andrea Corallo via Gcc-patches wrote:

This change add bti instructions at the beginning of arm specific
libgcc hand written assembly routines.
2022-03-31  Andrea Corallo  
* libgcc/config/arm/crti.S (FUNC_START): Add bti instruction
if
necessary.
* libgcc/config/arm/lib1funcs.S (THUMB_FUNC_START, FUNC_START):
Likewise.



+#if defined(__ARM_FEATURE_BTI)

Wouldn't it be better to use __ARM_FEATURE_BTI_DEFAULT?  That way we
only get BTI instructions in multilib variants that have asked for
BTI.

R.

Hi Richard,
good point, yes I think so.
Please find attached the updated patch.
BR
Andrea



I've been pondering this patch.  The way it is implemented would put a
BTI instruction at the start of every assembler routine in libgcc.
But the vast majority of functions in libgcc cannot have their address
taken, so a BTI isn't needed (BTI is only needed when an indirect jump
could be used).  So I wonder if we really need to do this so
aggressively?

Perhaps a better approach would be to define a macro (eg MAYBEBTI)
which expands a BTI if the compilation requires it and nothing
otherwise), and then manually insert that in any functions that really
need this (if any).


I guess the main downside of this approach would be the maintanace
burden, we'll have to remember forever that every time an asm function
is called by function pointer we have to add the bti landing pad
manually, otherwise this will be broken when pacbti enabled. WDYT?

If we want to go this way I'll start reworking the patch in this
direction (tho this might not be trivial).



Yes, it's a trade-off.  The lazy way, however, costs all users even if a 
function is never addressed (which I think is the case for practically 
all functions in libgcc).


So I think in this case it's worth taking that extra development pain.

R.

BR

   Andrea


Re: [PATCH 9/12 V2] arm: Make libgcc bti compatible

2022-07-21 Thread Richard Earnshaw via Gcc-patches

On 21/07/2022 10:17, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 28/04/2022 10:48, Andrea Corallo via Gcc-patches wrote:

This change add bti instructions at the beginning of arm specific
libgcc hand written assembly routines.
2022-03-31  Andrea Corallo  
* libgcc/config/arm/crti.S (FUNC_START): Add bti instruction
if
necessary.
* libgcc/config/arm/lib1funcs.S (THUMB_FUNC_START, FUNC_START):
Likewise.



+#if defined(__ARM_FEATURE_BTI)

Wouldn't it be better to use __ARM_FEATURE_BTI_DEFAULT?  That way we
only get BTI instructions in multilib variants that have asked for
BTI.

R.


Hi Richard,

good point, yes I think so.

Please find attached the updated patch.

BR

   Andrea



I've been pondering this patch.  The way it is implemented would put a 
BTI instruction at the start of every assembler routine in libgcc.  But 
the vast majority of functions in libgcc cannot have their address 
taken, so a BTI isn't needed (BTI is only needed when an indirect jump 
could be used).  So I wonder if we really need to do this so aggressively?


Perhaps a better approach would be to define a macro (eg MAYBEBTI) which 
expands a BTI if the compilation requires it and nothing otherwise), and 
then manually insert that in any functions that really need this (if any).


R.


Re: [PATCH 8/12 V3] arm: Introduce multilibs for PACBTI target feature

2022-07-21 Thread Richard Earnshaw via Gcc-patches




On 21/07/2022 10:04, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:

[...]


The documentation mentions -mbranch-protection=standard+leaf, so
you're missing a mapping for that.
OK with that change.
R.


Oh, and please add some tests to gcc/testsuite/gcc.target/arm/multilib.exp

R.


Hi Richard,

thanks, here the updated patch.

PS I've also added three mlibarch -> march matches that were missing.

BR

   Andrea



+MULTILIB_REQUIRED	+= 
mthumb/march=armv8.1-m.main+pacbti+fp/mbranch-protection=standard/mfloat-abi=hard
+MULTILIB_REQUIRED	+= 
mthumb/march=armv8.1-m.main+pacbti+fp.dp/mbranch-protection=standard/mfloat-abi=softfp
+MULTILIB_REQUIRED	+= 
mthumb/march=armv8.1-m.main+pacbti+fp.dp/mbranch-protection=standard/mfloat-abi=hard
+MULTILIB_REQUIRED	+= 
mthumb/march=armv8.1-m.main+pacbti+mve/mbranch-protection=standard/mfloat-abi=hard

+
+
 # Arch Matches
 MULTILIB_MATCHES   += march?armv6s-m=march?armv6-m

Just one blank line between sections.

Otherwise OK.

R.


Re: [PATCH 7/12 V2] arm: Emit build attributes for PACBTI target feature

2022-07-21 Thread Richard Earnshaw via Gcc-patches




On 13/07/2022 09:58, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 28/04/2022 10:45, Andrea Corallo via Gcc-patches wrote:

This patch emits assembler directives for PACBTI build attributes as
defined by the
ABI.

gcc/ChangeLog:
* config/arm/arm.c (arm_file_start): Emit EABI attributes for
Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use, TAG_PACRET_use.


This bit is OK.


gcc/testsuite/ChangeLog:
* gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-3: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.


These tests contain directives like:

+/* { dg-additional-options " -mbranch-protection=pac-ret+bti
--save-temps" } */

But they don't check that the architecture permits this (it has to be
armv8-m.main or later).


Hi Richard & all,

please find attached the updated patch.

BR

  Andrea



The tests in this patch have similar issues to my previous reply.  You 
need to make sure that adding options will not cause a conflict with 
other options added by the test driver.


R.


Re: [PATCH 5/12 V2] arm: Implement target feature macros for PACBTI

2022-07-21 Thread Richard Earnshaw via Gcc-patches




On 12/07/2022 16:45, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:


On 28/04/2022 10:42, Andrea Corallo via Gcc-patches wrote:

This patch implements target feature macros when PACBTI is enabled
through the -march option or -mbranch-protection.  The target feature
macros __ARM_FEATURE_PAC_DEFAULT and __ARM_FEATURE_BTI_DEFAULT are
specified in ARM ACLE

__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI are specified in the
pull-request .
Approved here
.
gcc/ChangeLog:
* config/arm/arm-c.c (arm_cpu_builtins): Define
__ARM_FEATURE_BTI_DEFAULT, __ARM_FEATURE_PAC_DEFAULT,
__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI.


This bit is OK.


gcc/testsuite/ChangeLog:
* gcc.target/arm/acle/pacbti-m-predef-2.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-4.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-5.c: New test.



These are all execution tests.  I think we also need some compile-only
tests so that we get better coverage when the target does not directly
support PACBTI.

We also need some tests for the defines when targetting armv8-m.main
and some tests for checking __ARM_FEATURE_BTI and __ARM_FEATURE_PAC
(the tests here check only the '..._DEFAULT' macros.


Hi Richard & all,

please find attached the updated version of this patch.

Best Regards

   Andrea

gcc/ChangeLog:

* config/arm/arm-c.c (arm_cpu_builtins): Define
__ARM_FEATURE_BTI_DEFAULT, __ARM_FEATURE_PAC_DEFAULT,
__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-2.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-4.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-5.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-8.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-9.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-10.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-11.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-12.c: Likewise.



diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c

new file mode 100644
index 000..311cf572dd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options " -mbranch-protection=bti+pac-ret" } */

This is not enough.  For example, if the testsuite is being run with 
"-march=armv6-m" as the testrun options, we'll get an error that will 
cause a test failure.  You need to run a pre-test rule that validates 
that adding -mbranch-protection is safe.


+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-11.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.1-m.main+pacbti" } */

Similarly here, this would conflict with, for example, "-marm" as test 
options.


+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-5.c
@@ -0,0 +1,24 @@
+
+/* { dg-do run } */

Blank line at the start of the test.

The other tests have similar issues.

R.


Re: [PATCH 4/12] arm: Add testsuite library support for PACBTI target

2022-07-05 Thread Richard Earnshaw via Gcc-patches




On 04/07/2022 15:47, Andrea Corallo wrote:

Richard Earnshaw  writes:


On 01/07/2022 14:03, Richard Earnshaw via Gcc-patches wrote:

On 28/04/2022 10:40, Andrea Corallo via Gcc-patches wrote:

Add targeting-checking entities for PACBTI in testsuite
framework.

Pre-approved with the requested changes here
<https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586331.html 

<https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586331.html>>.


gcc/testsuite/ChangeLog:

* testsuite/lib/target-supports.exp:
(check_effective_target_arm_pacbti_hw): New.
* doc/sourcebuild.texi: Document arm_pacbti_hw.

Co-Authored-By: Tejas Belagod  


+proc check_effective_target_arm_pacbti_hw {} {
+    return [check_runtime arm_pacbti_hw_available {
+    __attribute__ ((naked)) int
+    main (void)
+    {
+  asm ("pac r12, lr, sp");
So the armv8-m Arm ARM says that this instruction is in the NOP
space and that it is undefined if we aren't armv8-m.main or higher.
+  asm ("mov r0, #0");
+  asm ("autg r12, lr, sp");
This isn't in the nop space, but the Arm ARM says it is
unpredictable if the extension isn't present.  Unfortunately, that
means this isn't a particularly reliable way of detecting that the
PACBTI feature is present.
However, I can't think off hand of more reliable way of testing this
since reading the feature register ID_ISAR5 is not possible when in
unprivileged mode.
So I think we'll have to live with this.
+  asm ("bx lr");
+    }
+    } ""]
OK.



Or perhaps not. The test does not try to add the right options to
enable PAC/BTI if those aren't in the default selection for the
current testsuite run.

Perhaps we also need some additional tests to work out what
architecture options to add (if any) to ensure the test will at least
assemble.


Hi Richard,
thanks for reviewing.

Wouldn't be sufficient for that to have this test compiled with
-march=armv8-m.main?


Take a look at how, for example, check_effective_target_arm_mve_hw (and 
add_options_for_v8_1_m_mve_fp) is implemented.


R.



BR

   Andrea


Re: [PATCH 3/12 V2] arm: Add option -mbranch-protection

2022-07-04 Thread Richard Earnshaw via Gcc-patches




On 04/07/2022 10:27, Andrea Corallo via Gcc-patches wrote:

Richard Earnshaw  writes:

[...]


+@item
+-mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}][+@var{bti}]|@var{bti}[+@var{pac-ret}[+@var{leaf}]]
+@opindex mbranch-protection
+Enable branch protection features (armv8.1-m.main only).
+@samp{none} generate code without branch protection or return address
+signing.
+@samp{standard[+@var{leaf}]} generate code with all branch protection
+features enabled at their standard level.
+@samp{pac-ret[+@var{leaf}]} generate code with return address signing
+set to its standard level, which is to sign all functions that save
+the return address to memory.
+@samp{leaf} When return address signing is enabled, also sign leaf
+functions even if they do not write the return address to memory.
++@samp{bti} Add landing-pad instructions at the permitted targets of
+indirect branch instructions.
+
+If the @samp{+pacbti} architecture extension is not enabled, then all
+branch protection and return address signing operations are
+constrained to use only the instructions defined in the
+architectural-NOP space. The generated code will remain
+backwards-compatible with earlier versions of the architecture, but
+the additional security can be enabled at run time on processors that
+support the @samp{PACBTI} extension.
+
+Branch target enforcement using BTI can only be enabled at runtime if
+all code in the application has been compiled with at least
+@samp{-mbranch-protection=bti}.
+
+The default is to generate code without branch protection or return
+address signing.

This needs to make it clear that -mbranch-protection != none is only
supported on armv8-m.main or later.

R.


Hi Richard,

thanks for reviewing, please find attached the respinned patch.

Ok for trunk (when the rest of the series will be approved)?

Best Regards

   Andrea

gcc/ChangeLog:

* config/arm/arm.c (arm_configure_build_target): Parse and validate
-mbranch-protection option and initialize appropriate data structures.
* config/arm/arm.opt (-mbranch-protection): New option.
* doc/invoke.texi (Arm Options): Document it.

Co-Authored-By: Tejas Belagod  
Co-Authored-By: Richard Earnshaw 



OK.

R.


Re: [PATCH 12/12 V2] arm: implement bti injection

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/06/2022 10:21, Andrea Corallo via Gcc-patches wrote:

Hi all,

second iteration of this patch enabling Branch Target Identification
Armv8.1-M Mechanism [1].

This is achieved by using the bti pass made common with Aarch64.

The pass iterates through the instructions and adds the necessary BTI
instructions at the beginning of every function and at every landing
pads targeted by indirect jumps.

Best Regards

   Andrea

[1]


gcc/ChangeLog

2022-04-07  Andrea Corallo  

* config.gcc (arm*-*-*): Add 'aarch-bti-insert.o' object.
* config/arm/arm-protos.h: Update.
* config/arm/arm.cc (aarch_bti_enabled, aarch_bti_j_insn_p)
(aarch_pac_insn_p, aarch_gen_bti_c, aarch_gen_bti_j): New
functions.
* config/arm/arm.md (bti_nop): New insn.
* config/arm/t-arm (PASSES_EXTRA): Add 'arm-passes.def'.
(aarch-bti-insert.o): New target.
* config/arm/unspecs.md (UNSPEC_BTI_NOP): New unspec.
* config/arm/aarch-bti-insert.cc (rest_of_insert_bti): Update
to verify arch compatibility.

gcc/testsuite/ChangeLog

2022-04-07  Andrea Corallo  

* gcc.target/arm/bti-1.c: New testcase.
* gcc.target/arm/bti-2.c: Likewise.


@@ -104,6 +105,14 @@ rest_of_insert_bti (void)
   rtx_insn *insn;
   basic_block bb;

+#if defined (TARGET_32BIT) || defined (TARGET_THUMB1)

See the comment about errors in response to patch 10.  I'd simply expect 
the gate function to be disabled when we can't support PAC, so we should 
never get here.



+  if (!arm_arch8)
+{
+  error ("This architecture does not support branch protection 
instructions");

+  goto exit;
+}
+#endif
+
...
+
+rtx aarch_gen_bti_c (void)
+{
+  return gen_bti_nop ();
+}
+
+rtx aarch_gen_bti_j (void)
+{
+  return gen_bti_nop ();
+}
+

Function names should start a new line... Thus:

rtx
aarch_gen_bti_c (void)

etc.

+(define_insn "bti_nop"
+  [(unspec_volatile [(const_int 0)] UNSPEC_BTI_NOP)]
+  "TARGET_THUMB2"

This isn't quite right.  We need v8-m.main as the baseline architecture 
for the NOPs to behave as NOPs.


+  "bti"
+  [(set_attr "type" "mov_reg")])
+

How to deal with architectural NOPs is an interesting question.  I think 
really, for the scheduler we need to describe each newly defined NOP 
separately, then in the scheduling descriptions we can handle all 
unimplemented NOPs by grouping them together for that architecture, 
whilst describing more accurately how to handle them on CPUs where they 
acquire a defined behaviour.


diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2021bdf9d2f..004e1dfa8d8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -353,7 +353,7 @@ arc*-*-*)
;;
 arm*-*-*)
cpu_type=arm
-   extra_objs="arm-builtins.o arm-mve-builtins.o aarch-common.o"
+	extra_objs="arm-builtins.o arm-mve-builtins.o aarch-common.o 
aarch-bti-insert.o"


--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -175,3 +175,13 @@ arm-d.o: $(srcdir)/config/arm/arm-d.cc
 arm-common.o: arm-cpu-cdata.h

 driver-arm.o: arm-native.h
+
+PASSES_EXTRA += $(srcdir)/config/arm/arm-passes.def

See comment on patch 11.  Perhaps the right thing to do is to move the 
hunk that adds arm-passes.def into this patch.


Re: [PATCH 11/12] aarch64: Make bti pass generic so it can be used by the arm backend

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:51, Andrea Corallo via Gcc-patches wrote:

Hi all,

this patch splits and restructures the aarch64 bti pass code in order
to have it usable by the arm backend as well.  These changes have no
functional impact.

Best Regards

   Andrea

gcc/Changelog

* config.gcc (aarch64*-*-*): Rename 'aarch64-bti-insert.o' into
'aarch-bti-insert.o'.
* config/aarch64/aarch64-protos.h: Remove 'aarch64_bti_enabled'
proto.
* config/aarch64/aarch64.cc (aarch_bti_enabled): Rename.
(aarch_bti_j_insn_p, aarch_pac_insn_p): New functions.
(aarch64_output_mi_thunk)
(aarch64_print_patchable_function_entry)
(aarch64_file_end_indicate_exec_stack): Update renamed function
calls to renamed functions.
* config/aarch64/t-aarch64 (aarch-bti-insert.o): Update target.
* config/arm/aarch-bti-insert.cc: New file including and
generalizing code from aarch64-bti-insert.cc.
* config/arm/aarch-common-protos.h: Update.
* config/arm/arm-passes.def: New file.



Is this patch fully stand-alone?  It adds arm-passes.def, which adds a 
reference to pass_insert_bti, but that isn't fully wired up until the 
next patch.


R.


Re: [PATCH 10/12 V2] arm: Implement cortex-M return signing address codegen

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/06/2022 10:17, Andrea Corallo via Gcc-patches wrote:

Hi all,

second version of this patch enabling address return signature and
verification based on Armv8.1-M Pointer Authentication [1].

To sign the return address, we use the PAC R12, LR, SP instruction
upon function entry.  This is signing LR using SP and storing the
result in R12.  R12 will be pushed into the stack.

During function epilogue R12 will be popped and AUT R12, LR, SP will
be used to verify that the content of LR is still valid before return.

Here an example of PAC instrumented function prologue and epilogue:

void foo (void);

int main()
{
   foo ();
   return 0;
}

Compiled with '-march=armv8.1-m.main -mbranch-protection=pac-ret
-mthumb' translates into:

main:
pac ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

The patch also takes care of generating a PACBTI instruction in place
of the sequence BTI+PAC when Branch Target Identification is enabled
contextually.

Ex. the previous example compiled with '-march=armv8.1-m.main
-mbranch-protection=pac-ret+bti -mthumb' translates into:

main:
pacbti  ip, lr, sp
push{r3, r7, ip, lr}
add r7, sp, #0
bl  foo
movsr3, #0
mov r0, r3
pop {r3, r7, ip, lr}
aut ip, lr, sp
bx  lr

As part of previous upstream suggestions a test for varargs has been
added and '-mtpcs-frame' is deemed being incompatible with this return
signing address feature being introduced.

[1] 


gcc/Changelog

* config/arm/arm.c: (arm_compute_frame_layout)
(arm_expand_prologue, thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_current_function_pac_enabled_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

gcc/testsuite/Changelog

* gcc.target/arm/pac.h : New file.
* gcc.target/arm/pac-1.c : New test case.
* gcc.target/arm/pac-2.c : Likewise.
* gcc.target/arm/pac-3.c : Likewise.
* gcc.target/arm/pac-4.c : Likewise.
* gcc.target/arm/pac-5.c : Likewise.
* gcc.target/arm/pac-6.c : Likewise.
* gcc.target/arm/pac-7.c : Likewise.
* gcc.target/arm/pac-8.c : Likewise.



@@ -21139,6 +21139,14 @@ arm_compute_save_core_reg_mask (void)

   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

+  if (arm_current_function_pac_enabled_p ())
+{
+  if (TARGET_TPCS_FRAME
+ || (TARGET_TPCS_LEAF_FRAME && crtl->is_leaf))
+   error ("TPCS incompatible with return address signing.");
+  save_reg_mask |= 1 << IP_REGNUM;
+}
+

This is the wrong place for a test like this as it will be generated 
every time this function is called (which might be more than once per 
compiled function).


However, TPCS frames are only supported for 'thumb-1' code and PACBTI 
needs armv8-m.main (ie Thumb-2), so the test is really pretty pointless 
at the moment.  I think we should just drop the error.


@@ -22302,7 +22310,7 @@ arm_emit_multi_reg_pop (unsigned long 
saved_regs_mask)

 par = emit_insn (par);

   REG_NOTES (par) = dwarf;
-  if (!return_in_pc)
+  if (!return_in_pc && !frame_pointer_needed)
 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
 stack_pointer_rtx, stack_pointer_rtx);
 }

What's this hunk for?  It doesn't seem related to the PAC changes.  Is 
this some generic bug?  If so, it should be pulled out into a separate 
patch.  If not, it needs some comment as to why we do it this way.


@@ -23352,6 +23360,11 @@ output_probe_stack_range (rtx reg1, rtx reg2)
   return "";
 }

+static bool  aarch_bti_enabled ()
+{
+  return false;
+}
+

GNU style requires the function name to be in the first column:

static bool
aarch_bti_enabled ()
{
  ...

@@ -23431,11 +23444,12 @@ arm_expand_prologue (void)
   /* The static chain register is the same as the IP register.  If it is
  clobbered when creating the frame, we need to save and restore 
it.  */

   clobber_ip = IS_NESTED (func_type)
-  && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
-  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
-   || flag_stack_clash_protection)
-  && !df_regs_ever_live_p (LR_REGNUM)
-  && arm_r3_live_at_start_p ()));
+&& (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+|| 

Re: [PATCH 9/12] arm: Make libgcc bti compatible

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:48, Andrea Corallo via Gcc-patches wrote:

This change add bti instructions at the beginning of arm specific
libgcc hand written assembly routines.

2022-03-31  Andrea Corallo  

* libgcc/config/arm/crti.S (FUNC_START): Add bti instruction if
necessary.
* libgcc/config/arm/lib1funcs.S (THUMB_FUNC_START, FUNC_START):
Likewise.



+#if defined(__ARM_FEATURE_BTI)

Wouldn't it be better to use __ARM_FEATURE_BTI_DEFAULT?  That way we 
only get BTI instructions in multilib variants that have asked for BTI.


R.


Re: [PATCH 8/12 V2] arm: Introduce multilibs for PACBTI target feature

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 01/07/2022 15:54, Richard Earnshaw via Gcc-patches wrote:



On 01/06/2022 13:32, Andrea Corallo via Gcc-patches wrote:

Hi all,

second iteration of the previous patch adding the following new
multilibs:

thumb/v8.1-m.main+pacbti/mbranch-protection/nofp
thumb/v8.1-m.main+pacbti+dp/mbranch-protection/soft
thumb/v8.1-m.main+pacbti+dp/mbranch-protection/hard
thumb/v8.1-m.main+pacbti+fp/mbranch-protection/soft
thumb/v8.1-m.main+pacbti+fp/mbranch-protection/hard
thumb/v8.1-m.main+pacbti+mve/mbranch-protection/hard

To trigger the following compiler flags:

-mthumb -march=armv8.1-m.main+pacbti -mbranch-protection=standard 
-mfloat-abi=soft
-mthumb -march=armv8.1-m.main+pacbti+fp -mbranch-protection=standard 
-mfloat-abi=softfp
-mthumb -march=armv8.1-m.main+pacbti+fp -mbranch-protection=standard 
-mfloat-abi=hard
-mthumb -march=armv8.1-m.main+pacbti+fp.dp 
-mbranch-protection=standard -mfloat-abi=softfp
-mthumb -march=armv8.1-m.main+pacbti+fp.dp 
-mbranch-protection=standard -mfloat-abi=hard
-mthumb -march=armv8.1-m.main+pacbti+mve -mbranch-protection=standard 
-mfloat-abi=hard


gcc/ChangeLog:

* config/arm/t-rmprofile: Add multilib rules for march +pacbti
   and mbranch-protection.



+# Map all mbranch-protection values other than 'none' to 'standard'.
+MULTILIB_MATCHES    += mbranch-protection?standard=mbranch-protection?bti
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?pac-ret
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?pac-ret+leaf
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?pac-ret+bti
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?pac-ret+leaf+bti
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?bti+pac-ret
+MULTILIB_MATCHES    += 
mbranch-protection?standard=mbranch-protection?bti+pac-ret+leaf

+

The documentation mentions -mbranch-protection=standard+leaf, so you're 
missing a mapping for that.



OK with that change.

R.


Oh, and please add some tests to gcc/testsuite/gcc.target/arm/multilib.exp

R.


Re: [PATCH 8/12 V2] arm: Introduce multilibs for PACBTI target feature

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 01/06/2022 13:32, Andrea Corallo via Gcc-patches wrote:

Hi all,

second iteration of the previous patch adding the following new
multilibs:

thumb/v8.1-m.main+pacbti/mbranch-protection/nofp
thumb/v8.1-m.main+pacbti+dp/mbranch-protection/soft
thumb/v8.1-m.main+pacbti+dp/mbranch-protection/hard
thumb/v8.1-m.main+pacbti+fp/mbranch-protection/soft
thumb/v8.1-m.main+pacbti+fp/mbranch-protection/hard
thumb/v8.1-m.main+pacbti+mve/mbranch-protection/hard

To trigger the following compiler flags:

-mthumb -march=armv8.1-m.main+pacbti -mbranch-protection=standard 
-mfloat-abi=soft
-mthumb -march=armv8.1-m.main+pacbti+fp -mbranch-protection=standard 
-mfloat-abi=softfp
-mthumb -march=armv8.1-m.main+pacbti+fp -mbranch-protection=standard 
-mfloat-abi=hard
-mthumb -march=armv8.1-m.main+pacbti+fp.dp -mbranch-protection=standard 
-mfloat-abi=softfp
-mthumb -march=armv8.1-m.main+pacbti+fp.dp -mbranch-protection=standard 
-mfloat-abi=hard
-mthumb -march=armv8.1-m.main+pacbti+mve -mbranch-protection=standard 
-mfloat-abi=hard

gcc/ChangeLog:

* config/arm/t-rmprofile: Add multilib rules for march +pacbti
   and mbranch-protection.



+# Map all mbranch-protection values other than 'none' to 'standard'.
+MULTILIB_MATCHES   += mbranch-protection?standard=mbranch-protection?bti
+MULTILIB_MATCHES   += 
mbranch-protection?standard=mbranch-protection?pac-ret
+MULTILIB_MATCHES	+= 
mbranch-protection?standard=mbranch-protection?pac-ret+leaf
+MULTILIB_MATCHES	+= 
mbranch-protection?standard=mbranch-protection?pac-ret+bti
+MULTILIB_MATCHES	+= 
mbranch-protection?standard=mbranch-protection?pac-ret+leaf+bti
+MULTILIB_MATCHES	+= 
mbranch-protection?standard=mbranch-protection?bti+pac-ret
+MULTILIB_MATCHES	+= 
mbranch-protection?standard=mbranch-protection?bti+pac-ret+leaf

+

The documentation mentions -mbranch-protection=standard+leaf, so you're 
missing a mapping for that.



OK with that change.

R.


Re: [PATCH 7/12] arm: Emit build attributes for PACBTI target feature

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:45, Andrea Corallo via Gcc-patches wrote:

This patch emits assembler directives for PACBTI build attributes as
defined by the
ABI.



gcc/ChangeLog:

* config/arm/arm.c (arm_file_start): Emit EABI attributes for
Tag_PAC_extension, Tag_BTI_extension, TAG_BTI_use, TAG_PACRET_use.


This bit is OK.



gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-1.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-3: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-7.c: Likewise.


These tests contain directives like:

+/* { dg-additional-options " -mbranch-protection=pac-ret+bti 
--save-temps" } */


But they don't check that the architecture permits this (it has to be 
armv8-m.main or later).




Co-Authored-By: Tejas Belagod  



R.


Re: [PATCH 6/12] arm: Add pointer authentication for stack-unwinding runtime

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:44, Andrea Corallo via Gcc-patches wrote:

This patch adds authentication for when the stack is unwound when an
exception is taken.  All the changes here are done to the runtime code
in libgcc's unwinder code for Arm target. All the changes are guarded
under defined (__ARM_FEATURE_PAC_DEFAULT) and activated only if the
+pacbti feature is switched on for the architecture. This means that
switching on the target feature via -march or -mcpu is sufficient and
-mbranch-protection need not be enabled. This ensures that the
unwinder is authenticated only if the PACBTI instructions are
available in the non-NOP space as it uses AUTG.  Just generating
PAC/AUT instructions using -mbranch-protection will not enable
authentication on the unwinder.

Pre-approved with the requested changes here
.

gcc/ChangeLog:

* ginclude/unwind-arm-common.h (_Unwind_VRS_RegClass): Introduce
new pseudo register class _UVRSC_PAC.
* libgcc/config/arm/pr-support.c (__gnu_unwind_execute): Decode
exception opcode (0xb4) for saving RA_AUTH_CODE and authenticate
with AUTG if found.
* libgcc/config/arm/unwind-arm.c (struct pseudo_regs): New.
(phase1_vrs): Introduce new field to store pseudo-reg state.
(phase2_vrs): Likewise.
(_Unwind_VRS_Get): Load pseudo register state from virtual reg set.
(_Unwind_VRS_Set): Store pseudo register state to virtual reg set.
(_Unwind_VRS_Pop): Load pseudo register value from stack into VRS.

Co-Authored-By: Tejas Belagod  



Ok.

R.


Re: [PATCH 5/12] arm: Implement target feature macros for PACBTI

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:42, Andrea Corallo via Gcc-patches wrote:

This patch implements target feature macros when PACBTI is enabled
through the -march option or -mbranch-protection.  The target feature
macros __ARM_FEATURE_PAC_DEFAULT and __ARM_FEATURE_BTI_DEFAULT are
specified in ARM ACLE

__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI are specified in the
pull-request .

Approved here
.

gcc/ChangeLog:

* config/arm/arm-c.c (arm_cpu_builtins): Define
__ARM_FEATURE_BTI_DEFAULT, __ARM_FEATURE_PAC_DEFAULT,
__ARM_FEATURE_PAUTH and __ARM_FEATURE_BTI.


This bit is OK.



gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-2.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-4.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-5.c: New test.



These are all execution tests.  I think we also need some compile-only 
tests so that we get better coverage when the target does not directly 
support PACBTI.


We also need some tests for the defines when targetting armv8-m.main and 
some tests for checking __ARM_FEATURE_BTI and __ARM_FEATURE_PAC (the 
tests here check only the '..._DEFAULT' macros.



Co-Authored-By: Tejas Belagod  



R.


Re: [PATCH 4/12] arm: Add testsuite library support for PACBTI target

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 01/07/2022 14:03, Richard Earnshaw via Gcc-patches wrote:



On 28/04/2022 10:40, Andrea Corallo via Gcc-patches wrote:

Add targeting-checking entities for PACBTI in testsuite
framework.

Pre-approved with the requested changes here
<https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586331.html>.

gcc/testsuite/ChangeLog:

* testsuite/lib/target-supports.exp:
(check_effective_target_arm_pacbti_hw): New.
* doc/sourcebuild.texi: Document arm_pacbti_hw.

Co-Authored-By: Tejas Belagod  


+proc check_effective_target_arm_pacbti_hw {} {
+    return [check_runtime arm_pacbti_hw_available {
+    __attribute__ ((naked)) int
+    main (void)
+    {
+  asm ("pac r12, lr, sp");

So the armv8-m Arm ARM says that this instruction is in the NOP space 
and that it is undefined if we aren't armv8-m.main or higher.


+  asm ("mov r0, #0");
+  asm ("autg r12, lr, sp");

This isn't in the nop space, but the Arm ARM says it is unpredictable if 
the extension isn't present.  Unfortunately, that means this isn't a 
particularly reliable way of detecting that the PACBTI feature is present.


However, I can't think off hand of more reliable way of testing this 
since reading the feature register ID_ISAR5 is not possible when in 
unprivileged mode.


So I think we'll have to live with this.

+  asm ("bx lr");
+    }
+    } ""]

OK.



Or perhaps not. The test does not try to add the right options to enable 
PAC/BTI if those aren't in the default selection for the current 
testsuite run.


Perhaps we also need some additional tests to work out what architecture 
options to add (if any) to ensure the test will at least assemble.



R.

R.


Re: [PATCH 4/12] arm: Add testsuite library support for PACBTI target

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:40, Andrea Corallo via Gcc-patches wrote:

Add targeting-checking entities for PACBTI in testsuite
framework.

Pre-approved with the requested changes here
.

gcc/testsuite/ChangeLog:

* testsuite/lib/target-supports.exp:
(check_effective_target_arm_pacbti_hw): New.
* doc/sourcebuild.texi: Document arm_pacbti_hw.

Co-Authored-By: Tejas Belagod  


+proc check_effective_target_arm_pacbti_hw {} {
+return [check_runtime arm_pacbti_hw_available {
+   __attribute__ ((naked)) int
+   main (void)
+   {
+ asm ("pac r12, lr, sp");

So the armv8-m Arm ARM says that this instruction is in the NOP space 
and that it is undefined if we aren't armv8-m.main or higher.


+ asm ("mov r0, #0");
+ asm ("autg r12, lr, sp");

This isn't in the nop space, but the Arm ARM says it is unpredictable if 
the extension isn't present.  Unfortunately, that means this isn't a 
particularly reliable way of detecting that the PACBTI feature is present.


However, I can't think off hand of more reliable way of testing this 
since reading the feature register ID_ISAR5 is not possible when in 
unprivileged mode.


So I think we'll have to live with this.

+ asm ("bx lr");
+   }
+} ""]

OK.

R.


Re: [PATCH 3/12] arm: Add option -mbranch-protection

2022-07-01 Thread Richard Earnshaw via Gcc-patches




On 28/04/2022 10:38, Andrea Corallo via Gcc-patches wrote:

[PATCH 3/12] arm: Add option -mbranch-protection

Add -mbranch-protection option.  This option enables the
code-generation of pointer signing and authentication instructions in
function prologues and epilogues.

gcc/ChangeLog:

* config/arm/arm.c (arm_configure_build_target): Parse and validate
-mbranch-protection option and initialize appropriate data structures.
* config/arm/arm.opt (-mbranch-protection): New option.
* doc/invoke.texi (Arm Options): Document it.

Co-Authored-By: Tejas Belagod  
Co-Authored-By: Richard Earnshaw 


+@item
+-mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}][+@var{bti}]|@var{bti}[+@var{pac-ret}[+@var{leaf}]]
+@opindex mbranch-protection
+Enable branch protection features (armv8.1-m.main only).
+@samp{none} generate code without branch protection or return address
+signing.
+@samp{standard[+@var{leaf}]} generate code with all branch protection
+features enabled at their standard level.
+@samp{pac-ret[+@var{leaf}]} generate code with return address signing
+set to its standard level, which is to sign all functions that save
+the return address to memory.
+@samp{leaf} When return address signing is enabled, also sign leaf
+functions even if they do not write the return address to memory.
++@samp{bti} Add landing-pad instructions at the permitted targets of
+indirect branch instructions.
+
+If the @samp{+pacbti} architecture extension is not enabled, then all
+branch protection and return address signing operations are
+constrained to use only the instructions defined in the
+architectural-NOP space. The generated code will remain
+backwards-compatible with earlier versions of the architecture, but
+the additional security can be enabled at run time on processors that
+support the @samp{PACBTI} extension.
+
+Branch target enforcement using BTI can only be enabled at runtime if
+all code in the application has been compiled with at least
+@samp{-mbranch-protection=bti}.
+
+The default is to generate code without branch protection or return
+address signing.

This needs to make it clear that -mbranch-protection != none is only 
supported on armv8-m.main or later.


R.


  1   2   3   4   >