[PATCH][GCC] arm: remove unnecessary armv9-a multilib variant [PR104144]

2022-04-08 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

This patch removes the unnecessary armv9-a multilib variant which was
introduced in commit 32ba7860ccaddd5219e6dae94a3d0653e124c9dd (add
armv9-a architecture to -march). The armv9-a(+simd) multilibs now map to
the already existing armv8-a(+simd) ones, as there are no differences
between the two.
Users will no longer need a Binutils that supports `armv9-a` to build the
toolchain.

See multilib print below as well:

$ for v in {7..9}
  do
./arm-gcc -march=armv$v-a test.c --specs=rdimon.specs -print-multi-directory
  done
thumb/v7-a/nofp
thumb/v8-a/nofp
thumb/v8-a/nofp

Regtested on arm-none-eabi cross and no issues.
OK to install?

PS: If approved can I ask for someone to commit in my name pretty please?

gcc/ChangeLog:

PR target/104144
* config/arm/t-aprofile: Remove armv9-a multilib.
* config/arm/t-multilib: Remove armv9-a multilib.

gcc/testsuite/ChangeLog:

PR target/104144
* gcc.target/arm/multilib.exp: Updated tests.

-- 

diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile
index 
574951c5f0bcd6fe1a60b6b2421ae0970a4c0e48..fe2ec88d92de55813816deabd9da461c86c6c7e3
 100644
--- a/gcc/config/arm/t-aprofile
+++ b/gcc/config/arm/t-aprofile
@@ -26,8 +26,8 @@
 
 # Arch and FPU variants to build libraries with
 
-MULTI_ARCH_OPTS_A   = 
march=armv7-a/march=armv7-a+fp/march=armv7-a+simd/march=armv7ve+simd/march=armv8-a/march=armv8-a+simd/march=armv9-a/march=armv9-a+simd
-MULTI_ARCH_DIRS_A   = v7-a v7-a+fp v7-a+simd v7ve+simd v8-a v8-a+simd v9-a 
v9-a+simd
+MULTI_ARCH_OPTS_A   = 
march=armv7-a/march=armv7-a+fp/march=armv7-a+simd/march=armv7ve+simd/march=armv8-a/march=armv8-a+simd
+MULTI_ARCH_DIRS_A   = v7-a v7-a+fp v7-a+simd v7ve+simd v8-a v8-a+simd
 
 # ARMv7-A - build nofp, fp-d16 and SIMD variants
 
@@ -46,11 +46,6 @@ MULTILIB_REQUIRED+= mthumb/march=armv8-a/mfloat-abi=soft
 MULTILIB_REQUIRED  += mthumb/march=armv8-a+simd/mfloat-abi=hard
 MULTILIB_REQUIRED  += mthumb/march=armv8-a+simd/mfloat-abi=softfp
 
-# Armv9-A - build nofp and SIMD variants.
-MULTILIB_REQUIRED  += mthumb/march=armv9-a/mfloat-abi=soft
-MULTILIB_REQUIRED  += mthumb/march=armv9-a+simd/mfloat-abi=hard
-MULTILIB_REQUIRED  += mthumb/march=armv9-a+simd/mfloat-abi=softfp
-
 # Matches
 
 # Arch Matches
@@ -135,14 +130,12 @@ MULTILIB_MATCHES  += $(foreach ARCH, 
$(v8_6_a_simd_variants), \
 march?armv8-a+simd=march?armv8.6-a$(ARCH))
 
 # Armv9 without SIMD: map down to base architecture
-MULTILIB_MATCHES   += $(foreach ARCH, $(v9_a_nosimd_variants), \
-march?armv9-a=march?armv9-a$(ARCH))
+MULTILIB_MATCHES+= march?armv8-a=march?armv9-a
+# No variants without SIMD.
 
 # Armv9 with SIMD: map down to base arch + simd
-MULTILIB_MATCHES   += march?armv9-a+simd=march?armv9-a+crc+simd \
-  $(foreach ARCH, $(filter-out +simd, 
$(v9_a_simd_variants)), \
-march?armv9-a+simd=march?armv9-a$(ARCH) \
-march?armv9-a+simd=march?armv9-a+crc$(ARCH))
+MULTILIB_MATCHES+= $(foreach ARCH, $(v9_a_simd_variants), \
+march?armv8-a+simd=march?armv9-a$(ARCH))
 
 # Use Thumb libraries for everything.
 
@@ -150,13 +143,11 @@ MULTILIB_REUSE+= 
mthumb/march.armv7-a/mfloat-abi.soft=marm/march.armv7-a/mfloa
 
 MULTILIB_REUSE += 
mthumb/march.armv8-a/mfloat-abi.soft=marm/march.armv8-a/mfloat-abi.soft
 
-MULTILIB_REUSE += 
mthumb/march.armv9-a/mfloat-abi.soft=marm/march.armv9-a/mfloat-abi.soft
-
 MULTILIB_REUSE += $(foreach ABI, hard softfp, \
-$(foreach ARCH, armv7-a+fp armv7-a+simd 
armv7ve+simd armv8-a+simd armv9-a+simd, \
+$(foreach ARCH, armv7-a+fp armv7-a+simd 
armv7ve+simd armv8-a+simd, \
   
mthumb/march.$(ARCH)/mfloat-abi.$(ABI)=marm/march.$(ARCH)/mfloat-abi.$(ABI)))
 
 # Softfp but no FP, use the soft-float libraries.
 MULTILIB_REUSE += $(foreach MODE, arm thumb, \
-$(foreach ARCH, armv7-a armv8-a armv9-a, \
+$(foreach ARCH, armv7-a armv8-a, \
   
mthumb/march.$(ARCH)/mfloat-abi.soft=m$(MODE)/march.$(ARCH)/mfloat-abi.softfp))
diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib
index 
ea258b1c21091ec1d5ae671aa4cd014ce3a3952c..6bb58d39ea90f717218b7e4fa5671c65a764ffc7
 100644
--- a/gcc/config/arm/t-multilib
+++ b/gcc/config/arm/t-multilib
@@ -78,7 +78,6 @@ v8_4_a_simd_variants  := $(call all_feat_combs, simd fp16 
crypto i8mm bf16)
 v8_5_a_simd_variants   := $(call all_feat_combs, simd fp16 crypto i8mm bf16)
 v8_6_a_simd_variants   := $(call all_feat_combs, simd fp16 crypto i8mm bf16)
 v8_r_nosimd_variants   := +crc
-v9_a_nosimd_variants   := +crc
 v9_a_simd_variants := $(call all_feat_combs, simd fp16 crypto i8mm bf16)
 
 ifneq (,$(HAS_APROFILE))
@@ -206,14 +205,10 @@ 

RE: [PATCH][GCC] aarch64: fix: ls64 tests fail on aarch64-linux-gnu_ilp32 [PR103729]

2022-02-23 Thread Przemyslaw Wirkus via Gcc-patches
Ping :)

> This patch is sorting issue with LS64 intrinsics tests failing with
> aarch64-linux-gnu_ilp32 target.
>
> Regtested on aarch64-linux-gnu_ilp32, aarch64-elf and aarch64_be-elf
> and no issues.
>
> OK to install?
>
> gcc/ChangeLog:
>
>    PR target/103729
>    * config/aarch64/aarch64-builtins.c 
>(aarch64_expand_builtin_ls64):
>    Handle SImode for ILP32.

--- 

diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index 
0d09fe9dd6dd65c655f5bd0b9a622e7550b61a4b..58bcd99d25b79191589cf9bf8a99db4f4b6a6ba1
 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2216,7 +2216,8 @@ aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx 
target)
   {
 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
 create_output_operand (&ops[0], target, V8DImode);
-create_input_operand (&ops[1], op0, DImode);
+create_input_operand (&ops[1],
+GET_MODE (op0) == SImode ? gen_reg_rtx (DImode) : op0, DImode);
 expand_insn (CODE_FOR_ld64b, 2, ops);
 return ops[0].value;
   }
@@ -2234,7 +2235,8 @@ aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx 
target)
 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
 create_output_operand (&ops[0], target, DImode);
-create_input_operand (&ops[1], op0, DImode);
+create_input_operand (&ops[1],
+GET_MODE (op0) == SImode ? gen_reg_rtx (DImode) : op0, DImode);
 create_input_operand (&ops[2], op1, V8DImode);
 expand_insn (CODE_FOR_st64bv, 3, ops);
 return ops[0].value;
@@ -2244,7 +2246,8 @@ aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx 
target)
 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
 create_output_operand (&ops[0], target, DImode);
-create_input_operand (&ops[1], op0, DImode);
+create_input_operand (&ops[1],
+GET_MODE (op0) == SImode ? gen_reg_rtx (DImode) : op0, DImode);
 create_input_operand (&ops[2], op1, V8DImode);
 expand_insn (CODE_FOR_st64bv0, 3, ops);
 return ops[0].value;



[PATCH][GCC] aarch64: fix: ls64 tests fail on aarch64-linux-gnu_ilp32 [PR103729]

2021-12-22 Thread Przemyslaw Wirkus via Gcc-patches
This patch fixes an issue with LS64 intrinsics tests failing on the
aarch64-linux-gnu_ilp32 target.

Regtested on aarch64-linux-gnu_ilp32, aarch64-elf and aarch64_be-elf
and no issues.

OK to install?

gcc/ChangeLog:

PR target/103729
* config/aarch64/aarch64-builtins.c 
(aarch64_expand_builtin_ls64):
Handle SImode for ILP32.


rb15171.patch
Description: rb15171.patch


RE: [PATCH][GCC] aarch64: fix: ls64 tests fail on aarch64_be [PR103729]

2021-12-16 Thread Przemyslaw Wirkus via Gcc-patches



> -Original Message-
> From: Richard Sandiford 
> Sent: 16 December 2021 10:46
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; christophe.l...@linaro.org; Richard Earnshaw
> ; Kyrylo Tkachov ;
> Marcus Shawcroft 
> Subject: Re: [PATCH][GCC] aarch64: fix: ls64 tests fail on aarch64_be 
> [PR103729]
> 
> Przemyslaw Wirkus  writes:
> > Hi,
> >
> > This patch is sorting issue with LS64 intrinsics tests failing with
> > AArch64_be targets.
> >
> > Regtested on aarch64_be_elf and aarch64_elf, no issues.
> >
> > OK to install?
> >
> > gcc/ChangeLog:
> >
> > PR target/103729
> > * config/aarch64/aarch64-simd.md (aarch64_movv8di): Allow big
> endian
> > targets to move V8DI.
> 
> OK, thanks.

Thank you.

commit 0a68862e782847752be0ea2b2a987278cdbefc9e

Przemek

> Richard
> 
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index
> >
> 9ebf795a624f0183e049d0db7a71ba2d17dd..f95a7e1d91c97c9e981d75e71
> f0b
> > 49c02ef748ba 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -7265,9 +7265,8 @@ (define_insn "*aarch64_mov"
> >  (define_insn "*aarch64_movv8di"
> >[(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
> > (match_operand:V8DI 1 "general_operand" " r,r,m"))]
> > -  "!BYTES_BIG_ENDIAN
> > -   && (register_operand (operands[0], V8DImode)
> > -   || register_operand (operands[1], V8DImode))"
> > +  "(register_operand (operands[0], V8DImode)
> > +|| register_operand (operands[1], V8DImode))"
> >"#"
> >[(set_attr "type" "multiple,multiple,multiple")
> > (set_attr "length" "32,16,16")]


[PATCH][GCC] aarch64: fix: ls64 tests fail on aarch64_be [PR103729]

2021-12-16 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

This patch fixes an issue with LS64 intrinsics tests failing on
AArch64_be targets.

Regtested on aarch64_be_elf and aarch64_elf, no issues.

OK to install?

gcc/ChangeLog:

PR target/103729
* config/aarch64/aarch64-simd.md (aarch64_movv8di): Allow big endian
targets to move V8DI.


rb15153.patch
Description: rb15153.patch


RE: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics

2021-12-14 Thread Przemyslaw Wirkus via Gcc-patches
> -Original Message-
> From: Richard Sandiford 
> Sent: 14 December 2021 11:58
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
> 
> Przemyslaw Wirkus  writes:
> > Hello Richard,
> >
> > I've updated my patch following all your comments. Thank you.
> >
> > Bootstrapped on aarch64-linux-gnu and all new ACLE tests pass.
> >
> > OK to install?
> 
> Thanks.  OK with a couple of formatting nits:

Updated and committed:

commit fdcddba8f29ea3878851b8b4cd37d0fd3476d3bf

Thank you!

> > @@ -2130,6 +2203,57 @@ aarch64_expand_builtin_tme (int fcode, tree
> exp, rtx target)
> >  return target;
> >  }
> >
> > +/* Function to expand an expression EXP which calls one of the
> Load/Store
> > +   64 Byte extension (LS64) builtins FCODE with the result going to
> > +TARGET.  */ static rtx
> > +aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target) {
> > +  expand_operand ops[3];
> > +
> > +  switch (fcode)
> > +{
> > +case AARCH64_LS64_BUILTIN_LD64B:
> > +  {
> > +rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +create_output_operand ([0], target, V8DImode);
> > +create_input_operand ([1], op0, DImode);
> > +expand_insn (CODE_FOR_ld64b, 2, ops);
> > +return ops[0].value;
> > +  }
> > +case AARCH64_LS64_BUILTIN_ST64B:
> > +  {
> > +rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +create_output_operand ([0], op0, DImode);
> > +create_input_operand ([1], op1, V8DImode);
> > +expand_insn (CODE_FOR_st64b, 2, ops);
> > +return const0_rtx;
> > +  }
> > +case AARCH64_LS64_BUILTIN_ST64BV:
> > +  {
> > +rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +create_output_operand ([0], target, DImode);
> > +create_input_operand ([1], op0, DImode);
> > +create_input_operand ([2], op1, V8DImode);
> > +expand_insn (CODE_FOR_st64bv, 3, ops);
> > +return ops[0].value;
> > +  }
> > +case AARCH64_LS64_BUILTIN_ST64BV0:
> > +  {
> > +rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> > +rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
> > +create_output_operand ([0], target, DImode);
> > +create_input_operand ([1], op0, DImode);
> > +create_input_operand ([2], op1, V8DImode);
> > +expand_insn (CODE_FOR_st64bv0, 3, ops);
> > +return ops[0].value;
> > +  }
> > +}
> > +
> > +gcc_unreachable ();
> 
> This line should be indented by 2 spaces rather than 4.
> 
> > +}
> > +
> >  /* Expand a random number builtin EXP with code FCODE, putting the
> result
> > int TARGET.  If IGNORE is true the return value is ignored.  */
> >
> > […]
> > diff --git a/gcc/config/aarch64/aarch64.c
> > b/gcc/config/aarch64/aarch64.c index
> >
> be24b7320d28deed9a19a0451c96bd67d2fb3104..e0ceba68968a28a9fcf1ba6
> e3a30
> > 36783b0931b0 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -10013,8 +10013,12 @@ aarch64_classify_address (struct
> aarch64_address_info *info,
> >  instruction memory accesses.  */
> >   if (mode == TImode || mode == TFmode)
> > return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> > -   && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> > -   || offset_12bit_unsigned_scaled_p (mode, offset)));
> > +   && (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
> > +   || offset_12bit_unsigned_scaled_p (mode, offset)));
> 
> The original formatting was correct here.
> 
> > +
> > + if (mode == V8DImode)
> > +   return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> > +   && aarch64_offset_7bit_signed_scaled_p (DImode, offset +
> > +48));
> >
> >   /* A 7bit offset check because OImode will emit a ldp/stp
> >  instruction (only big endian will get here).


RE: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics

2021-12-13 Thread Przemyslaw Wirkus via Gcc-patches
Hello Richard,

I've updated my patch following all your comments. Thank you.

Bootstrapped on aarch64-linux-gnu and all new ACLE tests pass.

OK to install?

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
(aarch64_init_ls64_builtin_decl): Helper function.
(aarch64_init_ls64_builtins): Helper function.
(aarch64_init_ls64_builtins_types): Helper function.
(aarch64_general_init_builtins): Init LS64 intrinsics for
TARGET_LS64.
(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
(ls64_builtins_data): New helper struct.
(v8di_UP): New define.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_LS64.
* config/aarch64/aarch64-simd.md (movv8di): New pattern.
(aarch64_movv8di): New pattern.
* config/aarch64/aarch64.c (aarch64_classify_address): New test for TI.
* config/aarch64/aarch64-simd.md: Add new V8DI mov expand.
* config/aarch64/aarch64.c (aarch64_classify_address): Enforce the
TI range (7-bit signed scaled) for both ends of the range.
* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
(TARGET_LS64): New define.
* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
UNSPEC_ST64BV and UNSPEC_ST64BV0.
(ld64b): New define_insn.
(st64b): New define_insn.
(st64bv): New define_insn.
(st64bv0): New define_insn.
* config/aarch64/arm_acle.h (target):
(data512_t): New type derived from __arm_data512_t.
(__arm_data512_t): New internal type.
(__arm_ld64b): New intrinsic.
(__arm_st64b): New intrinsic.
(__arm_st64bv): New intrinsic.
(__arm_st64bv0): New intrinsic.
* config/arm/types.md: Add new type ls64.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/acle/ls64_asm.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b-3.c: New test.
* gcc.target/aarch64/acle/ls64_st64b.c: New test.
* gcc.target/aarch64/acle/ls64_ld_st_o0.c: New test.
* gcc.target/aarch64/acle/ls64_st64b-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv-3.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0-3.c: New test.
* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
for __ARM_FEATURE_LS64.

Kind regards, 
Przemyslaw Wirkus

> -Original Message-
> From: Richard Sandiford 
> Sent: 15 November 2021 13:43
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [PATCH][GCC] aarch64: Add LS64 extension and intrinsics
> 
> Przemyslaw Wirkus  writes:
> > Hi,
> >
> > This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte
> > extension) which is part of Armv8.7-A architecture. Changes include
> > missing plumbing for TARGET_LS64, LS64 data structure and intrinsics
> > defined in ACLE [0]. Machine description of intrinsics is using new V8DI
> > mode added in a separate patch.
> > __ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for
> > atomic 64-byte access to device memory are supported.
> >
> > New compiler internal type is added wrapping ACLE struct data512_t [0]:
> >
> > typedef struct {
> >   uint64_t val[8];
> > } __arm_data512_t;
> >
> > Please note that command line support for this feature was already added 
> > [1].
> >
> >   [0] https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-store-64-byte-intrinsics
> >   [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add
> command-line
> >   support for Armv8.7-a)
> >
> > For below C code see example snippets of generated code:
> >
> > #include <arm_acle.h>
> >
> > void
> > func(const void * addr, data512_t *data) {
> >   *data = __arm_ld64b (addr);
> > }
> >
> > func:
> > ld64b   x8, [x0]
> > stp x8, x9, [x1]
> > sub sp, sp, #64
> > stp x10, x11, [x1, 16]
> > stp x12, x13, [x1, 32]
> > stp x14, x15, [x1, 48]
> > add sp, sp, 64
> > ret
> > ~~~
> >
> >

RE: [PATCH][GCC] aarch64: Add new vector mode V8DI

2021-11-17 Thread Przemyslaw Wirkus via Gcc-patches



> -Original Message-
> From: Richard Sandiford 
> Sent: 17 November 2021 10:08
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Kyrylo Tkachov ;
> Marcus Shawcroft 
> Subject: Re: [PATCH][GCC] aarch64: Add new vector mode V8DI
> 
> Oops, only just realised that I hadn't reviewed this.
> 
> Przemyslaw Wirkus  writes:
> > Hi,
> > This patch is adding new V8DI mode which will be used with new
> > Armv8.7-A
> > LS64 extension intrinsics.
> >
> > Regtested on aarch64-elf and no issues.
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> > 2021-11-10  Przemyslaw Wirkus  
> >
> > * config/aarch64/aarch64-modes.def (VECTOR_MODE): New V8DI
> mode.
> > * config/aarch64/aarch64.c (aarch64_hard_regno_mode_ok): Handle
> > V8DImode.
> > * config/aarch64/iterators.md (define_mode_attr nunits): Add entry
> > for V8DI.
> >
> > Kind regards,
> > Przemyslaw Wirkus
> >
> > ---
> >
> > diff --git a/gcc/config/aarch64/aarch64-modes.def
> > b/gcc/config/aarch64/aarch64-modes.def
> > index
> >
> ac97d222789c6701d858c014736f8c211512a4d9..62595b8af6e1eea8fc769885
> bba9
> > fe54f0a9ec05 100644
> > --- a/gcc/config/aarch64/aarch64-modes.def
> > +++ b/gcc/config/aarch64/aarch64-modes.def
> > @@ -81,6 +81,11 @@ INT_MODE (OI, 32);
> >  INT_MODE (CI, 48);
> >  INT_MODE (XI, 64);
> >
> > +/* V8DI mode.  */
> > +VECTOR_MODE_WITH_PREFIX (V, INT, DI, 8, 5); \
> > +  \
> > +  ADJUST_ALIGNMENT (V8DI, 8);
> 
> The backslashes aren't needed here, can just be:
> 
> VECTOR_MODE_WITH_PREFIX (V, INT, DI, 8, 5);
> 
> ADJUST_ALIGNMENT (V8DI, 8);
> 
> > +
> >  /* Define Advanced SIMD modes for structures of 2, 3 and 4
> > d-registers.  */  #define ADV_SIMD_D_REG_STRUCT_MODES(NVECS, VB,
> VH, VS, VD) \
> >VECTOR_MODES_WITH_PREFIX (V##NVECS##x, INT, 8, 3); \ diff --git
> > a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index
> >
> 69f08052ce808c140ed2933ab6b2e2617ca6f669..0e102a83a8dc34e715fafb58
> 1698
> > 97b12c9b3a20 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -3376,6 +3376,9 @@ aarch64_hard_regno_nregs (unsigned regno,
> > machine_mode mode)  static bool  aarch64_hard_regno_mode_ok
> (unsigned
> > regno, machine_mode mode)  {
> > +  if (mode == V8DImode)
> > +return IN_RANGE (regno, R0_REGNUM, R23_REGNUM);
> 
> As you pointed out off-list, this should also check for even registers:
> 
> return (IN_RANGE (regno, R0_REGNUM, R23_REGNUM)
>   && multiple_p (regno - R0_REGNUM, 2));
> 
> OK with those changes, thanks.

Thank you.

Installed with changes:
commit dd159a4167ca19b5ff26e7156333c88e854943bf

/Przemek

> Richard
> 
> > +
> >if (GET_MODE_CLASS (mode) == MODE_CC)
> >  return regno == CC_REGNUM;
> >
> > diff --git a/gcc/config/aarch64/iterators.md
> > b/gcc/config/aarch64/iterators.md index
> >
> bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..cea277f3a03cfd20178e51e6
> abd7
> > e256e206299f 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -1053,7 +1053,7 @@ (define_mode_attr vas [(DI "") (SI ".2s")])
> > (define_mode_attr nunits [(V8QI "8") (V16QI "16")
> >   (V4HI "4") (V8HI "8")
> >   (V2SI "2") (V4SI "4")
> > -(V2DI "2")
> > + (V2DI "2") (V8DI "8")
> >   (V4HF "4") (V8HF "8")
> >   (V4BF "4") (V8BF "8")
> >   (V2SF "2") (V4SF "4")


[PATCH][GCC] aarch64: Add LS64 extension and intrinsics

2021-11-11 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte extension)
which is part of Armv8.7-A architecture. Changes include missing plumbing for
TARGET_LS64, LS64 data structure and intrinsics defined in ACLE [0]. Machine
description of intrinsics is using new V8DI mode added in a separate patch.
__ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for atomic
64-byte access to device memory are supported.

New compiler internal type is added wrapping ACLE struct data512_t [0]:

typedef struct {
  uint64_t val[8];
} __arm_data512_t;

Please note that command line support for this feature was already added [1].

  [0] 
https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-store-64-byte-intrinsics
  [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add command-line
  support for Armv8.7-a)

For below C code see example snippets of generated code:

#include <arm_acle.h>

void
func(const void * addr, data512_t *data) {
  *data = __arm_ld64b (addr);
}

func:
ld64b   x8, [x0]
stp x8, x9, [x1]
sub sp, sp, #64
stp x10, x11, [x1, 16]
stp x12, x13, [x1, 32]
stp x14, x15, [x1, 48]
add sp, sp, 64
ret
~~~

#include <arm_acle.h>

uint64_t
func(void *addr, data512_t value) {
return  __arm_st64bv (addr, value);
}

func:
ldp x8, x9, [x1]
ldp x10, x11, [x1, 16]
ldp x12, x13, [x1, 32]
ldp x14, x15, [x1, 48]
st64bv  x1, x8, [x0]
mov x0, x1
ret

~~~

uint64_t
ls64_store_v0(const data512_t *input, void *addr)
{
uint64_t status;
__asm__ volatile ("st64bv0 %0, %2, [%1]"
  : "=r" (status), "=r" (addr)
  : "r" (*input)
  : "memory");
return status;
}

ls64_store_v0:
ldp x8, x9, [x0]
ldp x10, x11, [x0, 16]
ldp x12, x13, [x0, 32]
ldp x14, x15, [x0, 48]
st64bv0 x0, x8, [x1]
ret

Regtested on aarch64-elf cross and no issues.

OK for master?

gcc/ChangeLog:

2021-11-11  Przemyslaw Wirkus  

* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
(aarch64_init_ls64_builtin_decl): Helper function.
(aarch64_init_ls64_builtins): Helper function.
(aarch64_init_ls64_builtins_types): Helper function.
(aarch64_general_init_builtins): Init LS64 intrinsics for
TARGET_LS64.
(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
(ls64_builtins_data): New helper struct.
(v8di_UP): New define.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_LS64.
* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
(AARCH64_ISA_V8_7): New define.
(TARGET_LS64): New define.
* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
UNSPEC_ST64BV and UNSPEC_ST64BV0.
(ld64b): New define_insn.
(st64b): New define_insn.
(st64bv): New define_insn.
(st64bv0): New define_insn.
* config/aarch64/arm_acle.h (target):
(data512_t): New type derived from __arm_data512_t.
(__arm_data512_t): New internal type.
(__arm_ld64b): New intrinsic.
(__arm_st64b): New intrinsic.
(__arm_st64bv): New intrinsic.
(__arm_st64bv0): New intrinsic.
* config/arm/types.md: Add new type ls64.

gcc/testsuite/ChangeLog:

2021-11-11  Przemyslaw Wirkus  

* gcc.target/aarch64/acle/ls64_asm.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
* gcc.target/aarch64/acle/ls64_st64b.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
for __ARM_FEATURE_LS64.


rb14982.patch
Description: rb14982.patch


Re: [PATCH][GCC] arm: enable cortex-a710 CPU

2021-11-10 Thread Przemyslaw Wirkus via Gcc-patches
> > Hi,
> >
> > This patch is adding support for Cortex-A710 CPU [0].
> >
> >   [0] https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a710
> >
> > OK for master?

> Ok.
> Thanks,
> Kyrill

commit 9701f153f6dfcc365ac0d96cdcf7df69a2de81dc


> >
> > gcc/ChangeLog:
> >
> >  * config/arm/arm-cpus.in (cortex-a710): New CPU.
> >  * config/arm/arm-tables.opt: Regenerate.
> >  * config/arm/arm-tune.md: Regenerate.
> >  * doc/invoke.texi: Update docs.
> >
> > --
> > kind regards,
> > Przemyslaw Wirkus
> >
> > Staff Compiler Engineer | Arm
> > . . . . . . . . . . . . . . . . . . . . . . . . . .
> >
> > Arm.com


[PATCH][GCC] aarch64: Add new vector mode V8DI

2021-11-10 Thread Przemyslaw Wirkus via Gcc-patches
Hi,
This patch is adding new V8DI mode which will be used with new Armv8.7-A
LS64 extension intrinsics.

Regtested on aarch64-elf and no issues.

OK for master?

gcc/ChangeLog:

2021-11-10  Przemyslaw Wirkus  

* config/aarch64/aarch64-modes.def (VECTOR_MODE): New V8DI mode.
* config/aarch64/aarch64.c (aarch64_hard_regno_mode_ok): Handle
V8DImode.
* config/aarch64/iterators.md (define_mode_attr nunits): Add entry
for V8DI.

Kind regards,
Przemyslaw Wirkus

--- 

diff --git a/gcc/config/aarch64/aarch64-modes.def 
b/gcc/config/aarch64/aarch64-modes.def
index 
ac97d222789c6701d858c014736f8c211512a4d9..62595b8af6e1eea8fc769885bba9fe54f0a9ec05
 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -81,6 +81,11 @@ INT_MODE (OI, 32);
 INT_MODE (CI, 48);
 INT_MODE (XI, 64);
 
+/* V8DI mode.  */
+VECTOR_MODE_WITH_PREFIX (V, INT, DI, 8, 5); \
+  \
+  ADJUST_ALIGNMENT (V8DI, 8);
+
 /* Define Advanced SIMD modes for structures of 2, 3 and 4 d-registers.  */
 #define ADV_SIMD_D_REG_STRUCT_MODES(NVECS, VB, VH, VS, VD) \
   VECTOR_MODES_WITH_PREFIX (V##NVECS##x, INT, 8, 3); \
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
69f08052ce808c140ed2933ab6b2e2617ca6f669..0e102a83a8dc34e715fafb58169897b12c9b3a20
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3376,6 +3376,9 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode 
mode)
 static bool
 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
 {
+  if (mode == V8DImode)
+return IN_RANGE (regno, R0_REGNUM, R23_REGNUM);
+
   if (GET_MODE_CLASS (mode) == MODE_CC)
 return regno == CC_REGNUM;
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..cea277f3a03cfd20178e51e6abd7e256e206299f
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1053,7 +1053,7 @@ (define_mode_attr vas [(DI "") (SI ".2s")])
 (define_mode_attr nunits [(V8QI "8") (V16QI "16")
  (V4HI "4") (V8HI "8")
  (V2SI "2") (V4SI "4")
-(V2DI "2")
+ (V2DI "2") (V8DI "8")
  (V4HF "4") (V8HF "8")
  (V4BF "4") (V8BF "8")
  (V2SF "2") (V4SF "4")


RE: [PATCH][GCC] arm: add armv9-a architecture to -march

2021-11-09 Thread Przemyslaw Wirkus via Gcc-patches
> > > -Original Message-
> > > From: Przemyslaw Wirkus
> > > Sent: 18 October 2021 10:37
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: Richard Earnshaw ; Ramana
> > > Radhakrishnan ; Kyrylo Tkachov
> > > ; ni...@redhat.com
> > > Subject: [PATCH][GCC] arm: add armv9-a architecture to -march
> > >
> > > Hi,
> > >
> > > This patch is adding `armv9-a` to -march in Arm GCC.
> > >
> > > In this patch:
> > >   + Add `armv9-a` to -march.
> > >   + Update multilib with armv9-a and armv9-a+simd.
> > >
> > > After this patch three additional multilib directories are available:
> > >
> > > $ arm-none-eabi-gcc --print-multi-lib .; [...vanilla multi-lib
> > > dirs...] thumb/v9-a/nofp;@mthumb@march=armv9-a@mfloat-abi=soft
> > > thumb/v9-a+simd/softfp;@mthumb@march=armv9-a+simd@mfloat-
> > > abi=softfp
> > > thumb/v9-a+simd/hard;@mthumb@march=armv9-a+simd@mfloat-
> > > abi=hard
> > >
> > > New multi-lib directories under
> > > $GCC_INSTALL_DIR/lib/gcc/arm-none-eabi/12.0.0/thumb are created:
> > >
> > > thumb/
> > > +--- v9-a
> > > ||--- nofp
> > > |
> > > +--- v9-a+simd
> > >  |--- hard
> > >  |--- softfp
> > >
> > > Regtested on arm-none-eabi cross and no issues.
> > >
> > > OK for master?

Thanks.

commit 32ba7860ccaddd5219e6dae94a3d0653e124c9dd

> Ok.
> Thanks,
> Kyrill
> 
> 
> > >
> > > gcc/ChangeLog:
> > >
> > >   * config/arm/arm-cpus.in (armv9): New define.
> > >   (ARMv9a): New group.
> > >   (armv9-a): New arch definition.
> > >   * config/arm/arm-tables.opt: Regenerate.
> > >   * config/arm/arm.h (BASE_ARCH_9A): New arch enum value.
> > >   * config/arm/t-aprofile: Added armv9-a and armv9-a+simd.
> > >   * config/arm/t-arm-elf: Added armv9-a, v9_fps and all_v9_archs
> > >   to MULTILIB_MATCHES.
> > >   * config/arm/t-multilib: Added v9_a_nosimd_variants and
> > >   v9_a_simd_variants to MULTILIB_MATCHES.
> > >   * doc/invoke.texi: Update docs.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.target/arm/multilib.exp: Update test with armv9-a entries.
> > >   * lib/target-supports.exp (v9a): Add new armflag.
> > >   (__ARM_ARCH_9A__): Add new armdef.
> > >
> > > --
> > > kind regards,
> > > Przemyslaw Wirkus



RE: [PATCH][GCC] arm: enable cortex-a710 CPU

2021-11-08 Thread Przemyslaw Wirkus via Gcc-patches
Ping :)

> -Original Message-
> From: Przemyslaw Wirkus
> Sent: 18 October 2021 10:40
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Earnshaw ; Ramana Radhakrishnan 
> ; Kyrylo Tkachov 
> ; ni...@redhat.com
> Subject: [PATCH][GCC] arm: enable cortex-a710 CPU
> 
> Hi,
> 
> This patch is adding support for Cortex-A710 CPU [0].
> 
>   [0] https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a710
> 
> OK for master?
> 
> gcc/ChangeLog:
> 
>   * config/arm/arm-cpus.in (cortex-a710): New CPU.
>   * config/arm/arm-tables.opt: Regenerate.
>   * config/arm/arm-tune.md: Regenerate.
>   * doc/invoke.texi: Update docs.
> 
> --
> kind regards,
> Przemyslaw Wirkus
> 
> Staff Compiler Engineer | Arm
> . . . . . . . . . . . . . . . . . . . . . . . . . .
> 
> Arm.com


RE: [PATCH][GCC] arm: add armv9-a architecture to -march

2021-11-08 Thread Przemyslaw Wirkus via Gcc-patches
Ping :)

> -Original Message-
> From: Przemyslaw Wirkus
> Sent: 18 October 2021 10:37
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Earnshaw ; Ramana
> Radhakrishnan ; Kyrylo Tkachov
> ; ni...@redhat.com
> Subject: [PATCH][GCC] arm: add armv9-a architecture to -march
> 
> Hi,
> 
> This patch is adding `armv9-a` to -march in Arm GCC.
> 
> In this patch:
>   + Add `armv9-a` to -march.
>   + Update multilib with armv9-a and armv9-a+simd.
> 
> After this patch three additional multilib directories are available:
> 
> $ arm-none-eabi-gcc --print-multi-lib
> .;
> [...vanilla multi-lib dirs...]
> thumb/v9-a/nofp;@mthumb@march=armv9-a@mfloat-abi=soft
> thumb/v9-a+simd/softfp;@mthumb@march=armv9-a+simd@mfloat-
> abi=softfp
> thumb/v9-a+simd/hard;@mthumb@march=armv9-a+simd@mfloat-
> abi=hard
> 
> New multi-lib directories under
> $GCC_INSTALL_DIR/lib/gcc/arm-none-eabi/12.0.0/thumb are created:
> 
> thumb/
> +--- v9-a
> ||--- nofp
> |
> +--- v9-a+simd
>  |--- hard
>  |--- softfp
> 
> Regtested on arm-none-eabi cross and no issues.
> 
> OK for master?
> 
> gcc/ChangeLog:
> 
>   * config/arm/arm-cpus.in (armv9): New define.
>   (ARMv9a): New group.
>   (armv9-a): New arch definition.
>   * config/arm/arm-tables.opt: Regenerate.
>   * config/arm/arm.h (BASE_ARCH_9A): New arch enum value.
>   * config/arm/t-aprofile: Added armv9-a and armv9-a+simd.
>   * config/arm/t-arm-elf: Added armv9-a, v9_fps and all_v9_archs
>   to MULTILIB_MATCHES.
>   * config/arm/t-multilib: Added v9_a_nosimd_variants and
>   v9_a_simd_variants to MULTILIB_MATCHES.
>   * doc/invoke.texi: Update docs.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/multilib.exp: Update test with armv9-a entries.
>   * lib/target-supports.exp (v9a): Add new armflag.
>   (__ARM_ARCH_9A__): Add new armdef.
> 
> --
> kind regards,
> Przemyslaw Wirkus


[PATCH][GCC] arm: enable cortex-a710 CPU

2021-10-18 Thread Przemyslaw Wirkus via Gcc-patches
Hi, 

This patch is adding support for Cortex-A710 CPU [0].

  [0] https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a710

OK for master?

gcc/ChangeLog:

* config/arm/arm-cpus.in (cortex-a710): New CPU.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* doc/invoke.texi: Update docs.

-- 
kind regards, 
Przemyslaw Wirkus

Staff Compiler Engineer | Arm 
. . . . . . . . . . . . . . . . . . . . . . . . . .

Arm.com

diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 3756ba56c6ea36fa9d017347bd73b27ab7752325..a6a8e4319a69be0913281701f3a85610d637922e 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -1513,6 +1513,17 @@ begin cpu cortex-a78c
  part d4b
 end cpu cortex-a78c
 
+begin cpu cortex-a710
+ cname cortexa710
+ tune for cortex-a57
+ tune flags LDSCHED
+ architecture armv9-a+fp16+bf16+i8mm
+ option crypto add FP_ARMv8 CRYPTO
+ costs cortex_a57
+ vendor 41
+ part d47
+end cpu cortex-a710
+
 begin cpu cortex-x1
  cname cortexx1
  tune for cortex-a57
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index c00e252ec5aa0f1a9004718dbea3cf969a4e5be6..6e457fb250223eac22c033424dae406cb74b7df8 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -249,6 +249,9 @@ Enum(processor_type) String(cortex-a78ae) Value( TARGET_CPU_cortexa78ae)
 EnumValue
 Enum(processor_type) String(cortex-a78c) Value( TARGET_CPU_cortexa78c)
 
+EnumValue
+Enum(processor_type) String(cortex-a710) Value( TARGET_CPU_cortexa710)
+
 EnumValue
 Enum(processor_type) String(cortex-x1) Value( TARGET_CPU_cortexx1)
 
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 6482833fc35b5758f66f2c7082e89c8ded250242..54e701f439b1a6f33267fd54248623755acef3b4 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -46,8 +46,9 @@ (define_attr "tune"
 	cortexa73cortexa53,cortexa55,cortexa75,
 	cortexa76,cortexa76ae,cortexa77,
 	cortexa78,cortexa78ae,cortexa78c,
-	cortexx1,neoversen1,cortexa75cortexa55,
-	cortexa76cortexa55,neoversev1,neoversen2,
-	cortexm23,cortexm33,cortexm35p,
-	cortexm55,cortexr52,cortexr52plus"
+	cortexa710,cortexx1,neoversen1,
+	cortexa75cortexa55,cortexa76cortexa55,neoversev1,
+	neoversen2,cortexm23,cortexm33,
+	cortexm35p,cortexm55,cortexr52,
+	cortexr52plus"
 	(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ce738e830a948016d89e456539fef5f5b18688fb..c5966de3231f9b50df68c0ad434789b0abe7f616 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20477,7 +20477,7 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t},
 @samp{cortex-a32}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
 @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
 @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77},
-@samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
+@samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{cortex-a710},
 @samp{ares}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5},
 @samp{cortex-r7}, @samp{cortex-r8}, @samp{cortex-r52}, @samp{cortex-r52plus},
 @samp{cortex-m0}, @samp{cortex-m0plus}, @samp{cortex-m1}, @samp{cortex-m3},


[PATCH][GCC] arm: add armv9-a architecture to -march

2021-10-18 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

This patch is adding `armv9-a` to -march in Arm GCC.

In this patch:
+ Add `armv9-a` to -march.
+ Update multilib with armv9-a and armv9-a+simd.

After this patch three additional multilib directories are available:

$ arm-none-eabi-gcc --print-multi-lib
.;
[...vanilla multi-lib dirs...]
thumb/v9-a/nofp;@mthumb@march=armv9-a@mfloat-abi=soft
thumb/v9-a+simd/softfp;@mthumb@march=armv9-a+simd@mfloat-abi=softfp
thumb/v9-a+simd/hard;@mthumb@march=armv9-a+simd@mfloat-abi=hard

New multi-lib directories under
$GCC_INSTALL_DIR/lib/gcc/arm-none-eabi/12.0.0/thumb are created:

thumb/
+--- v9-a
||--- nofp
|
+--- v9-a+simd
 |--- hard
 |--- softfp

Regtested on arm-none-eabi cross and no issues.

OK for master?

gcc/ChangeLog:

* config/arm/arm-cpus.in (armv9): New define.
(ARMv9a): New group.
(armv9-a): New arch definition.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm.h (BASE_ARCH_9A): New arch enum value.
* config/arm/t-aprofile: Added armv9-a and armv9-a+simd.
* config/arm/t-arm-elf: Added armv9-a, v9_fps and all_v9_archs
to MULTILIB_MATCHES.
* config/arm/t-multilib: Added v9_a_nosimd_variants and
v9_a_simd_variants to MULTILIB_MATCHES.
* doc/invoke.texi: Update docs.

gcc/testsuite/ChangeLog:

* gcc.target/arm/multilib.exp: Update test with armv9-a entries.
* lib/target-supports.exp (v9a): Add new armflag.
(__ARM_ARCH_9A__): Add new armdef.

-- 
kind regards, 
Przemyslaw Wirkus
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index d0d0d0f1c7e4176fc4aa30d82394fe938b083a59..3756ba56c6ea36fa9d017347bd73b27ab7752325 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -132,6 +132,9 @@ define feature cmse
 # Architecture rel 8.1-M.
 define feature armv8_1m_main
 
+# Architecture rel 9.0.
+define feature armv9
+
 # Floating point and Neon extensions.
 # VFPv1 is not supported in GCC.
 
@@ -293,6 +296,7 @@ define fgroup ARMv8m_base ARMv6m armv8 cmse tdiv
 define fgroup ARMv8m_main ARMv7m armv8 cmse
 define fgroup ARMv8r  ARMv8a
 define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main
+define fgroup ARMv9a  ARMv8_5a armv9
 
 # Useful combinations.
 define fgroup VFPv2	vfpv2
@@ -751,6 +755,21 @@ begin arch armv8.1-m.main
  option cdecp7 add cdecp7
 end arch armv8.1-m.main
 
+begin arch armv9-a
+ tune for cortex-a53
+ tune flags CO_PROC
+ base 9A
+ profile A
+ isa ARMv9a
+ option simd add FP_ARMv8 DOTPROD
+ option fp16 add fp16 fp16fml FP_ARMv8 DOTPROD
+ option crypto add FP_ARMv8 CRYPTO DOTPROD
+ option nocrypto remove ALL_CRYPTO
+ option nofp remove ALL_FP
+ option i8mm add i8mm FP_ARMv8 DOTPROD
+ option bf16 add bf16 FP_ARMv8 DOTPROD
+end arch armv9-a
+
 begin arch iwmmxt
  tune for iwmmxt
  tune flags LDSCHED STRONG XSCALE
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index 8bb0c9f6a7bd9230e7b2de1e2ef4ed5177f89495..c00e252ec5aa0f1a9004718dbea3cf969a4e5be6 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -383,10 +383,13 @@ EnumValue
 Enum(arm_arch) String(armv8.1-m.main) Value(30)
 
 EnumValue
-Enum(arm_arch) String(iwmmxt) Value(31)
+Enum(arm_arch) String(armv9-a) Value(31)
 
 EnumValue
-Enum(arm_arch) String(iwmmxt2) Value(32)
+Enum(arm_arch) String(iwmmxt) Value(32)
+
+EnumValue
+Enum(arm_arch) String(iwmmxt2) Value(33)
 
 Enum
 Name(arm_fpu) Type(enum fpu_type)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 015299c15346f1bea59d70fdcb1d19545473b23b..3a8d223ee622ffe5b25e14ed07bfaa07835dc683 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -452,7 +452,8 @@ enum base_architecture
   BASE_ARCH_8A = 8,
   BASE_ARCH_8M_BASE = 8,
   BASE_ARCH_8M_MAIN = 8,
-  BASE_ARCH_8R = 8
+  BASE_ARCH_8R = 8,
+  BASE_ARCH_9A = 9
 };
 
 /* The major revision number of the ARM Architecture implemented by the target.  */
diff --git a/gcc/config/arm/t-aprofile b/gcc/config/arm/t-aprofile
index 8574ac3e24d0d67c12bae5f88d1410ec1e0f983d..68e2251c7266712177723a7d634016f4fddaacac 100644
--- a/gcc/config/arm/t-aprofile
+++ b/gcc/config/arm/t-aprofile
@@ -26,8 +26,8 @@
 
 # Arch and FPU variants to build libraries with
 
-MULTI_ARCH_OPTS_A   = march=armv7-a/march=armv7-a+fp/march=armv7-a+simd/march=armv7ve+simd/march=armv8-a/march=armv8-a+simd
-MULTI_ARCH_DIRS_A   = v7-a v7-a+fp v7-a+simd v7ve+simd v8-a v8-a+simd
+MULTI_ARCH_OPTS_A   = march=armv7-a/march=armv7-a+fp/march=armv7-a+simd/march=armv7ve+simd/march=armv8-a/march=armv8-a+simd/march=armv9-a/march=armv9-a+simd
+MULTI_ARCH_DIRS_A   = v7-a v7-a+fp v7-a+simd v7ve+simd v8-a v8-a+simd v9-a v9-a+simd
 
 # ARMv7-A - build nofp, fp-d16 and SIMD variants
 
@@ -46,6 +46,11 @@ MULTILIB_REQUIRED	+= mthumb/march=armv8-a/mfloat-abi=soft
 MULTILIB_REQUIRED	+= mthumb/march=armv8-a+simd/mfloat-abi=hard
 MULTILIB_REQUIRED	+= mthumb/march=armv8-a+simd/mfloat-abi=softfp
 
+# Armv9-A - build nofp

RE: [PATCH][GCC] arm: Add Cortex-R52+ multilib

2021-10-04 Thread Przemyslaw Wirkus via Gcc-patches
> > On Thu, Sep 30, 2021, 3:37 PM Przemyslaw Wirkus 
> > <mailto:przemyslaw.wir...@arm.com> wrote:
> > Subject: Re: [PATCH][GCC] arm: Add Cortex-R52+ multilib
> >
> > I think the RTEMS multilibs are based on the products that RTEMS supports,
> > so this is really the RTEMS maintainers' call.
> > 
> > Joel?

> > > Ping :)

> I'm ok deferring it since Sebastian doesn't think there is a user right now. 
> But I'm actually rather ambivalent. If it makes it easier to maintain versus 
> the other embedded arm targets then I'm all for it. Maintaining these 
> configurations is a pain.

OK, let's discard this patch as there is no consensus it's useful.

Cheers!

/Przemyslaw

> --joel

> On 22/09/2021 09:46, Przemyslaw Wirkus via Gcc-patches wrote:
> > Patch is adding multilib entries for `cortex-r52plus` CPU.
> >
> > See:
> > https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r52-plus
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> > 2021-09-16  Przemyslaw Wirkus  <mailto:przemyslaw.wir...@arm.com>
> >
> >     * config/arm/t-rtems: Add "-mthumb -mcpu=cortex-r52plus
> >     -mfloat-abi=hard" multilib.
> >


RE: [PATCH][GCC] aarch64: enable cortex-x2 CPU

2021-10-01 Thread Przemyslaw Wirkus via Gcc-patches
> Subject: RE: [PATCH][GCC] aarch64: enable cortex-x2 CPU
> 
> 
> 
> > -Original Message-
> > From: Kyrylo Tkachov
> > Sent: Friday, October 1, 2021 1:17 PM
> > To: Przemyslaw Wirkus ; gcc-
> > patc...@gcc.gnu.org
> > Cc: Richard Earnshaw ; Richard Sandiford
> > ; Marcus Shawcroft
> > 
> > Subject: RE: [PATCH][GCC] aarch64: enable cortex-x2 CPU
> >
> >
> >
> > > -Original Message-
> > > From: Przemyslaw Wirkus 
> > > Sent: Wednesday, September 22, 2021 9:38 AM
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: Richard Earnshaw ; Richard Sandiford
> > > ; Marcus Shawcroft
> > > ; Kyrylo Tkachov
> > 
> > > Subject: [PATCH][GCC] aarch64: enable cortex-x2 CPU
> > >
> > > Patch is adding 'cortex-x2' to -mcpu command line option.
> > >
> > > OK for master?
> > >
> > > gcc/ChangeLog:
> > >
> > > 2021-09-02  Przemyslaw Wirkus  
> > >
> > >   * config/aarch64/aarch64-cores.def (AARCH64_CORE): New
> > >   Cortex-X2 core.
> > >   * config/aarch64/aarch64-tune.md: Regenerate.
> > >   * doc/invoke.texi: Update docs.
> > diff --git a/gcc/config/aarch64/aarch64-cores.def
> > b/gcc/config/aarch64/aarch64-cores.def
> > index
> >
> a8027e92fa8f7554e2b19d00f7c85c6ed48a92e5..34d9646ab6a32a19e7cd09d9
> > 5594b59278d02920 100644
> > --- a/gcc/config/aarch64/aarch64-cores.def
> > +++ b/gcc/config/aarch64/aarch64-cores.def
> > @@ -168,4 +168,6 @@ AARCH64_CORE("cortex-a510",  cortexa510,
> > cortexa55, 9A,  AARCH64_FL_FOR_ARCH9 |
> >
> >  AARCH64_CORE("cortex-a710",  cortexa710, cortexa55, 9A,
> > AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM |
> AARCH64_FL_MEMTAG |
> > AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
> >
> > +AARCH64_CORE("cortex-x2",  cortexx2, cortexa55, 9A,
> > AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM |
> AARCH64_FL_MEMTAG |
> > AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
> > +
> >
> > Let's use cortexa57 for scheduling here for now.
> 
> I should have said, ok with that change.

commit 257d2890a769a8aa564d079170377e637e07acb1

> Kyrill
> > Thanks,
> > Kyrill


RE: [PATCH][GCC] aarch64: enable cortex-a510 CPU

2021-10-01 Thread Przemyslaw Wirkus via Gcc-patches
> Hi Przemek,
> 
> > -Original Message-
> > From: Przemyslaw Wirkus 
> > Sent: Wednesday, September 22, 2021 9:35 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Earnshaw ; Richard Sandiford
> > ; Marcus Shawcroft
> > ; Kyrylo Tkachov
> 
> > Subject: [PATCH][GCC] aarch64: enable cortex-a510 CPU
> >
> > Patch is adding 'cortex-a510' to -mcpu command line option.
> >
> > gcc/ChangeLog:
> >
> > 2021-09-02  Przemyslaw Wirkus  
> >
> > * config/aarch64/aarch64-cores.def (AARCH64_CORE): New
> > Cortex-A510 core.
> > * config/aarch64/aarch64-tune.md: Regenerate.
> > * doc/invoke.texi: Update docs.
> 
> +/* Arm9.0-A Architecture Processors.  */
> 
> Typo, should be "Armv9.0-a".
> 
> +
> +/* Arm ('A') cores. */
> +AARCH64_CORE("cortex-a510",  cortexa510, cortexa55, 9A,
> +AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM |
> AARCH64_FL_MEMTAG |
> +AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd46, -1)
> +
> 
> We'll need to update the tuning anyway once we do it properly, but for now I
> think for the COSTS field (4th to last) we should go with cortexa53 rather
> than neoversen2.
> Ok with those changes.

commit 8aa3ab5a47664023d83ea5097a53a66bd6cbb978

> Thanks,
> Kyrill


RE: [PATCH][GCC] aarch64: enable cortex-a710 CPU

2021-10-01 Thread Przemyslaw Wirkus via Gcc-patches



> -Original Message-
> From: Kyrylo Tkachov 
> Sent: 01 October 2021 13:16
> To: Przemyslaw Wirkus ; gcc-
> patc...@gcc.gnu.org
> Cc: Richard Earnshaw ; Richard Sandiford
> ; Marcus Shawcroft
> 
> Subject: RE: [PATCH][GCC] aarch64: enable cortex-a710 CPU
> 
> 
> 
> > -Original Message-
> > From: Przemyslaw Wirkus 
> > Sent: Wednesday, September 22, 2021 9:37 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Earnshaw ; Richard Sandiford
> > ; Marcus Shawcroft
> > ; Kyrylo Tkachov
> 
> > Subject: [PATCH][GCC] aarch64: enable cortex-a710 CPU
> >
> > Patch is adding 'cortex-a710' to -mcpu command line option.
> >
> > gcc/ChangeLog:
> >
> > 2021-09-02  Przemyslaw Wirkus  
> >
> > * config/aarch64/aarch64-cores.def (AARCH64_CORE): New
> > Cortex-A710 core.
> > * config/aarch64/aarch64-tune.md: Regenerate.
> > * doc/invoke.texi: Update docs.
> 
> diff --git a/gcc/config/aarch64/aarch64-cores.def
> b/gcc/config/aarch64/aarch64-cores.def
> index
> 478f7e1c8145365f42f43ad94d90c633aae66ebd..a8027e92fa8f7554e2b19d00f
> 7c85c6ed48a92e5 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -166,4 +166,6 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53,
> 8R, AARCH64_FL_FOR_ARCH8_R, cor
>  /* Arm ('A') cores. */
>  AARCH64_CORE("cortex-a510",  cortexa510, cortexa55, 9A,
> AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM |
> AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16,
> neoversen2, 0x41, 0xd46, -1)
> 
> +AARCH64_CORE("cortex-a710",  cortexa710, cortexa55, 9A,
> +AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM |
> AARCH64_FL_MEMTAG |
> +AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
> +
> 
> Again, we'd need to revisit big-core scheduling properly at some point, but
> for now I think for the scheduling field (3rd) we should use cortexa57 rather
> than cortexa55.
> Ok with that change.

commit f3cb2114d8b892fed0b6a717dab0a71d3da604bc

> Thanks,
> Kyrill



[PATCH][GCC][committed] aarch64: fix AARCH64_FL_V9 flag value

2021-10-01 Thread Przemyslaw Wirkus via Gcc-patches
This patch fixes the AARCH64_FL_V9 flag value, which is currently set wrongly
due to a merge error.

Committed as obvious.

gcc/ChangeLog:

* config/aarch64/aarch64.h (AARCH64_FL_V9): Update value.

--- 

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 6908b8f4a16..2792bb29adb 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -230,8 +230,6 @@ extern unsigned aarch64_architecture_version;

 /* Pointer Authentication (PAUTH) extension.  */
 #define AARCH64_FL_PAUTH  (1ULL << 40)
-/* Armv9.0-A.  */
-#define AARCH64_FL_V9 (1ULL << 41)  /* Armv9.0-A Architecture.  */

 /* 64-byte atomic load/store extensions.  */
 #define AARCH64_FL_LS64  (1ULL << 41)
@@ -239,6 +237,9 @@ extern unsigned aarch64_architecture_version;
 /* Armv8.7-a architecture extensions.  */
 #define AARCH64_FL_V8_7   (1ULL << 42)

+/* Armv9.0-A.  */
+#define AARCH64_FL_V9 (1ULL << 43)  /* Armv9.0-A Architecture.  */
+
 /* Has FP and SIMD.  */
 #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)


RE: [PATCH][GCC] aarch64: add armv9-a to -march

2021-10-01 Thread Przemyslaw Wirkus via Gcc-patches
> > Subject: [PATCH][GCC] aarch64: add armv9-a to -march
> >
> > Patch is adding new command line option 'armv9-a' to -march.
> >
> > OK for master?
> 
> Ok.

commit f0688d42c9b74a6999548ff2e79ae440b049b87f

> Thanks,
> Kyrill
> 
> >
> > gcc/ChangeLog:
> >
> > 2021-09-22  Przemyslaw Wirkus  
> >
> > * config/aarch64/aarch64-arches.def (AARCH64_ARCH): Added
> > armv9-a.
> > * config/aarch64/aarch64.h (AARCH64_FL_V9): New.
> > (AARCH64_FL_FOR_ARCH9): New flags for Armv9-A.
> > (AARCH64_ISA_V9): New ISA flag.



RE: [PATCH][GCC] arm: Enable Cortex-R52+ CPU

2021-09-30 Thread Przemyslaw Wirkus via Gcc-patches
> Subject: Re: [PATCH][GCC] arm: Enable Cortex-R52+ CPU
> 
> This is OK

Applying as r52+ is now in Binutils.

commit cd08eae26ed23497ace5f4ee6f3a41eb5bd36c38

> Ramana
> 
> On 22/09/2021, 09:45, "Przemyslaw Wirkus" 
> wrote:
> 
> Patch is adding Cortex-R52+ as 'cortex-r52plus' command line
> flag for -mcpu option.
> 
> See: https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r52-plus
> 
> OK for master?
> 
> gcc/ChangeLog:
> 
> 2021-09-22  Przemyslaw Wirkus  
> 
> * config/arm/arm-cpus.in: Add Cortex-R52+ CPU.
> * config/arm/arm-tables.opt: Regenerate.
> * config/arm/arm-tune.md: Regenerate.
> * doc/invoke.texi: Update docs.
> 



RE: [PATCH][GCC] arm: Add Cortex-R52+ multilib

2021-09-30 Thread Przemyslaw Wirkus via Gcc-patches
> Subject: Re: [PATCH][GCC] arm: Add Cortex-R52+ multilib
> 
> I think the RTEMS multilibs are based on the products that RTEMS supports,
> so this is really the RTEMS maintainers' call.
> 
> Joel?

Ping :)

> On 22/09/2021 09:46, Przemyslaw Wirkus via Gcc-patches wrote:
> > Patch is adding multilib entries for `cortex-r52plus` CPU.
> >
> > See:
> > https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r52-plus
> >
> > OK for master?
> >
> > gcc/ChangeLog:
> >
> > 2021-09-16  Przemyslaw Wirkus  
> >
> > * config/arm/t-rtems: Add "-mthumb -mcpu=cortex-r52plus
> > -mfloat-abi=hard" multilib.
> >


[PATCH][GCC] arm: Add Cortex-R52+ multilib

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding multilib entries for `cortex-r52plus` CPU.

See: https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r52-plus

OK for master?

gcc/ChangeLog:

2021-09-16  Przemyslaw Wirkus  

* config/arm/t-rtems: Add "-mthumb -mcpu=cortex-r52plus
-mfloat-abi=hard" multilib.


rb14858.patch
Description: rb14858.patch


[PATCH][GCC] arm: Enable Cortex-R52+ CPU

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding Cortex-R52+ as 'cortex-r52plus' command line
flag for -mcpu option.

See: https://www.arm.com/products/silicon-ip-cpu/cortex-r/cortex-r52-plus

OK for master?

gcc/ChangeLog:

2021-09-22  Przemyslaw Wirkus  

* config/arm/arm-cpus.in: Add Cortex-R52+ CPU.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb14856.patch
Description: rb14856.patch


[PATCH][GCC] aarch64: enable cortex-x2 CPU

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding 'cortex-x2' to -mcpu command line option.

OK for master?

gcc/ChangeLog:

2021-09-02  Przemyslaw Wirkus  

* config/aarch64/aarch64-cores.def (AARCH64_CORE): New
Cortex-X2 core.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb14825.patch
Description: rb14825.patch


[PATCH][GCC] aarch64: enable cortex-a710 CPU

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding 'cortex-a710' to -mcpu command line option.

gcc/ChangeLog:

2021-09-02  Przemyslaw Wirkus  

* config/aarch64/aarch64-cores.def (AARCH64_CORE): New
Cortex-A710 core.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb14824.patch
Description: rb14824.patch


[PATCH][GCC] aarch64: enable cortex-a510 CPU

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding 'cortex-a510' to -mcpu command line option.

gcc/ChangeLog:

2021-09-02  Przemyslaw Wirkus  

* config/aarch64/aarch64-cores.def (AARCH64_CORE): New
Cortex-A510 core.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb14822.patch
Description: rb14822.patch


[PATCH][GCC] aarch64: add armv9-a to -march

2021-09-22 Thread Przemyslaw Wirkus via Gcc-patches
Patch is adding new command line option 'armv9-a' to -march.

OK for master?

gcc/ChangeLog:

2021-09-22  Przemyslaw Wirkus  

* config/aarch64/aarch64-arches.def (AARCH64_ARCH): Added
armv9-a.
* config/aarch64/aarch64.h (AARCH64_FL_V9): New.
(AARCH64_FL_FOR_ARCH9): New flags for Armv9-A.
(AARCH64_ISA_V9): New ISA flag.


rb14821.patch
Description: rb14821.patch


RE: [backport gcc10, gcc9] Request to backport PR97969

2021-06-03 Thread Przemyslaw Wirkus via Gcc-patches


> -Original Message-
> From: Christophe Lyon 
> Sent: 03 June 2021 10:10
> To: Przemyslaw Wirkus 
> Cc: Vladimir Makarov ; ja...@redhat.com; Richard
> Earnshaw ; Richard Biener
> ; gcc-patches@gcc.gnu.org; Ramana Radhakrishnan
> 
> Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> 
> On Thu, 3 Jun 2021 at 10:54, Przemyslaw Wirkus
>  wrote:
> >
> >
> >
> > > -Original Message-
> > > From: Christophe Lyon 
> > > Sent: 03 June 2021 09:45
> > > To: Przemyslaw Wirkus 
> > > Cc: Vladimir Makarov ; ja...@redhat.com;
> > > Richard Earnshaw ; Richard Biener
> > > ; gcc-patches@gcc.gnu.org; Ramana Radhakrishnan
> > > 
> > > Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> > >
> > > > On Thu, 3 Jun 2021 at 00:31, Przemyslaw Wirkus via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > Hi,
> > > >
> > > > > -Original Message-
> > > > > From: Vladimir Makarov 
> > > > > Sent: 31 May 2021 16:52
> > > > > To: Przemyslaw Wirkus ; Richard
> > > > > Biener 
> > > > > Cc: gcc-patches@gcc.gnu.org; ja...@redhat.com; ni...@redhat.com;
> > > > > Richard Earnshaw ; Ramana
> > > Radhakrishnan
> > > > > ; Kyrylo Tkachov
> > > > > 
> > > > > Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> > > > >
> > > > >
> > > > > On 2021-05-25 5:14 a.m., Przemyslaw Wirkus wrote:
> > > > > > Hi,
> > > > > > Just a follow up after GCC 11 release.
> > > > > >
> > > > > > I've backported to gcc-10 branch (without any change to
> > > > > > original
> > > > > > patches)
> > > > > > PR97969 and following PR98722 & PR98777 patches.
> > > > > >
> > > > > > Commits apply cleanly without changes.
> > > > > > Built and regression tested on:
> > > > > > * arm-none-eabi and
> > > > > > * aarch64-none-linux-gnu cross toolchains.
> > > > > >
> > > > > > There were no issues and no regressions (all OK).
> > > > > >
> > > > > > OK for backport to gcc-10 branch ?
> > > > >
> > > > > Sorry for delay with the answer due to my vacation.
> > > > >
> > > > > > As the patches did not introduce new PRs I believe they are ok for
> > > > > > gcc-10.
> > > >
> > > > Backported to gcc-10 branch. Thank you for your support.
> > > >
> > >
> > > Hi,
> > >
> > > I'm surprised to see many new errors on arm after the backport for
> > > PR98722
> > > See:
> > > > https://people.linaro.org/~christophe.lyon/cross-validation/gcc/gcc-10/r10-9881-g1791b11d9cae388ae18a768eeb96c998439c986a/report-build-info.html
> > >
> > > Przemyslaw, Vladimir do you confirm r10-9881 has no such errors (new
> > > ICEs) on your side?
> >
> > Apologies.
> >
> > I've built and regtested before submitting backport yesterday.
> > I will check on my side and build one of your failing configurations.
> >
> 
> After I sent the previous email, I received the validation results for the
> next backport, and it seems it fixes the ICEs introduced by r10-9881:
> https://people.linaro.org/~christophe.lyon/cross-validation/gcc/gcc-10/r10-9882-g05f6971ac40912ef062915f88b3ea0bf27278285/report-build-info.html
> 
> I guess you ran validations with the 3 backports combined, rather than
> individually?


Yes, these three PRs are all connected to each other. That's why I ran
validation after the third one, not for each one separately.

PS: Office folks now roast me with true passion ;)

P.

> So it looks OK now.
> 
> Thanks,
> 
> Christophe
> 
> 
> > Przemyslaw
> >
> > > Thanks
> > >
> > > > Kind regards
> > > > Przemyslaw
> > > >
> > > > > Thank you.
> > > > >
> > > > > >
> > > > > > Kind regards,
> > > > > > Przemyslaw Wirkus
> > > > > >
> > > > > > ---
> > > > > > commits I've backported:
> > > > > >
> > > > > > commit cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > > > > > Author: Vladimir N. Makarov 
> > > > > > Date:   Tue Jan 12 11:26:15 2021 -0500
> > > > > >
> > > > > >  [PR97969] LRA: Transform pattern `plus (plus (hard reg,
> > > > > > const), pseudo)`
> > > > > after elimination
> > > > > >
> > > > > > commit 4334b524274203125193a08a8485250c41c2daa9
> > > > > > Author: Vladimir N. Makarov 
> > > > > > Date:   Wed Jan 20 11:40:14 2021 -0500
> > > > > >
> > > > > >  [PR98722] LRA: Check that target has no 3-op add insn to
> > > > > > transform 2
> > > > > plus expression.
> > > > > >
> > > > > > commit 68ba1039c7daf0485b167fe199ed7e8031158091
> > > > > > Author: Vladimir N. Makarov 
> > > > > > Date:   Thu Jan 21 17:27:01 2021 -0500
> > > > > >
> > > > > >  [PR98777] LRA: Use preliminary created pseudo for in LRA
> > > > > > elimination
> > > > > subpass
> > > > > >
> > > > > > $ ./contrib/git-backport.py
> > > > > > cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > > > > > $ ./contrib/git-backport.py
> > > > > > 4334b524274203125193a08a8485250c41c2daa9
> > > > > > $ ./contrib/git-backport.py
> > > > > > 68ba1039c7daf0485b167fe199ed7e8031158091
> > > > > >
> > > > > >
> > > > > >> Richard.
> > > >


RE: [backport gcc10, gcc9] Request to backport PR97969

2021-06-03 Thread Przemyslaw Wirkus via Gcc-patches


> -Original Message-
> From: Christophe Lyon 
> Sent: 03 June 2021 09:45
> To: Przemyslaw Wirkus 
> Cc: Vladimir Makarov ; ja...@redhat.com; Richard
> Earnshaw ; Richard Biener
> ; gcc-patches@gcc.gnu.org; Ramana Radhakrishnan
> 
> Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> 
> On Thu, 3 Jun 2021 at 00:31, Przemyslaw Wirkus via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi,
> >
> > > -Original Message-
> > > From: Vladimir Makarov 
> > > Sent: 31 May 2021 16:52
> > > To: Przemyslaw Wirkus ; Richard Biener
> > > 
> > > Cc: gcc-patches@gcc.gnu.org; ja...@redhat.com; ni...@redhat.com;
> > > Richard Earnshaw ; Ramana
> Radhakrishnan
> > > ; Kyrylo Tkachov
> > > 
> > > Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> > >
> > >
> > > On 2021-05-25 5:14 a.m., Przemyslaw Wirkus wrote:
> > > > Hi,
> > > > Just a follow up after GCC 11 release.
> > > >
> > > > I've backported to gcc-10 branch (without any change to original
> > > > patches)
> > > > PR97969 and following PR98722 & PR98777 patches.
> > > >
> > > > Commits apply cleanly without changes.
> > > > Built and regression tested on:
> > > > * arm-none-eabi and
> > > > * aarch64-none-linux-gnu cross toolchains.
> > > >
> > > > There were no issues and no regressions (all OK).
> > > >
> > > > OK for backport to gcc-10 branch ?
> > >
> > > Sorry for delay with the answer due to my vacation.
> > >
> > > As the patches did not introduce new PRs I believe they are ok for gcc-10.
> >
> > Backported to gcc-10 branch. Thank you for your support.
> >
> 
> Hi,
> 
> I'm surprised to see many new errors on arm after the backport for PR98722
> See: https://people.linaro.org/~christophe.lyon/cross-validation/gcc/gcc-10/r10-9881-g1791b11d9cae388ae18a768eeb96c998439c986a/report-build-info.html
> 
> Przemyslaw, Vladimir do you confirm r10-9881 has no such errors (new
> ICEs) on your side?

Apologies.

I've built and regtested before submitting backport yesterday.
I will check on my side and build one of your failing configurations.

Przemyslaw

> Thanks
> 
> > Kind regards
> > Przemyslaw
> >
> > > Thank you.
> > >
> > > >
> > > > Kind regards,
> > > > Przemyslaw Wirkus
> > > >
> > > > ---
> > > > commits I've backported:
> > > >
> > > > commit cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > > > Author: Vladimir N. Makarov 
> > > > Date:   Tue Jan 12 11:26:15 2021 -0500
> > > >
> > > >  [PR97969] LRA: Transform pattern `plus (plus (hard reg,
> > > > const), pseudo)`
> > > after elimination
> > > >
> > > > commit 4334b524274203125193a08a8485250c41c2daa9
> > > > Author: Vladimir N. Makarov 
> > > > Date:   Wed Jan 20 11:40:14 2021 -0500
> > > >
> > > >  [PR98722] LRA: Check that target has no 3-op add insn to
> > > > transform 2
> > > plus expression.
> > > >
> > > > commit 68ba1039c7daf0485b167fe199ed7e8031158091
> > > > Author: Vladimir N. Makarov 
> > > > Date:   Thu Jan 21 17:27:01 2021 -0500
> > > >
> > > >  [PR98777] LRA: Use preliminary created pseudo for in LRA
> > > > elimination
> > > subpass
> > > >
> > > > $ ./contrib/git-backport.py
> > > > cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > > > $ ./contrib/git-backport.py
> > > > 4334b524274203125193a08a8485250c41c2daa9
> > > > $ ./contrib/git-backport.py
> > > > 68ba1039c7daf0485b167fe199ed7e8031158091
> > > >
> > > >
> > > >> Richard.
> >


RE: [backport gcc10, gcc9] Request to backport PR97969

2021-06-02 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

> -Original Message-
> From: Vladimir Makarov 
> Sent: 31 May 2021 16:52
> To: Przemyslaw Wirkus ; Richard Biener
> 
> Cc: gcc-patches@gcc.gnu.org; ja...@redhat.com; ni...@redhat.com;
> Richard Earnshaw ; Ramana Radhakrishnan
> ; Kyrylo Tkachov
> 
> Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> 
> 
> On 2021-05-25 5:14 a.m., Przemyslaw Wirkus wrote:
> > Hi,
> > Just a follow up after GCC 11 release.
> >
> > I've backported to gcc-10 branch (without any change to original patches)
> > PR97969 and following PR98722 & PR98777 patches.
> >
> > Commits apply cleanly without changes.
> > Built and regression tested on:
> > * arm-none-eabi and
> > * aarch64-none-linux-gnu cross toolchains.
> >
> > There were no issues and no regressions (all OK).
> >
> > OK for backport to gcc-10 branch ?
> 
> Sorry for delay with the answer due to my vacation.
> 
> As the patches did not introduce new PRs I believe they are ok for gcc-10.

Backported to gcc-10 branch. Thank you for your support.

Kind regards
Przemyslaw

> Thank you.
> 
> >
> > Kind regards,
> > Przemyslaw Wirkus
> >
> > ---
> > commits I've backported:
> >
> > commit cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > Author: Vladimir N. Makarov 
> > Date:   Tue Jan 12 11:26:15 2021 -0500
> >
> >  [PR97969] LRA: Transform pattern `plus (plus (hard reg, const), 
> > pseudo)`
> after elimination
> >
> > commit 4334b524274203125193a08a8485250c41c2daa9
> > Author: Vladimir N. Makarov 
> > Date:   Wed Jan 20 11:40:14 2021 -0500
> >
> >  [PR98722] LRA: Check that target has no 3-op add insn to transform 2
> plus expression.
> >
> > commit 68ba1039c7daf0485b167fe199ed7e8031158091
> > Author: Vladimir N. Makarov 
> > Date:   Thu Jan 21 17:27:01 2021 -0500
> >
> >  [PR98777] LRA: Use preliminary created pseudo for in LRA elimination
> subpass
> >
> > $ ./contrib/git-backport.py cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > $ ./contrib/git-backport.py 4334b524274203125193a08a8485250c41c2daa9
> > $ ./contrib/git-backport.py 68ba1039c7daf0485b167fe199ed7e8031158091
> >
> >
> >> Richard.



RE: [backport gcc10, gcc9] Request to backport PR97969

2021-05-25 Thread Przemyslaw Wirkus via Gcc-patches
> -Original Message-
> From: Richard Biener 
> Sent: 02 February 2021 10:08
> To: Przemyslaw Wirkus 
> Cc: Vladimir Makarov ; gcc-patches@gcc.gnu.org;
> ja...@redhat.com; ni...@redhat.com; Richard Earnshaw
> ; Ramana Radhakrishnan
> ; Kyrylo Tkachov
> 
> Subject: RE: [backport gcc10, gcc9] Requet to backport PR97969
> 
> On Tue, 2 Feb 2021, Przemyslaw Wirkus wrote:
> 
> > > On 2021-01-18 7:50 a.m., Richard Biener wrote:
> > > > On Mon, 18 Jan 2021, Przemyslaw Wirkus wrote:
> > > >
> > > >> Hi all,
> > > >>
> > > >> Can we backport PR97969 patch to GCC 10 and (maybe) GCC 9 ?:
> > > >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97969
> > > >>
> > > >> IMHO bug is severe and could land in GCC 10 and 9. Vladimir's
> > > >> original
> > > patch:
> > > > >> https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563322.html
> > > > >> applies without changes to both gcc-10 and gcc-9.
> > > >>
> > > >> I've regression tested this patch on both gcc-10 and gcc-9
> > > >> branched for
> > > >> x86_64 cross (arm-eabi target) and no issues.
> > > >>
> > > >> OK for gcc-10 and gcc-9 ?
> > > > I see two fallout PRs with a trivial search: PR98643 and PR98722.
> > > > LRA patches quite easily trigger unexpected fallout unfortunately ...
> > > >
> > > Yes, I am agree.  We should wait until the new regressions are
> > > fixed.  I am going to work on this patch more to fix the new
> > > > regressions. Although the basic idea of the original problem solution
> probably will stay the same.
> >
> > I've retested series of three patches which are related to this PR:
> >
> > 19af25c0b3aa2a78b4d45d295359ec26cb9fc607 [PR98777]
> > 79c57603602c4493b6baa1d47ed451e8f5e9c0f3 [PR98722]
> > 34aa56af2547e1646c0f07b9b88b210ebdb2a9f5 [PR97969]
> >
> > on top of gcc-10 branch.
> >
> > Bootstrapped and regression tested on aarch64-linux-gnu machine and no
> issues.
> > Regression tested on x86_64 host (arm-eabi target) cross and no issues.
> >
> > OK for gcc-10 ?
> 
> I think this warrants waiting until at least the GCC 11 release.

Hi,
Just a follow up after GCC 11 release.

I've backported to gcc-10 branch (without any change to original patches)
PR97969 and following PR98722 & PR98777 patches.

Commits apply cleanly without changes.
Built and regression tested on:
* arm-none-eabi and
* aarch64-none-linux-gnu cross toolchains.

There were no issues and no regressions (all OK).

OK for backport to gcc-10 branch ?

Kind regards,
Przemyslaw Wirkus

---
commits I've backported:

commit cf2ac1c30af0fa783c8d72e527904dda5d8cc330
Author: Vladimir N. Makarov 
Date:   Tue Jan 12 11:26:15 2021 -0500

[PR97969] LRA: Transform pattern `plus (plus (hard reg, const), pseudo)` 
after elimination

commit 4334b524274203125193a08a8485250c41c2daa9
Author: Vladimir N. Makarov 
Date:   Wed Jan 20 11:40:14 2021 -0500

[PR98722] LRA: Check that target has no 3-op add insn to transform 2 plus 
expression.

commit 68ba1039c7daf0485b167fe199ed7e8031158091
Author: Vladimir N. Makarov 
Date:   Thu Jan 21 17:27:01 2021 -0500

[PR98777] LRA: Use preliminary created pseudo for in LRA elimination subpass

$ ./contrib/git-backport.py cf2ac1c30af0fa783c8d72e527904dda5d8cc330
$ ./contrib/git-backport.py 4334b524274203125193a08a8485250c41c2daa9
$ ./contrib/git-backport.py 68ba1039c7daf0485b167fe199ed7e8031158091


> Richard.


RE: [backport gcc10, gcc9] Request to backport PR97969

2021-02-02 Thread Przemyslaw Wirkus via Gcc-patches
> On 2021-01-18 7:50 a.m., Richard Biener wrote:
> > On Mon, 18 Jan 2021, Przemyslaw Wirkus wrote:
> >
> >> Hi all,
> >>
> >> Can we backport PR97969 patch to GCC 10 and (maybe) GCC 9 ?:
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97969
> >>
> >> IMHO bug is severe and could land in GCC 10 and 9. Vladimir's original
> patch:
> >> https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563322.html
> >> applies without changes to both gcc-10 and gcc-9.
> >>
> >> I've regression tested this patch on both gcc-10 and gcc-9 branched
> >> for
> >> x86_64 cross (arm-eabi target) and no issues.
> >>
> >> OK for gcc-10 and gcc-9 ?
> > I see two fallout PRs with a trivial search: PR98643 and PR98722.  LRA
> > patches quite easily trigger unexpected fallout unfortunately ...
> >
> Yes, I am agree.  We should wait until the new regressions are fixed.  I am
> going to work on this patch more to fix the new regressions. Although the
> basic idea of the original problem solution probably will stay the same.

I've retested series of three patches which are related to this PR:

19af25c0b3aa2a78b4d45d295359ec26cb9fc607 [PR98777]
79c57603602c4493b6baa1d47ed451e8f5e9c0f3 [PR98722]
34aa56af2547e1646c0f07b9b88b210ebdb2a9f5 [PR97969]

on top of gcc-10 branch.

Bootstrapped and regression tested on aarch64-linux-gnu machine and no issues.
Regression tested on x86_64 host (arm-eabi target) cross and no issues.

OK for gcc-10 ?

> >> PS: I can commit if approved.
> >>



[backport gcc10] Request to backport PR94230 (-flarge-source-files)

2021-01-26 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

Can we backport PR94230 patch (add a new diagnostic
flag -flarge-source-files) to GCC 10 ?
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94230

PR94230 backport will benefit people moving from
GCC 9 to GCC 10 who face issue while working with
large header/source files.
See example issue on Linaro Bugzilla:
https://bugs.linaro.org/show_bug.cgi?id=5735

Patch applies without any changes to gcc-10 branch.
I took the liberty and tested backport on AArch64
host (aarch64-linux-gnu target). Bootstrapped and
regtested and no issues.

OK for gcc-10 branch ?

PS: I can backport and commit if there are no objections.

Kind regards,
Przemyslaw Wirkus



[backport gcc10, gcc9] Request to backport PR97969

2021-01-18 Thread Przemyslaw Wirkus via Gcc-patches
Hi all,

Can we backport PR97969 patch to GCC 10 and (maybe) GCC 9 ?:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97969

IMHO bug is severe and could land in GCC 10 and 9. Vladimir's original patch:
https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563322.html
applies without changes to both gcc-10 and gcc-9.

I've regression tested this patch on both gcc-10 and gcc-9 branched for
x86_64 cross (arm-eabi target) and no issues.

OK for gcc-10 and gcc-9 ?

PS: I can commit if approved.

Kind regards,
Przemyslaw Wirkus



RE: [PATCH][GCC][PR target/98177] aarch64: SVE: ICE in expand_direct_optab_fn

2020-12-18 Thread Przemyslaw Wirkus via Gcc-patches
> Przemyslaw Wirkus  writes:
> > > This is a bug in the vectoriser: the vectoriser shouldn't generate
> > > IFN_REDUC_MAX calls that the target doesn't support.
> > >
> > > I think the problem comes from using the wrong interface to get the
> > > index type for a COND_REDUCTION.  vectorizable_reduction has:
> > >
> > >   cr_index_vector_type = build_vector_type (cr_index_scalar_type,
> > > nunits_out);
> > >
> > > which means that for fixed-length SVE we get a V2SI (a 64-bit
> > > Advanced SIMD
> > > vector) instead of a VNx2SI (an SVE vector that stores SI elements
> > > in DI containers).  It should be using:
> > >
> > >   cr_index_vector_type = get_same_sized_vectype
> (cr_index_scalar_type,
> > >  vectype_out);
> > >
> > > instead.  Same idea for the build_vector_type call in
> > > vect_create_epilog_for_reduction.
> 
> Note that for this last bit I meant:
> 
>   tree vectype_unsigned = build_vector_type
>   (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
> 
> which should become:
> 
>   tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned,
> vectype);
> 
> This is the “transform” code that partners the “analysis” code that you're
> patching.  Changing one but not the other would cause problems if (say) the
> Advanced SIMD REDUC_MAX patterns were disabled.  We'd then correctly
> pick an SVE mode like VNx4SI when doing the analysis, but generate an
> unsupported V4SI REDUC_MAX in vect_create_epilog_for_reduction.
> That in turn would trip the kind of expand-time assert that was reported in
> the PR, just for a different case.
> 
> It's better for the modes to match up anyway: we should use a VNx4SI
> reduction when operating on SVE and a V4SI reduction when operating on
> Advanced SIMD.  This is particularly true for big endian, where mixing SVE
> and Advanced SIMD can involve a permute.
> 
> > diff --git a/gcc/testsuite/g++.target/aarch64/pr98177-1.C
> > b/gcc/testsuite/g++.target/aarch64/pr98177-1.C
> > new file mode 100644
> > index
> >
> ..a776b7352f966f6b1d870e
> d51a7c
> > 94647bc46d80
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/aarch64/pr98177-1.C
> > @@ -0,0 +1,10 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-Ofast -march=armv8.2-a+sve -msve-vector-bits=128" }
> > +*/
> > +
> > +int a, b;
> > +short c;
> > +void d(long e) {
> > +  for (int f = 0; f < b; f += 1)
> > +for (short g = 0; g < c; g += 5)
> > +  a = (short)e;
> > +}
> 
> It'd be better to put these in g++.target/aarch64/sve and drop the -march
> option.  That way we'll test with the user's specified -march or -mcpu if
> that -march/-mcpu already supports SVE.
> 
> Same idea for the other tests (including the C ones).
> 
> OK for trunk with those changes, thanks.

commit d44d47b49267b4265cee16d25b3f89dbf967cc0c

> Richard


rb13905_v3.patch
Description: rb13905_v3.patch


RE: [PATCH][GCC] arm: Add support for Cortex-A78C

2020-12-17 Thread Przemyslaw Wirkus via Gcc-patches
> > Subject: [PATCH][GCC] arm: Add support for Cortex-A78C
> >
> > This patch adds support for -mcpu=cortex-a78c command line option.
> > For more information about this processor, see [0]:
> >
> > [0] https://developer.arm.com/ip-products/processors/cortex-a/cortex-a78c
> >
> > OK for master ?
>
> Whoops, sorry I missed this.
> Ok for master.

commit 35b8d268746362fa66e297c4ae152a9cfafd4bb0

> Thanks,
> Kyrill
> 
> >
> > gcc/ChangeLog:
> >
> > * config/arm/arm-cpus.in: Add Cortex-A78C core.
> > * config/arm/arm-tables.opt: Regenerate.
> > * config/arm/arm-tune.md: Regenerate.
> > * doc/invoke.texi: Update docs.
> >



RE: [PATCH][GCC][PR target/98177] aarch64: SVE: ICE in expand_direct_optab_fn

2020-12-16 Thread Przemyslaw Wirkus via Gcc-patches
> Przemyslaw Wirkus  writes:
> > Hi,
> >
> > Recent 'support SVE comparisons for unpacked integers' patch extends
> > operands of define_expands from SVE_FULL to SVE_ALL. This causes an
> > ICE hence this PR patch.
> >
> > This patch adds this relaxation for:
> > + reduc__scal_ and
> > + arch64_pred_reduc__
> > in order to support extra modes. Missing modes were used in REDUC_MAX.
> >
> > Original PR snippet proposed to reproduce issue was only causing ICE
> > for C++ compiler (see pr98177-1 test cases). I've slightly modified
> > original snippet in order to reproduce issue on both C and C++
> > compilers. These are pr98177-2 test cases.
> >
> > Bootstrap/regression test for AArch64 aarch64-elf and no issues.
> 
> This is a bug in the vectoriser: the vectoriser shouldn't generate
> IFN_REDUC_MAX calls that the target doesn't support.
> 
> I think the problem comes from using the wrong interface to get the index
> type for a COND_REDUCTION.  vectorizable_reduction has:
> 
>   cr_index_vector_type = build_vector_type (cr_index_scalar_type,
> nunits_out);
> 
> which means that for fixed-length SVE we get a V2SI (a 64-bit Advanced SIMD
> vector) instead of a VNx2SI (an SVE vector that stores SI elements in DI
> containers).  It should be using:
> 
>   cr_index_vector_type = get_same_sized_vectype (cr_index_scalar_type,
>  vectype_out);
> 
> instead.  Same idea for the build_vector_type call in
> vect_create_epilog_for_reduction.

Hi Richard,
I've followed your guidance and indeed root cause was as you described.
Please see new patch in attachment.

Bootstrap/regression test for AArch64 aarch64-elf and no issues.

OK for master?

gcc/ChangeLog:

PR target/98177
* tree-vect-loop.c (vectorizable_reduction): Use get_same_sized_vectype 
to
obtain index type.

gcc/testsuite/ChangeLog:

PR target/98177
* g++.target/aarch64/pr98177-1.C: New test.
* g++.target/aarch64/pr98177-2.C: New test.
* gcc.target/aarch64/pr98177-1.c: New test.
* gcc.target/aarch64/pr98177-2.c: New test.

> Thanks,
> Richard


rb13905_v2.patch
Description: rb13905_v2.patch


[PATCH][GCC][PR target/98177] aarch64: SVE: ICE in expand_direct_optab_fn

2020-12-14 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

Recent 'support SVE comparisons for unpacked integers' patch extends
operands of define_expands from SVE_FULL to SVE_ALL. This causes an ICE
hence this PR patch.

This patch adds this relaxation for:
+ reduc_<optab>_scal_<mode> and
+ @aarch64_pred_reduc_<optab>_<mode>
in order to support extra modes. Missing modes were used in REDUC_MAX.

Original PR snippet proposed to reproduce issue was only causing ICE for C++
compiler (see pr98177-1 test cases). I've slightly modified original snippet in
order to reproduce issue on both C and C++ compilers. These are pr98177-2
test cases.

Bootstrap/regression test for AArch64 aarch64-elf and no issues.

OK for master?

gcc/ChangeLog:

PR target/98177
* config/aarch64/aarch64-sve.md: Extend integer SVE modes.
(reduc_<optab>_scal_<mode>): Extend SVE_FULL_I to SVE_I.
(@aarch64_pred_reduc_<optab>_<mode>): Likewise.

gcc/testsuite/ChangeLog:

PR target/98177
* g++.target/aarch64/pr98177-1.C: New test.
* g++.target/aarch64/pr98177-2.C: New test.
* gcc.target/aarch64/pr98177-1.c: New test.
* gcc.target/aarch64/pr98177-2.c: New test.



rb13905.patch
Description: rb13905.patch


RE: [PATCH][GCC] aarch64: Add support for Cortex-A78C

2020-12-14 Thread Przemyslaw Wirkus via Gcc-patches
> > OK for master ?
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-A78C core.
> > * config/aarch64/aarch64-tune.md: Regenerate.
> > * doc/invoke.texi: Update docs.
> 
> OK, thanks.
> Richard

commit cf7efe2d36f4f940afebae04fc342dbd3d386b9b


RE: [PATCH][GCC] aarch64: Add +pauth to -march

2020-12-09 Thread Przemyslaw Wirkus via Gcc-patches
> > Subject: [PATCH][GCC] aarch64: Add +pauth to -march
> >
> > New +pauth (Pointer Authentication from Armv8.3-A) feature option for
> > -march command line option.
> >
> > Please note that majority of PAUTH instructions are implemented behind
> > HINT instruction. PAUTH stays a Armv8.3-A feature but now can be
> > assigned to other architectures or CPUs.
> >
> > Patch includes:
> > - new +pauth command line option.
> > - docs update to +flagm command line option in docs.
> >
> > Regression tested and no issues.
> >
> > OK for master?
> Ok.
> Thanks,
> Kyrill

commit ef33047a8b93d416f08f3f640dd65f3887fb05c1

> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-option-extensions.def
> > (AARCH64_OPT_EXTENSION): New +pauth option in -march for AArch64.
> > * config/aarch64/aarch64.h (AARCH64_FL_PAUTH): New pauth extension
> > bitmask.
> > (AARCH64_ISA_PAUTH): New ISA bitmask for PAUTH.
> > (AARCH64_FL_FOR_ARCH8_3): Add PAUTH to Armv8.3-A.
> > (TARGET_PAUTH): New target mask to isolate PAUTH instructions.
> > * config/aarch64/aarch64.md (do_return): Condition set to TARGET_PAUTH.
> > * doc/invoke.texi: Update docs (+flagm, +pauth).



[PATCH][GCC] arm: Add support for Cortex-A78C

2020-12-08 Thread Przemyslaw Wirkus via Gcc-patches
This patch adds support for -mcpu=cortex-a78c command line option.
For more information about this processor, see [0]:

[0] https://developer.arm.com/ip-products/processors/cortex-a/cortex-a78c

OK for master ?

gcc/ChangeLog:

* config/arm/arm-cpus.in: Add Cortex-A78C core.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* doc/invoke.texi: Update docs.



rb13728.patch
Description: rb13728.patch


[PATCH][GCC] aarch64: Add support for Cortex-A78C

2020-12-08 Thread Przemyslaw Wirkus via Gcc-patches
This patch adds support for -mcpu=cortex-a78c command line option.
For more information about this processor, see [0]:

[0] https://developer.arm.com/ip-products/processors/cortex-a/cortex-a78c

OK for master ?

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Cortex-A78C core.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb13727.patch
Description: rb13727.patch


[PATCH][GCC] aarch64: Add +pauth to -march

2020-12-07 Thread Przemyslaw Wirkus via Gcc-patches
New +pauth (Pointer Authentication from Armv8.3-A) feature option for
-march command line option.

Please note that majority of PAUTH instructions are implemented behind HINT
instruction. PAUTH stays a Armv8.3-A feature but now can be assigned to other
architectures or CPUs.

Patch includes:
- new +pauth command line option.
- docs update to +flagm command line option in docs.

Regression tested and no issues.

OK for master?

gcc/ChangeLog:

* config/aarch64/aarch64-option-extensions.def
(AARCH64_OPT_EXTENSION): New +pauth option in -march for AArch64.
* config/aarch64/aarch64.h (AARCH64_FL_PAUTH): New pauth extension 
bitmask.
(AARCH64_ISA_PAUTH): New ISA bitmask for PAUTH.
(AARCH64_FL_FOR_ARCH8_3): Add PAUTH to Armv8.3-A.
(TARGET_PAUTH): New target mask to isolate PAUTH instructions.
* config/aarch64/aarch64.md (do_return): Condition set to TARGET_PAUTH.
* doc/invoke.texi: Update docs (+flagm, +pauth).


rb13808.patch
Description: rb13808.patch


RE: [PATCH][GCC] aarch64: Add +flagm to -march

2020-12-03 Thread Przemyslaw Wirkus via Gcc-patches
> >> >> gcc/ChangeLog:
> >> >>
> >> >> * config/aarch64/aarch64-option-extensions.def
> >> >> (AARCH64_OPT_EXTENSION): New +flagm option in -march for AArch64.
> >> >> * config/aarch64/aarch64.h (AARCH64_FL_FLAGM): Add new flagm
> >> >> extension bit mask.
> >> >> (AARCH64_FL_FOR_ARCH8_4): Add flagm to Armv8.4-A.
> >> >
> >> > OK, thanks, and sorry for the slow review.
> >>
> >> Just remembered that we also need documentation for the new feature
> >> flag in doc/invoke.texi.
> >
> > Done. Thank you for the reminder.
> >
> > commit 48ff86adfd96a0f5132273719932b48a14941881
> 
> I think you also need to add flagm to:
> 
> ---
> The table below summarizes the permissible values for @var{arch} and the
> features that they enable by default:
> 
> @multitable @columnfractions 0.20 0.20 0.60 @headitem @var{arch} value
> @tab Architecture @tab Includes by default … @item @samp{armv8.4-a}
> @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+fp16fml},
> @samp{+dotprod}
> ---

Yes, I've just noticed that :/
I will update this with my next patch which adds yet another -march flag if 
that's OK with you?

> Thanks,
> Richard


RE: [PATCH][GCC] aarch64: Add +flagm to -march

2020-12-03 Thread Przemyslaw Wirkus via Gcc-patches
> >> gcc/ChangeLog:
> >>
> >> * config/aarch64/aarch64-option-extensions.def
> >> (AARCH64_OPT_EXTENSION): New +flagm option in -march for AArch64.
> >> * config/aarch64/aarch64.h (AARCH64_FL_FLAGM): Add new flagm
> >> extension bit mask.
> >> (AARCH64_FL_FOR_ARCH8_4): Add flagm to Armv8.4-A.
> >
> > OK, thanks, and sorry for the slow review.
> 
> Just remembered that we also need documentation for the new feature flag in
> doc/invoke.texi.

Done. Thank you for the reminder.

commit 48ff86adfd96a0f5132273719932b48a14941881

> Thanks,
> Richard


[PATCH][GCC] aarch64: Add +flagm to -march

2020-11-30 Thread Przemyslaw Wirkus via Gcc-patches
New +flagm (Condition flag manipulation from Armv8.4-A) feature option for
-march command line option.

Please note that FLAGM stays an Armv8.4-A feature but now can be
assigned to other architectures or CPUs.

OK for master?

gcc/ChangeLog:

* config/aarch64/aarch64-option-extensions.def
(AARCH64_OPT_EXTENSION): New +flagm option in -march for AArch64.
* config/aarch64/aarch64.h (AARCH64_FL_FLAGM): Add new flagm extension 
bit
mask.
(AARCH64_FL_FOR_ARCH8_4): Add flagm to Armv8.4-A.


rb13807.patch
Description: rb13807.patch


RE: [PATCH][GCC-10 backport] arm: Fix fp16 move patterns for base MVE

2020-10-13 Thread Przemyslaw Wirkus via Gcc-patches
> > Backport of commit 6abd428605e3a279e533fde1cecbc9735ce03b66
> > from master branch.
> >
> > OK for gcc-10 ?

Cherry-picked and applied: commit eb061188276d0ac9ec53fd5619c578a6bce6b129

> Ok.
> Thanks,
> Kyrill


RE: [PATCH][GCC-10 backport] arm: Fix ICEs in no-literal-pool.c on MVE [PR97251]

2020-10-13 Thread Przemyslaw Wirkus via Gcc-patches
> > This patch is a backport of PR97251 fix already committed to master.
> >
> > OK for gcc-10 branch ?

Cherry-picked and applied: commit d121b3259b77203e62402024add1538c1bdf5fdf

> Ok.
> Thanks,
> Kyrill


[PATCH][GCC-10 backport] arm: Fix fp16 move patterns for base MVE

2020-10-13 Thread Przemyslaw Wirkus via Gcc-patches
Backport of commit 6abd428605e3a279e533fde1cecbc9735ce03b66
from master branch.

OK for gcc-10 ?

This patch fixes ICEs in gcc.dg/torture/float16-basic.c for
-march=armv8.1-m.main+mve -mfloat-abi=hard.  The problem was
that an fp16 argument was (rightly) being passed in FPRs,
but the fp16 move patterns only handled GPRs.  LRA then cycled
trying to look for a way of handling the FPR.

It looks like there are three related problems here:

(1) We're using the wrong fp16 move pattern for base MVE.
*mov_vfp_16 (the pattern we use for +mve.fp)
works for base MVE too.

(2) The fp16 MVE load and store patterns are separate from the
main move patterns.  The loads and stores should instead be
alternatives of the main move patterns, so that LRA knows
what to do with pseudo registers that become stack slots.

(3) The range restrictions for the loads and stores were wrong
for fp16: we were enforcing a multiple of 4 in [-255*4, 255*4]
instead of a multiple of 2 in [-255*2, 255*2].

(2) came from a patch to prevent writeback being used for MVE.
That patch also added a Uj constraint to enforce the correct
memory types for MVE.  I think the simplest fix is therefore to merge
the loads and stores back into the main pattern and extend the Uj
constraint so that it acts like Um for non-MVE.

The testcase for that patch was mve-vldstr16-no-writeback.c, whose
main function is:

void
fn1 (__fp16 *pSrc)
{
  __fp16 high;
  __fp16 *pDst = 0;
  unsigned i;
  for (i = 0;; i++)
if (pSrc[i])
  pDst[i] = high;
}

Fixing (2) causes the store part to fail, not because we're using
writeback, but because we decide to use GPRs to store high (which is
uninitialised, and so gets replaced with zero).  This patch therefore
adds some scan-assembler-nots instead.  (I wondered about changing the
testcase to initialise high, but that seemed like a bad idea for
a regression test.)

For (3): MVE seems to be the only thing to use arm_coproc_mem_operand_wb
(and its various interfaces) for 16-bit scalars: the Neon patterns only
use it for 32-bit scalars.

I've added new tests to try the various FPR alternatives of the
move patterns.  The range of offsets that GCC uses for FPR loads
and stores is the intersection of the range allowed for GPRs and
FPRs, so the tests include GPR<->memory tests as well.

The fp32 and fp64 tests already pass, they're just there for
completeness.

gcc/
* config/arm/arm-protos.h (arm_mve_mode_and_operands_type_check):
Delete.
* config/arm/arm.c (arm_coproc_mem_operand_wb): Use a scale factor
of 2 rather than 4 for 16-bit modes.
(arm_mve_mode_and_operands_type_check): Delete.
* config/arm/constraints.md (Uj): Allow writeback for Neon,
but continue to disallow it for MVE.
* config/arm/arm.md (*arm32_mov): Add !TARGET_HAVE_MVE.
* config/arm/vfp.md (*mov_load_vfp_hf16, *mov_store_vfp_hf16): Fold
back into...
(*mov_vfp_16): ...here but use Uj for the FPR memory
constraints.  Use for base MVE too.

gcc/testsuite/
* gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: Allow
the store to use GPRs instead of FPRs.  Add scan-assembler-nots
for writeback.
* gcc.target/arm/armv8_1m-fp16-move-1.c: New test.
* gcc.target/arm/armv8_1m-fp32-move-1.c: Likewise.
* gcc.target/arm/armv8_1m-fp64-move-1.c: Likewise.


mve_move_backport.patch
Description: mve_move_backport.patch


[PATCH][GCC-10 backport] arm: Fix ICEs in no-literal-pool.c on MVE [PR97251]

2020-10-13 Thread Przemyslaw Wirkus via Gcc-patches
This patch is a backport of PR97251 fix already committed to master.

OK for gcc-10 branch ?

This patch fixes ICEs when compiling
gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c with
-mfp16-format=ieee -mfloat-abi=hard -march=armv8.1-m.main+mve
-mpure-code.

The existing conditions in the movsf/movdf expanders (as well as the
no_literal_pool patterns) were too restrictive, requiring
TARGET_HARD_FLOAT instead of TARGET_VFP_BASE, which caused unrecognised
insns when compiling this testcase with integer MVE and -mpure-code.

gcc/:

PR target/97251
* config/arm/arm.md (movsf): Relax TARGET_HARD_FLOAT to
TARGET_VFP_BASE.
(movdf): Likewise.
* config/arm/vfp.md (no_literal_pool_df_immediate): Likewise.
(no_literal_pool_sf_immediate): Likewise.


PR97251_backport.patch
Description: PR97251_backport.patch


RE: [PATCH][GCC][ARM] Add support for Cortex-A78 and Cortex-A78AE

2020-09-30 Thread Przemyslaw Wirkus via Gcc-patches
> > Subject: [PATCH][GCC][ARM] Add support for Cortex-A78 and Cortex-A78AE
> >
> > This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1]
> > cpus.
> >
> > [0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78
> > [1]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae
> >
> > OK for master branch ?

commit 60e4b3cade5c63f919df4ddc0f0d23261f968e13

> Ok.
> Thanks,
> Kyrill
> 
> >
> > kind regards
> > Przemyslaw Wirkus
> >
> > gcc/ChangeLog:
> >
> > * config/arm/arm-cpus.in: Add Cortex-A78 and Cortex-A78AE cores.
> > * config/arm/arm-tables.opt: Regenerate.
> > * config/arm/arm-tune.md: Regenerate.
> > * doc/invoke.texi: Update docs.


RE: [PATCH][GCC][AArch64] Add support for Cortex-A78 and Cortex-A78AE

2020-09-30 Thread Przemyslaw Wirkus via Gcc-patches
> > Subject: [PATCH][GCC][AArch64] Add support for Cortex-A78 and Cortex-
> > A78AE
> >
> > This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1]
> > cpus.
> >
> > [0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78
> > [1]:
> > https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae
> >
> > OK for master branch ?

commit b6860cb96d038fe7519797adfb9c3c2e635234de

> Ok.
> Thanks,
> Kyrill
> 
> >
> > kind regards
> > Przemyslaw Wirkus
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-cores.def: Add Cortex-A78 and Cortex-
> A78AE
> > cores.
> > * config/aarch64/aarch64-tune.md: Regenerate.
> > * doc/invoke.texi: Add -mtune=cortex-a78 and -mtune=cortex-a78ae.


[PATCH][GCC][ARM] Add support for Cortex-A78 and Cortex-A78AE

2020-09-30 Thread Przemyslaw Wirkus via Gcc-patches
This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1]
cpus.

[0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78
[1]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae

OK for master branch ?

kind regards
Przemyslaw Wirkus

gcc/ChangeLog:

* config/arm/arm-cpus.in: Add Cortex-A78 and Cortex-A78AE cores.
* config/arm/arm-tables.opt: Regenerate.
* config/arm/arm-tune.md: Regenerate.
* doc/invoke.texi: Update docs.


rb13552.patch
Description: rb13552.patch


[PATCH][GCC][AArch64] Add support for Cortex-A78 and Cortex-A78AE

2020-09-30 Thread Przemyslaw Wirkus via Gcc-patches
This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1]
cpus.

[0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78
[1]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae

OK for master branch ?

kind regards
Przemyslaw Wirkus

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def: Add Cortex-A78 and Cortex-A78AE 
cores.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Add -mtune=cortex-a78 and -mtune=cortex-a78ae.


rb13551.patch
Description: rb13551.patch


RE: [PATCH][GCC][AArch64] Add support for Cortex-X1

2020-09-29 Thread Przemyslaw Wirkus via Gcc-patches
> Ok. Please make sure aarch64-tune.md is properly regenerated when
> committing as Alex has been adding new CPUs in there recently too.

commit f836f3bc8f76ef3e3ad21762590302ad11abc9f8

> Thanks,
> Kyrill
> 
> >
> > kind regards,
> > Przemyslaw Wirkus
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-cores.def: Add Cortex-X1 Arm core.
> > * config/aarch64/aarch64-tune.md: Regenerate.
> > * doc/invoke.texi: Add -mtune=cortex-x1 docs.



RE: [PATCH][GCC][ARM] Add support for Cortex-X1

2020-09-29 Thread Przemyslaw Wirkus via Gcc-patches
> Ok, but please make sure this is properly rebased on top of Alex's patches
> that have recently gone in in this area.

commit 0eef5eea2b42d892df52b655e55458f27ac3fb81

> Thanks,
> Kyrill
> 
> 
> kind regards,
> Przemyslaw Wirkus
> 
> gcc/ChangeLog:
> 
>   * config/arm/arm-cpus.in: Add Cortex-X1 core.
>   * config/arm/arm-tables.opt: Regenerate.
>   * config/arm/arm-tune.md: Regenerate.
>   * doc/invoke.texi: Update docs.


[PATCH][GCC][ARM] Add support for Cortex-X1

2020-09-29 Thread Przemyslaw Wirkus
Hi,

This change adds support for the Arm Cortex-X1 CPU. For more information about
this processor, see [0].

[0] : https://www.arm.com/products/cortex-x

OK for master branch ?

kind regards,
Przemyslaw Wirkus

gcc/ChangeLog:

  * config/arm/arm-cpus.in: Add Cortex-X1 core.
  * config/arm/arm-tables.opt: Regenerate.
  * config/arm/arm-tune.md: Regenerate.
  * doc/invoke.texi: Update docs.


rb13543.patch
Description: rb13543.patch


[PATCH][GCC][AArch64] Add support for Cortex-X1

2020-09-29 Thread Przemyslaw Wirkus
Hi,

This change adds support for the Arm Cortex-X1 CPU in AArch64 GCC. For more
information about this processor, see [0].

[0] : https://www.arm.com/products/cortex-x

OK for master branch ?

kind regards,
Przemyslaw Wirkus

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def: Add Cortex-X1 Arm core.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Add -mtune=cortex-x1 docs.


rb13542.patch
Description: rb13542.patch


RE: [PATCH PR96357][GCC][AArch64]: could not split insn UNSPEC_COND_FSUB with AArch64 SVE

2020-09-09 Thread Przemyslaw Wirkus
> Przemyslaw Wirkus  writes:
> > Hello maintainers,
> >
> > Can I backport this patch to GCC 10 please ?
> 
> Sure, that's fine.

commit 41d22ec51c4190133a082197e7ff67b4741fc09b
Date:   Fri Aug 28 11:31:04 2020 +0100

> Thanks,
> Richard
> 
> >
> > Regards
> > Przemyslaw
> >
> >> Committed with:
> >>
> >> commit b648814c02eb418aaf27897c480452172ee96303
> >> Date:   Fri Aug 28 11:31:04 2020 +0100
> >>
> >> Kind regards,
> >> Przemyslaw


RE: [PATCH PR96357][GCC][AArch64]: could not split insn UNSPEC_COND_FSUB with AArch64 SVE

2020-09-09 Thread Przemyslaw Wirkus
Hello maintainers,

Can I backport this patch to GCC 10 please ?

Regards
Przemyslaw

> Committed with:
> 
> commit b648814c02eb418aaf27897c480452172ee96303
> Date:   Fri Aug 28 11:31:04 2020 +0100
> 
> Kind regards,
> Przemyslaw



RE: [PATCH PR96357][GCC][AArch64]: could not split insn UNSPEC_COND_FSUB with AArch64 SVE

2020-08-28 Thread Przemyslaw Wirkus
> Sorry for the micromanagement, but I think this is easier to read if it flows
> as a single paragraph:

[snip...]

> I should have realised this would be the case, sorry, but now that there's
> only one rewrite, this should simply be:
> 
>   "&& reload_completed
>    && register_operand (operands[4], <MODE>mode)
>    && !rtx_equal_p (operands[0], operands[4])"
>   {
>     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
>                                              operands[4], operands[1]));
>     operands[4] = operands[3] = operands[0];
>   }

Done and done.

> OK with those changes, thanks.
> 
> Richard

Committed with:

commit b648814c02eb418aaf27897c480452172ee96303
Date:   Fri Aug 28 11:31:04 2020 +0100

Kind regards,
Przemyslaw



RE: [PATCH PR96357][GCC][AArch64]: could not split insn UNSPEC_COND_FSUB with AArch64 SVE

2020-08-25 Thread Przemyslaw Wirkus
Hi Richard,

Thank you for your comments.
I've attached an updated patch with changes reflecting your comments.

Kind regards,
Przemyslaw

> -Original Message-
> From: Richard Sandiford 
> Sent: 19 August 2020 11:32
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [PATCH PR96357][GCC][AArch64]: could not split insn
> UNSPEC_COND_FSUB with AArch64 SVE
> 
> Przemyslaw Wirkus  writes:
> > Hi,
> >
> > Problem is related to that operand 4 (In original pattern
> > *cond_sub_any_const) is no longer the same as operand 1, and so
> > the pattern doesn't match the split condition.
> >
> > Pattern *cond_sub_any_const is being split by this patch into
> > two separate patterns:
> > * Pattern *cond_sub_relaxed_const now matches const_int
> >   SVE_RELAXED_GP operand.
> > * Pattern *cond_sub_strict_const now matches const_int
> >   SVE_STRICT_GP operand.
> > * Remove aarch64_sve_pred_dominates_p condition from both patterns.
> 
> Thanks for doing this.
> 
> > @@ -5271,6 +5270,43 @@ (define_insn_and_rewrite
> "*cond_sub_any_const"
> >[(set_attr "movprfx" "yes")]
> >  )
> >
> > +;; Predicated floating-point subtraction from a constant, merging
> > +with an ;; independent value.
> 
> The previous pattern had the same comment.  Maybe add:
> 
>   The subtraction predicate and the merge predicate are allowed to be
>   different.
> 
> to the relaxed one and:
> 
>   The subtraction predicate and the merge predicate must be the same.
> 
> to this one.
> 
> > +(define_insn_and_rewrite "*cond_sub_strict_const"
> > +  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
> > +   (unspec:SVE_FULL_F
> > + [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
> > +  (unspec:SVE_FULL_F
> > +[(match_dup 1)
> > + (const_int SVE_STRICT_GP)
> > + (match_operand:SVE_FULL_F 2
> "aarch64_sve_float_arith_immediate")
> > + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
> > +UNSPEC_COND_FSUB)
> > +  (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0,
> w")]
> > + UNSPEC_SEL))]
> > +  "TARGET_SVE
> > +   && !rtx_equal_p (operands[3], operands[4])"
> 
> Very minor, but the file generally puts conditions on a single line if 
> they'll fit.
> Same for the relaxed version.
> 
> > +  "@
> > +   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
> > +   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
> > +   #"
> > +  "&& 1"
> > +  {
> > +if (reload_completed
> > +&& register_operand (operands[4], mode)
> > +&& !rtx_equal_p (operands[0], operands[4]))
> > +  {
> > +   emit_insn (gen_vcond_mask_ (operands[0],
> operands[3],
> > +operands[4], operands[1]));
> > +   operands[4] = operands[3] = operands[0];
> > +  }
> > +else if (!rtx_equal_p (operands[1], operands[5]))
> > +  operands[5] = copy_rtx (operands[1]);
> 
> The last two lines are a hold-over from the relaxed version, where there were
> two predicates.  There's no operand 5 in this pattern, so we should just 
> delete
> the lines.
> 
> Thanks,
> Richard


rb13404.patch
Description: rb13404.patch


[PATCH PR96357][GCC][AArch64]: could not split insn UNSPEC_COND_FSUB with AArch64 SVE

2020-08-18 Thread Przemyslaw Wirkus
Hi,

The problem is that operand 4 (in the original pattern
*cond_sub_any_const) is no longer the same as operand 1, and so
the pattern doesn't match the split condition.

Pattern *cond_sub_any_const is being split by this patch into two
separate patterns:
* Pattern *cond_sub_relaxed_const now matches const_int
  SVE_RELAXED_GP operand.
* Pattern *cond_sub_strict_const now matches const_int
  SVE_STRICT_GP operand.
* Remove aarch64_sve_pred_dominates_p condition from both patterns.

Bootstrapped and tested on aarch64-none-linux-gnu.

OK for master?

Cheers,
Przemyslaw

gcc/ChangeLog:

PR target/96357
* config/aarch64/aarch64-sve.md
(*cond_sub_relaxed_const): Updated and renamed from
*cond_sub_any_const pattern.
(*cond_sub_strict_const): New pattern.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pr96357.c: New test.


rb13393.patch
Description: rb13393.patch


RE: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-08-03 Thread Przemyslaw Wirkus
Committed cd91a084877dabcc53aec57ab70ca4fc32f3d985

> -Original Message-
> From: Przemyslaw Wirkus
> Sent: 22 July 2020 09:49
> To: Richard Sandiford 
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector
> types
> 
> [snip...]
> 
> > Przemek, if you don't have commit access already, please follow the
> > steps on https://gcc.gnu.org/gitwrite.html (happy to sponsor).
> 
> Done.
> 
> Thank you, Richard, for sponsoring this and all the support!
> 
> Kind regards,
> Przemek



[committed] MAINTAINERS: Add myself for write after approval

2020-07-22 Thread Przemyslaw Wirkus
ChangeLog:

2020-07-22  Przemyslaw Wirkus  

* MAINTAINERS (Write After Approval): Add myself.

---

diff --git a/MAINTAINERS b/MAINTAINERS
index 
d1343d33f1abb4a4bec7deac6c86551b83ecbdf1..300c10edf196d5698ea0fb0f8ee6a8f50a642292
 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -645,6 +645,7 @@ Mark Wielaard   

 Edmar Wienskoski   
 Ollie Wild 
 Kevin Williams 
+Przemyslaw Wirkus  
 Carlo Wood 
 Jackson Woodruff   
 Mingjie Xing   


RE: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-07-22 Thread Przemyslaw Wirkus
[snip...]

> Przemek, if you don't have commit access already, please follow the steps on
> https://gcc.gnu.org/gitwrite.html (happy to sponsor).

Done.

Thank you, Richard, for sponsoring this and all the support!

Kind regards, 
Przemek



RE: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-07-21 Thread Przemyslaw Wirkus
Richard,
Please find the reworked patch attached.

> -Original Message-
> From: Richard Sandiford 
> Sent: 13 July 2020 17:13
> To: Przemyslaw Wirkus 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector
> types
> 
> Hi,
> 
> Sorry for the slow review.

Thank you for all your comments. They were insightful. I've simplified
my patch to match them.

> Przemyslaw Wirkus  writes:
> > Hi,
> >
> > Introduce simple peephole2 optimization which substitutes a sequence
> > of four consecutive load or store (LDR, STR) instructions with two
> > load or store pair (LDP, STP) instructions for 2 element supported
> > vector modes (V2SI, V2SF, V2DI, and V2DF).
> > Generated load / store pair instruction offset is adjusted accordingly.

[snip...]

Kind regards, 
Przemyslaw Wirkus


rb13293.patch
Description: rb13293.patch


[PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-07-07 Thread Przemyslaw Wirkus
Hi,

Introduce simple peephole2 optimization which substitutes a sequence of
four consecutive load or store (LDR, STR) instructions with two load or
store pair (LDP, STP) instructions for 2 element supported vector modes
(V2SI, V2SF, V2DI, and V2DF).
Generated load / store pair instruction offset is adjusted accordingly.

Bootstrapped and tested on aarch64-none-linux-gnu.

Example:
$ cat stp_vec_v2sf.c
typedef float __attribute__((vector_size(8))) vec;

void
store_adjusted(vec *out, vec x, vec y)
{
  out[400] = x;
  out[401] = y;
  out[402] = y;
  out[403] = x;
}

Example compiled with:
$ ./aarch64-none-linux-gnu-gcc -S -O2 stp_vec_v2sf.c -dp

Before the patch:

store_adjusted:
str d0, [x0, 3200]// 9[c=4 l=4]  *aarch64_simd_movv2si/2
str d1, [x0, 3208]// 11   [c=4 l=4]  *aarch64_simd_movv2si/2
str d1, [x0, 3216]// 13   [c=4 l=4]  *aarch64_simd_movv2si/2
str d0, [x0, 3224]// 15   [c=4 l=4]  *aarch64_simd_movv2si/2
ret   // 26   [c=0 l=4]  *do_return

After the patch:

store_adjusted:
add x1, x0, 3200// 27   [c=4 l=4]  *adddi3_aarch64/0
stp d0, d1, [x1]// 28   [c=0 l=4]  vec_store_pairv2siv2si
stp d1, d0, [x1, 16]// 29   [c=0 l=4]  vec_store_pairv2siv2si
ret // 22   [c=0 l=4]  *do_return


OK for master ?

kind regards,
Przemyslaw

gcc/Changelog:
* config/aarch64/aarch64-ldpstp.md: Add two peepholes for adjusted 
vector
V2SI, V2SF, V2DI, V2DF load and store modes.
* config/aarch64/aarch64-protos.h (aarch64_gen_adjusted_ldpstp): Add new
parameter nunits.
(aarch64_operands_adjust_ok_for_ldpstp): Add new parameter nunits.
* config/aarch64/aarch64.c (aarch64_operands_adjust_ok_for_ldpstp): Add
new parameter nunits and support for vector types.
(aarch64_gen_adjusted_ldpstp): Add new parameter nunits and support for
vector types.
* config/aarch64/iterators.md (VP_2E): New iterator for 2 element 
vectors.
(nunits): Add SI and DI to mode attribute.

gcc/testsuite/Changelog:
* gcc.target/aarch64/ldp_vec_v2sf.c: New test.
* gcc.target/aarch64/ldp_vec_v2si.c: New test.
* gcc.target/aarch64/stp_vec_v2df.c: New test.
* gcc.target/aarch64/stp_vec_v2di.c: New test.
* gcc.target/aarch64/stp_vec_v2sf.c: New test.
* gcc.target/aarch64/stp_vec_v2si.c: New test.
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md 
b/gcc/config/aarch64/aarch64-ldpstp.md
index 
dd6f39615c51105a45b7b3dcde7b86e900ae7119..94c312f8f4f6472ebbeca0c2f3e760e0e316f7b7
 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -186,10 +186,10 @@ (define_peephole2
(set (match_operand:GPI 6 "register_operand" "")
(match_operand:GPI 7 "memory_operand" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode, 
)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, mode, UNKNOWN))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, mode, , 
UNKNOWN))
 DONE;
   else
 FAIL;
@@ -206,10 +206,10 @@ (define_peephole2
(set (match_operand:GPF 6 "register_operand" "")
(match_operand:GPF 7 "memory_operand" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode, 
)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, mode, UNKNOWN))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, mode, , 
UNKNOWN))
 DONE;
   else
 FAIL;
@@ -226,10 +226,10 @@ (define_peephole2
(set (match_operand:DI 6 "register_operand" "")
(sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode, 1)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, 1, SIGN_EXTEND))
 DONE;
   else
 FAIL;
@@ -246,10 +246,10 @@ (define_peephole2
(set (match_operand:DI 6 "register_operand" "")
(zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode, 1)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, 1, ZERO_EXTEND))
 DONE;
   else
 FAIL;
@@ -266,10 +266,10 @@ (define_peephole2
(set (match_operand:GPI 6 "memory_operand" "")
(match_operand:GPI 7 "aarch64_reg_or_zero" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)"
+  

RE: [PATCH][GCC]: Fix for PR94880: Failure to recognize andn pattern

2020-06-19 Thread Przemyslaw Wirkus
> From: Richard Biener 
> Subject: Re: [PATCH][GCC]: Fix for PR94880: Failure to recognize andn
> pattern

Snip...

> The patch is OK.

I do not have write access yet. Can I ask someone to push my patch to
master please ?

Thanks in advance, 
Przemyslaw

> Thanks,
> Richard.
> 
> > > On Fri, 19 Jun 2020, Przemyslaw Wirkus wrote:
> > >
> > > > Hi all,
> > > >
> > > > Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn 
> > > > pattern.
> > > >
> > > > Bootstrapped and tested on aarch64-none-linux-gnu.
> > > >
> > > > OK for master ?
> > > >
> > > > Cheers,
> > > > Przemyslaw
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR tree-optimization/94880
> > > > * match.pd (A | B) - B -> (A & ~B): New simplification.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > PR tree-optimization/94880
> > > > * gcc.dg/tree-ssa/pr94880.c: New Test.
> > > >
> > > >
> > >
> > > --
> > > Marc Glisse
> >
> > --
> > Przemyslaw


RE: [PATCH][GCC]: Fix for PR94880: Failure to recognize andn pattern

2020-06-19 Thread Przemyslaw Wirkus


On Fri, Jun 19 2020 Marc Glisse wrote:
> (not a reviewer)
> 
> It looks fine to me. Do we already handle the related (x|y)^y and (x|y)&~y ?

These are already in match.pd: 

/* (X | Y) ^ X -> Y & ~ X*/
/* (x | y) & ~x -> y & ~x */

> On Fri, 19 Jun 2020, Przemyslaw Wirkus wrote:
> 
> > Hi all,
> >
> > Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn pattern.
> >
> > Bootstrapped and tested on aarch64-none-linux-gnu.
> >
> > OK for master ?
> >
> > Cheers,
> > Przemyslaw
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/94880
> > * match.pd (A | B) - B -> (A & ~B): New simplification.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR tree-optimization/94880
> > * gcc.dg/tree-ssa/pr94880.c: New Test.
> >
> >
> 
> --
> Marc Glisse

-- 
Przemyslaw


RE: [PATCH][GCC]: Fix for PR94880: Failure to recognize andn pattern

2020-06-19 Thread Przemyslaw Wirkus
Hi all,

Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn pattern.

Bootstrapped and tested on aarch64-none-linux-gnu.

OK for master ?

Cheers,
Przemyslaw

gcc/ChangeLog:

PR tree-optimization/94880
* match.pd (A | B) - B -> (A & ~B): New simplification.

gcc/testsuite/ChangeLog:

PR tree-optimization/94880
* gcc.dg/tree-ssa/pr94880.c: New Test.

diff --git a/gcc/match.pd b/gcc/match.pd
index 
33ee1a920bf4a036cc5fdb3c96b38b52765bdefb..10bf33d8cb215dfb0f54cb0ad02ad0af9d9dea7b
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1109,6 +1109,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && !TYPE_SATURATING (type))
   (bit_ior @0 @1)))
 
+/* (x | y) - y -> (x & ~y) */
+(simplify
+ (minus (bit_ior:cs @0 @1) @1)
+ (bit_and @0 (bit_not @1)))
+
 /* (x | y) - (x ^ y) -> x & y */
 (simplify
  (minus (bit_ior @0 @1) (bit_xor @0 @1))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
new file mode 100644
index 
..f72166181479d43762423e7e153ee1832760cad0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
@@ -0,0 +1,29 @@
+/* PR tree-optimization/94786 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "optimized" } } */
+
+unsigned
+foo_u(unsigned x, unsigned y)
+{
+  return (x | y) - y;
+}
+
+int
+foo_i(int x, int y)
+{
+  return (x | y) - y;
+}
+
+unsigned long long
+foo_ull(unsigned long long x, unsigned long long y)
+{
+  return (x | y) - y;
+}
+
+long long
+foo_ll(long long x, long long y)
+{
+  return (x | y) - y;
+}


[PATCH][GCC]: Fix for PR94880: Failure to recognize andn pattern

2020-06-17 Thread Przemyslaw Wirkus
Hi,

Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn pattern.

Bootstrapped and tested on aarch64-none-linux-gnu.

OK for master ?

Cheers,
Przemyslaw

gcc/ChangeLog:

PR tree-optimization/94880
* match.pd (A | B) - B -> (A & ~B): New simplification.

gcc/testsuite/ChangeLog:

PR tree-optimization/94880
* gcc.dg/tree-ssa/pr94880.c: New Test.


pr94880.patch
Description: pr94880.patch


RE: [PATCH][GCC][Aarch64]: Fix for PR94880: Failure to recognize andn pattern

2020-06-16 Thread Przemyslaw Wirkus
On 12 June 2020 20:55 Andrew Pinski wrote:
> Subject: Re: [PATCH][GCC][Aarch64]: Fix for PR94880: Failure to recognize
> andn pattern
> 
> On Fri, Jun 12, 2020 at 7:50 AM Przemyslaw Wirkus
>  wrote:
> >
> > Hi all,
> >
> > Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn pattern.
> 
> Isn't it better to do this transformation on the gimple level and not in a
> target specific form?  Or at least do it in the RTL level in a generic form 
> rather
> than adding target specific patterns.

Yes, I will rework this and add a simplification pattern at the gimple level.

Cheers,
Przemyslaw Wirkus

> Thanks,
> Andrew Pinski
> 
> 
> >
> > So, for the example code:
> >
> > $ cat main.c
> > int
> > f_i(int x, int y)
> > {
> > return (x | y) - y;
> > }
> >
> > long long
> > f_l(long long x, long long y)
> > {
> > return (x | y) - y;
> > }
> >
> > typedef int v4si __attribute__ ((vector_size (16))); typedef long long
> > v2di __attribute__ ((vector_size (16)));
> >
> > v4si
> > f_v4si(v4si a, v4si b) {
> > return (a | b) - b;
> > }
> >
> > v2di
> > f_v2di(v2di a, v2di b) {
> > return (a | b) - b;
> > }
> >
> > void
> > f(v4si *d, v4si *a, v4si *b) {
> > for (int i=0; i > d[i] = (a[i] | b[i]) - b[i]; }
> >
> > Before this patch:
> > $ ./aarch64-none-linux-gnu-gcc -S -O2 main.c -dp
> >
> > f_i:
> > orr w0, w0, w1// 8[c=4 l=4]  iorsi3/0
> > sub w0, w0, w1// 14   [c=4 l=4]  subsi3
> > ret   // 24   [c=0 l=4]  *do_return
> > f_l:
> > orr x0, x0, x1// 8[c=4 l=4]  iordi3/0
> > sub x0, x0, x1// 14   [c=4 l=4]  subdi3/0
> > ret   // 24   [c=0 l=4]  *do_return
> > f_v4si:
> > orr v0.16b, v0.16b, v1.16b// 8[c=8 l=4]  
> > iorv4si3/0
> > sub v0.4s, v0.4s, v1.4s   // 14 [c=8 l=4]  subv4si3
> > ret   // 24   [c=0 l=4]  *do_return
> > f_v2di:
> > orr v0.16b, v0.16b, v1.16b// 8[c=8 l=4]  
> > iorv2di3/0
> > sub v0.2d, v0.2d, v1.2d   // 14 [c=8 l=4]  subv2di3
> > ret   // 24   [c=0 l=4]  *do_return
> >
> > After this patch:
> > $ ./aarch64-none-linux-gnu-gcc -S -O2 main.c -dp
> >
> > f_i:
> > bic w0, w0, w1  // 13   [c=8 l=4]  *bic_and_not_si3
> > ret // 23   [c=0 l=4]  *do_return
> > f_l:
> > bic x0, x0, x1  // 13   [c=8 l=4]  *bic_and_not_di3
> > ret // 23   [c=0 l=4]  *do_return
> > f_v4si:
> > bic v0.16b, v0.16b, v1.16b  // 13   [c=16 l=4]
> *bic_and_not_simd_v4si3
> > ret // 23   [c=0 l=4]  *do_return
> > f_v2di:
> > bic v0.16b, v0.16b, v1.16b  // 13   [c=16 l=4]
> *bic_and_not_simd_v2di3
> > ret // 23   [c=0 l=4]  *do_return
> >
> > Bootstrapped and tested on aarch64-none-linux-gnu.
> >
> > OK for master ?
> >
> > Cheers,
> > Przemyslaw
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/94880
> > * config/aarch64/aarch64.md (bic_and_not_3): New
> define_insn.
> > * config/aarch64/aarch64-simd.md (bic_and_not_simd_3):
> New
> > define_insn.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR tree-optimization/94880
> > * gcc.target/aarch64/bic_and_not_di3.c: New test.
> > * gcc.target/aarch64/bic_and_not_si3.c: New test.
> > * gcc.target/aarch64/bic_and_not_v2di3.c: New test.
> > * gcc.target/aarch64/bic_and_not_v4si3.c: New test.


[PATCH][GCC][Aarch64]: Fix for PR94880: Failure to recognize andn pattern

2020-06-12 Thread Przemyslaw Wirkus
Hi all,

Pattern "(x | y) - y" can be optimized to simple "(x & ~y)" andn pattern.

So, for the example code:

$ cat main.c
int
f_i(int x, int y)
{
return (x | y) - y;
}

long long
f_l(long long x, long long y)
{
return (x | y) - y;
}

typedef int v4si __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__ ((vector_size (16)));

v4si
f_v4si(v4si a, v4si b) {
return (a | b) - b;
}

v2di
f_v2di(v2di a, v2di b) {
return (a | b) - b;
}

void
f(v4si *d, v4si *a, v4si *b) {
for (int i=0; i3): New define_insn.
* config/aarch64/aarch64-simd.md (bic_and_not_simd_3): New
define_insn.

gcc/testsuite/ChangeLog:

PR tree-optimization/94880
* gcc.target/aarch64/bic_and_not_di3.c: New test.
* gcc.target/aarch64/bic_and_not_si3.c: New test.
* gcc.target/aarch64/bic_and_not_v2di3.c: New test.
* gcc.target/aarch64/bic_and_not_v4si3.c: New test.


patch.patch
Description: patch.patch


[PATCH][arm][backport] arm: fix v[78]-r multilibs when configured with --with-multilib-list=aprofile

2020-01-10 Thread Przemyslaw Wirkus
Hi,
When gcc for Arm is configured with --with-multilib-list=aprofile a
misplaced endif directive in the makefile was causing the arm->thumb
mapping for multilibs to be omitted from the reuse rules.  This
resulted in the default multilib being picked rather than the thumb2
optimized version.

gcc/ChangeLog:
2020-01-10  Przemyslaw Wirkus  

Backport from trunk
* config/arm/t-multilib: Use arm->thumb multilib reuse rules
on a-profile.

Ok for gcc-9-branch?

kind regards
Przemyslaw Wirkus
diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib
index 
dc97c8f09fb0b7f53520432e1a174adfce1bf6af..d5ee537193f2416909516c563b9848a79dabb1bf
 100644
--- a/gcc/config/arm/t-multilib
+++ b/gcc/config/arm/t-multilib
@@ -185,6 +185,8 @@ MULTILIB_MATCHES+= march?armv7=march?armv8.5-a
 MULTILIB_MATCHES   += $(foreach ARCH, $(v8_5_a_simd_variants), \
 march?armv7+fp=march?armv8.5-a$(ARCH))
 
+endif  # Not APROFILE.
+
 # Use Thumb libraries for everything.
 
 MULTILIB_REUSE += 
mthumb/march.armv7/mfloat-abi.soft=marm/march.armv7/mfloat-abi.soft
@@ -198,4 +200,3 @@ MULTILIB_REUSE  += $(foreach MODE, arm thumb, \
 $(foreach ARCH, armv7, \
   
mthumb/march.$(ARCH)/mfloat-abi.soft=m$(MODE)/march.$(ARCH)/mfloat-abi.softfp))
 
-endif  # Not APROFILE.


[PATCH][arm] [backport] arm: Fix rmprofile multilibs when architecture includes +mp or +sec (PR target/93188)

2020-01-10 Thread Przemyslaw Wirkus
Hi,
When only the rmprofile multilibs are built, compiling for armv7-a
should select the generic v7 multilibs.  This used to work before +sec
and +mp were added to the architecture options but it was broken by
that update.  This patch fixes those variants and adds some tests to
ensure that they remain fixed ;-)

gcc/ChangeLog:
2020-01-08  Przemyslaw Wirkus  

Backport from trunk
PR target/93188
* config/arm/t-multilib (MULTILIB_MATCHES): Add rules to match
armv7-a{+mp,+sec,+mp+sec} to appropriate armv7 multilib variants
when only building rm-profile multilibs.

gcc/testsuite/ChangeLog:
2020-01-08  Przemyslaw Wirkus  

Backport from trunk
* gcc.target/arm/multilib.exp: Add new tests for rm-profile only.

Ok for gcc-9-branch?

kind regards
Przemyslaw Wirkus
diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib
index 
dc97c8f09fb0b7f53520432e1a174adfce1bf6af..182df6e94133d01500de24f511b61beb80152a0e
 100644
--- a/gcc/config/arm/t-multilib
+++ b/gcc/config/arm/t-multilib
@@ -132,10 +132,19 @@ MULTILIB_MATCHES  += 
march?armv7-r+fp.sp=march?armv8-r+crc+fp.sp
 
 ifeq (,$(HAS_APROFILE))
 # Map all v7-a
+
 MULTILIB_MATCHES   += march?armv7=march?armv7-a
+
+MULTILIB_MATCHES   += $(foreach ARCH, $(v7_a_arch_variants), \
+march?armv7=march?armv7-a$(ARCH))
+
 MULTILIB_MATCHES   += $(foreach ARCH, $(v7_a_nosimd_variants) 
$(v7_a_simd_variants), \
 march?armv7+fp=march?armv7-a$(ARCH))
 
+MULTILIB_MATCHES   += $(foreach ARCHVAR, $(v7_a_arch_variants), \
+$(foreach ARCH, $(v7_a_nosimd_variants) 
$(v7_a_simd_variants), \
+  march?armv7+fp=march?armv7-a$(ARCHVAR)$(ARCH)))
+
 MULTILIB_MATCHES   += march?armv7=march?armv7ve
 
 # ARMv7ve FP/SIMD variants: map down to v7+fp
diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp 
b/gcc/testsuite/gcc.target/arm/multilib.exp
index 
dcea829965eb15e372401e6389df5a1403393ecb..2df3f9c3117432299ad6ce8746babd5ff4abe405
 100644
--- a/gcc/testsuite/gcc.target/arm/multilib.exp
+++ b/gcc/testsuite/gcc.target/arm/multilib.exp
@@ -434,6 +434,22 @@ if {[multilib_config "aprofile"] } {
check_multi_dir $opts $dir
 }
 }
+if {[multilib_config "rmprofile"] && ![multilib_config "aprofile"]} {
+foreach {opts dir} {
+   {-mcpu=cortex-a9 -mfpu=auto -mfloat-abi=soft} "thumb/v7/nofp"
+   {-mcpu=cortex-a8 -mfpu=auto -mfloat-abi=softfp} "thumb/v7+fp/softfp"
+   {-mcpu=cortex-a5 -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+   {-mcpu=cortex-a53 -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+   {-march=armv7-a+fp -mfpu=auto -mfloat-abi=softfp} "thumb/v7+fp/softfp"
+   {-march=armv7-a+fp -mfpu=auto -mfloat-abi=soft} "thumb/v7/nofp"
+   {-march=armv7-a+mp+simd -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+   {-march=armv7-a -mfpu=vfpv4 -mfloat-abi=hard} "thumb/v7+fp/hard"
+   {-march=armv7-a+fp -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+   {-march=armv7-a -mfpu=vfpv4 -mfloat-abi=soft} "thumb/v7/nofp"
+} {
+   check_multi_dir $opts $dir
+}
+}
 if {[multilib_config "rmprofile"] } {
 foreach {opts dir} {
{-mcpu=cortex-m0 -mfpu=auto -mfloat-abi=soft} "thumb/v6-m/nofp"


Re: [PATCH][arm] Implement usadv16qi and ssadv16qi standard names

2019-06-07 Thread Przemyslaw Wirkus
Hi all,

This patch implements the usadv16qi and ssadv16qi standard names for arm.

The V16QImode variant is important as it is the most commonly used pattern:
reducing vectors of bytes into an int.
The midend expects the optab to compute the absolute differences of operands 1
and 2 and reduce them while widening along the way up to SImode. So the inputs
are V16QImode and the output is V4SImode.

I've based my solution on Aarch64 usadv16qi and ssadv16qi standard names
current implementation (r260437). This solution emits below sequence of
instructions:

VABDL.u8    tmp, op1, op2   # op1, op2 lowpart
VABAL.u8    tmp, op1, op2   # op1, op2 highpart
VPADAL.u16  op3, tmp

So, for the code:

$ arm-none-linux-gnueabihf-gcc -S -O3 -march=armv8-a+simd -mfpu=auto 
-mfloat-abi=hard usadv16qi.c -dp

#define N 1024
unsigned char pix1[N];
unsigned char pix2[N];

int
foo (void)
{
  int i_sum = 0;
  int i;
  for (i = 0; i < N; i++)
i_sum += __builtin_abs (pix1[i] - pix2[i]);
  return i_sum;
}

we now generate on arm:
foo:
movwr3, #:lower16:pix2  @ 57[c=4 l=4]  *arm_movsi_vfp/3
movtr3, #:upper16:pix2  @ 58[c=4 l=4]  *arm_movt/0
vmov.i32q9, #0  @ v4si  @ 3 [c=4 l=4]  *neon_movv4si/2
movwr2, #:lower16:pix1  @ 59[c=4 l=4]  *arm_movsi_vfp/3
movtr2, #:upper16:pix1  @ 60[c=4 l=4]  *arm_movt/0
add r1, r3, #1024   @ 8 [c=4 l=4]  *arm_addsi3/4
.L2:
vld1.8  {q11}, [r3]!@ 11[c=8 l=4]  *movmisalignv16qi_neon_load
vld1.8  {q10}, [r2]!@ 10[c=8 l=4]  *movmisalignv16qi_neon_load
cmp r1, r3  @ 21[c=4 l=4]  *arm_cmpsi_insn/2
vabdl.u8q8, d20, d22@ 12[c=8 l=4]  neon_vabdluv8qi
vabal.u8q8, d21, d23@ 15[c=88 l=4]  neon_vabaluv8qi
vpadal.u16  q9, q8  @ 16[c=8 l=4]  neon_vpadaluv8hi
bne .L2 @ 22[c=16 l=4]  arm_cond_branch
vadd.i32d18, d18, d19   @ 24[c=120 l=4]  
quad_halves_plusv4si
vpadd.i32   d18, d18, d18   @ 25[c=8 l=4]  
neon_vpadd_internalv2si
vmov.32 r0, d18[0]  @ 30[c=12 l=4]  vec_extractv2sisi/1

instead of:
foo:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
movwr3, #:lower16:pix1
movtr3, #:upper16:pix1
vmov.i32q9, #0  @ v4si
movwr2, #:lower16:pix2
movtr2, #:upper16:pix2
add r1, r3, #1024
.L2:
vld1.8  {q8}, [r3]!
vld1.8  {q11}, [r2]!
vmovl.u8 q10, d16
cmp r1, r3
vmovl.u8 q8, d17
vmovl.u8 q12, d22
vmovl.u8 q11, d23
vsub.i16q10, q10, q12
vsub.i16q8, q8, q11
vabs.s16q10, q10
vabs.s16q8, q8
vaddw.s16   q9, q9, d20
vaddw.s16   q9, q9, d21
vaddw.s16   q9, q9, d16
vaddw.s16   q9, q9, d17
bne .L2
vadd.i32d18, d18, d19
vpadd.i32   d18, d18, d18
vmov.32 r0, d18[0]

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Przemyslaw

2019-05-29 Przemyslaw Wirkus 

* config/arm/iterators.md (VABAL): New int iterator.
* config/arm/neon.md (sadv16qi): New define_expand.
* config/arm/unspecs.md ("unspec"): Define UNSPEC_VABAL_S, 
UNSPEC_VABAL_U
values.

2019-05-29 Przemyslaw Wirkus 

* gcc.target/arm/ssadv16qi.c: New test.
* gcc.target/arm/usadv16qi.c: Likewise.
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 
eb07c5b90c1b1905d35d7b480bdbe7d7a45ab7ba..2462b8c87ea7dbe60ba50d22b1e494bb4fe905c2
 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -341,6 +341,8 @@
 
 (define_int_iterator VSUBHN [UNSPEC_VSUBHN UNSPEC_VRSUBHN])
 
+(define_int_iterator VABAL [UNSPEC_VABAL_S UNSPEC_VABAL_U])
+
 (define_int_iterator VABD [UNSPEC_VABD_S UNSPEC_VABD_U])
 
 (define_int_iterator VABDL [UNSPEC_VABDL_S UNSPEC_VABDL_U])
@@ -834,6 +836,7 @@
   (UNSPEC_VSUBW_S "s") (UNSPEC_VSUBW_U "u")
   (UNSPEC_VHSUB_S "s") (UNSPEC_VHSUB_U "u")
   (UNSPEC_VQSUB_S "s") (UNSPEC_VQSUB_U "u")
+  (UNSPEC_VABAL_S "s") (UNSPEC_VABAL_U "u")
   (UNSPEC_VABD_S "s") (UNSPEC_VABD_U "u")
   (UNSPEC_VABDL_S "s") (UNSPEC_VABDL_U "u")
   (UNSPEC_VMAX "s") (UNSPEC_VMAX_U "u")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 
de9ae43849038b3cf75feceec36429d5c40c63f2..51ed11abc519ea9d4f9e31751ac6d26a3d1ae5cd
 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3255,6 +3255,32 @@
   [(set_attr "type" "neon_arith_acc")]
 )
 
+(define_expand &quo

Re: [PATCH][arm] Implement usadv16qi and ssadv16qi standard names

2019-06-06 Thread Przemyslaw Wirkus
Hi all,

This patch implements the usadv16qi and ssadv16qi standard names for arm.

The V16QImode variant is important as it is the most commonly used pattern:
reducing vectors of bytes into an int.
The midend expects the optab to compute the absolute differences of operands 1
and 2 and reduce them while widening along the way up to SImode. So the inputs
are V16QImode and the output is V4SImode.

I've based my solution on Aarch64 usadv16qi and ssadv16qi standard names
current implementation (r260437). This solution emits below sequence of
instructions:

VABDL.u8    tmp, op1, op2   # op1, op2 lowpart
VABAL.u8    tmp, op1, op2   # op1, op2 highpart
VPADAL.u16  op3, tmp

So, for the code:

$ arm-none-linux-gnueabihf-gcc -S -O3 -march=armv8-a+simd -mfpu=auto 
-mfloat-abi=hard usadv16qi.c -dp

#define N 1024
unsigned char pix1[N];
unsigned char pix2[N];

int
foo (void)
{
  int i_sum = 0;
  int i;
  for (i = 0; i < N; i++)
i_sum += __builtin_abs (pix1[i] - pix2[i]);
  return i_sum;
}

we now generate on arm:
foo:
movwr3, #:lower16:pix2  @ 57[c=4 l=4]  *arm_movsi_vfp/3
movtr3, #:upper16:pix2  @ 58[c=4 l=4]  *arm_movt/0
vmov.i32q9, #0  @ v4si  @ 3 [c=4 l=4]  *neon_movv4si/2
movwr2, #:lower16:pix1  @ 59[c=4 l=4]  *arm_movsi_vfp/3
movtr2, #:upper16:pix1  @ 60[c=4 l=4]  *arm_movt/0
add r1, r3, #1024   @ 8 [c=4 l=4]  *arm_addsi3/4
.L2:
vld1.8  {q11}, [r3]!@ 11[c=8 l=4]  *movmisalignv16qi_neon_load
vld1.8  {q10}, [r2]!@ 10[c=8 l=4]  *movmisalignv16qi_neon_load
cmp r1, r3  @ 21[c=4 l=4]  *arm_cmpsi_insn/2
vabdl.u8q8, d20, d22@ 12[c=8 l=4]  neon_vabdluv8qi
vabal.u8q8, d21, d23@ 15[c=88 l=4]  neon_vabaluv8qi
vpadal.u16  q9, q8  @ 16[c=8 l=4]  neon_vpadaluv8hi
bne .L2 @ 22[c=16 l=4]  arm_cond_branch
vadd.i32d18, d18, d19   @ 24[c=120 l=4]  
quad_halves_plusv4si
vpadd.i32   d18, d18, d18   @ 25[c=8 l=4]  
neon_vpadd_internalv2si
vmov.32 r0, d18[0]  @ 30[c=12 l=4]  vec_extractv2sisi/1

instead of:
foo:
movwr3, #:lower16:pix1
movtr3, #:upper16:pix1
vmov.i32q9, #0  @ v4si
movwr2, #:lower16:pix2
movtr2, #:upper16:pix2
add r1, r3, #1024
.L2:
vld1.8  {q8}, [r3]!
vld1.8  {q11}, [r2]!
vmovl.u8 q10, d16
cmp r1, r3
vmovl.u8 q8, d17
vmovl.u8 q12, d22
vmovl.u8 q11, d23
vsub.i16q10, q10, q12
vsub.i16q8, q8, q11
vabs.s16q10, q10
vabs.s16q8, q8
vaddw.s16   q9, q9, d20
vaddw.s16   q9, q9, d21
vaddw.s16   q9, q9, d16
vaddw.s16   q9, q9, d17
bne .L2
vadd.i32d18, d18, d19
vpadd.i32   d18, d18, d18
vmov.32 r0, d18[0]

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Przemyslaw

2019-05-29 Przemyslaw Wirkus 

* config/arm/iterators.md (VABAL): New int iterator.
* config/arm/neon.md (sadv16qi): New define_expand.
* config/arm/unspecs.md ("unspec"): Define UNSPEC_VABAL_S, 
UNSPEC_VABAL_U
values.

2019-05-29 Przemyslaw Wirkus 

* gcc.target/arm/ssadv16qi.c: New test.
* gcc.target/arm/usadv16qi.c: Likewise.
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 
eb07c5b90c1b1905d35d7b480bdbe7d7a45ab7ba..2462b8c87ea7dbe60ba50d22b1e494bb4fe905c2
 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -341,6 +341,8 @@
 
 (define_int_iterator VSUBHN [UNSPEC_VSUBHN UNSPEC_VRSUBHN])
 
+(define_int_iterator VABAL [UNSPEC_VABAL_S UNSPEC_VABAL_U])
+
 (define_int_iterator VABD [UNSPEC_VABD_S UNSPEC_VABD_U])
 
 (define_int_iterator VABDL [UNSPEC_VABDL_S UNSPEC_VABDL_U])
@@ -834,6 +836,7 @@
   (UNSPEC_VSUBW_S "s") (UNSPEC_VSUBW_U "u")
   (UNSPEC_VHSUB_S "s") (UNSPEC_VHSUB_U "u")
   (UNSPEC_VQSUB_S "s") (UNSPEC_VQSUB_U "u")
+  (UNSPEC_VABAL_S "s") (UNSPEC_VABAL_U "u")
   (UNSPEC_VABD_S "s") (UNSPEC_VABD_U "u")
   (UNSPEC_VABDL_S "s") (UNSPEC_VABDL_U "u")
   (UNSPEC_VMAX "s") (UNSPEC_VMAX_U "u")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 
de9ae43849038b3cf75feceec36429d5c40c63f2..51ed11abc519ea9d4f9e31751ac6d26a3d1ae5cd
 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3255,6 +3255,32 @@
   [(set_attr "type" "neon_arith_acc")]
 )
 
+(define_expand "sadv16qi"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (unspec:V16QI [(use (match_operand:V16QI 1 "register_op

[PATCH][arm] Implement usadv16qi and ssadv16qi standard names

2019-06-06 Thread Przemyslaw Wirkus
Hi all,

This patch implements the usadv16qi and ssadv16qi standard names for arm.

The V16QImode variant is important as it is the most commonly used pattern:
reducing vectors of bytes into an int.
The midend expects the optab to compute the absolute differences of operands 1
and 2 and reduce them while widening along the way up to SImode. So the inputs
are V16QImode and the output is V4SImode.

I've based my solution on Aarch64 usadv16qi and ssadv16qi standard names
current implementation (r260437). This solution emits below sequence of
instructions:

VABDL.u8    tmp, op1, op2   # op1, op2 lowpart
VABAL.u8    tmp, op1, op2   # op1, op2 highpart
VPADAL.u16  op3, tmp

So, for the code:

$ arm-none-linux-gnueabihf-gcc -S -O3 -march=armv8-a+simd -mfpu=auto 
-mfloat-abi=hard usadv16qi.c -dp

#define N 1024
unsigned char pix1[N];
unsigned char pix2[N];

int
foo (void)
{
  int i_sum = 0;
  int i;
  for (i = 0; i < N; i++)
i_sum += __builtin_abs (pix1[i] - pix2[i]);
  return i_sum;
}

we now generate on arm:
foo:
movwr3, #:lower16:pix2  @ 57[c=4 l=4]  *arm_movsi_vfp/3
movtr3, #:upper16:pix2  @ 58[c=4 l=4]  *arm_movt/0
vmov.i32q9, #0  @ v4si  @ 3 [c=4 l=4]  *neon_movv4si/2
movwr2, #:lower16:pix1  @ 59[c=4 l=4]  *arm_movsi_vfp/3
movtr2, #:upper16:pix1  @ 60[c=4 l=4]  *arm_movt/0
add r1, r3, #1024   @ 8 [c=4 l=4]  *arm_addsi3/4
.L2:
vld1.8  {q11}, [r3]!@ 11[c=8 l=4]  *movmisalignv16qi_neon_load
vld1.8  {q10}, [r2]!@ 10[c=8 l=4]  *movmisalignv16qi_neon_load
cmp r1, r3  @ 21[c=4 l=4]  *arm_cmpsi_insn/2
vabdl.u8q8, d20, d22@ 12[c=8 l=4]  neon_vabdluv8qi
vabal.u8q8, d21, d23@ 15[c=88 l=4]  neon_vabaluv8qi
vpadal.u16  q9, q8  @ 16[c=8 l=4]  neon_vpadaluv8hi
bne .L2 @ 22[c=16 l=4]  arm_cond_branch
vadd.i32d18, d18, d19   @ 24[c=120 l=4]  
quad_halves_plusv4si
vpadd.i32   d18, d18, d18   @ 25[c=8 l=4]  
neon_vpadd_internalv2si
vmov.32 r0, d18[0]  @ 30[c=12 l=4]  vec_extractv2sisi/1

instead of:
foo:
movwr3, #:lower16:pix1
movtr3, #:upper16:pix1
vmov.i32q9, #0  @ v4si
movwr2, #:lower16:pix2
movtr2, #:upper16:pix2
add r1, r3, #1024
.L2:
vld1.8  {q8}, [r3]!
vld1.8  {q11}, [r2]!
vmovl.u8 q10, d16
cmp r1, r3
vmovl.u8 q8, d17
vmovl.u8 q12, d22
vmovl.u8 q11, d23
vsub.i16q10, q10, q12
vsub.i16q8, q8, q11
vabs.s16q10, q10
vabs.s16q8, q8
vaddw.s16   q9, q9, d20
vaddw.s16   q9, q9, d21
vaddw.s16   q9, q9, d16
vaddw.s16   q9, q9, d17
bne .L2
vadd.i32d18, d18, d19
vpadd.i32   d18, d18, d18
vmov.32 r0, d18[0]

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Przemyslaw

2019-06-06 Przemyslaw Wirkus 

* config/arm/iterators.md (VABAL): New int iterator.
* config/arm/neon.md (sadv16qi): New define_expand.
* config/arm/unspecs.md ("unspec"): Define UNSPEC_VABAL_S, 
UNSPEC_VABAL_U
values.

2019-06-06 Przemyslaw Wirkus 

* gcc.target/arm/ssadv16qi.c: New test.
* gcc.target/arm/usadv16qi.c: Likewise.

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-14 Thread Przemyslaw Wirkus
> What is that backslash in \> doing in the ChangeLog entries?

>    Jakub

My bad, the tool I use for code review inserted a backslash when it wrongly
assumed I wanted Markdown. An early sign that machines want to take over, I
guess... I promise I will be more diligent next time.

kind regards
Przemyslaw


Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-13 Thread Przemyslaw Wirkus
Hi all,

Vectorise __builtin_signbit (v2sf, v4sf) with unsigned shift right vector
instruction.

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp

Before patch:

foo:
ldp w2, w1, [x1]// 37   [c=0 l=4]  
*load_pair_zero_extendsidi2_aarch64/0
and w2, w2, -2147483648 // 8[c=4 l=4]  andsi3/1
and w1, w1, -2147483648 // 12   [c=4 l=4]  andsi3/1
stp w2, w1, [x0]// 38   [c=0 l=4]  store_pair_sw_sisi/0
ret // 32   [c=0 l=4]  *do_return

After patch:

foo:
ldr d0, [x1]// 7[c=8 l=4]  *aarch64_simd_movv2sf/0
ushrv0.2s, v0.2s, 31// 8[c=12 l=4]  
aarch64_simd_lshrv2si
str d0, [x0]// 9[c=4 l=4]  *aarch64_simd_movv2si/2
ret // 28   [c=0 l=4]  *do_return

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
adrpx3, in  // 38   [c=4 l=4]  *movdi_aarch64/12
adrpx2, out // 41   [c=4 l=4]  *movdi_aarch64/12
add x3, x3, :lo12:in// 40   [c=4 l=4]  add_losym_di
add x2, x2, :lo12:out   // 43   [c=4 l=4]  add_losym_di
mov x0, 0   // 3[c=4 l=4]  *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr w1, [x3, x0]// 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
and w1, w1, -2147483648 // 11   [c=4 l=4]  andsi3/1
str w1, [x2, x0]// 16   [c=4 l=4]  *movsi_aarch64/8
add x0, x0, 4   // 17   [c=4 l=4]  *adddi3_aarch64/0
cmp x0, 4096// 19   [c=4 l=4]  cmpdi/1
bne .L2 // 20   [c=4 l=4]  condjump
ret // 51   [c=0 l=4]  *do_return

After patch:

foo:
adrpx2, in  // 37   [c=4 l=4]  *movdi_aarch64/12
adrpx1, out // 40   [c=4 l=4]  *movdi_aarch64/12
add x2, x2, :lo12:in// 39   [c=4 l=4]  add_losym_di
add x1, x1, :lo12:out   // 42   [c=4 l=4]  add_losym_di
mov x0, 0   // 3[c=4 l=4]  *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr q0, [x2, x0]// 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
ushrv0.4s, v0.4s, 31// 11   [c=12 l=4]  
aarch64_simd_lshrv4si
str q0, [x1, x0]// 15   [c=4 l=4]  *aarch64_simd_movv4si/2
add x0, x0, 16  // 16   [c=4 l=4]  *adddi3_aarch64/0
cmp x0, 4096// 18   [c=4 l=4]  cmpdi/1
bne .L2 // 19   [c=4 l=4]  condjump
ret // 50   [c=0 l=4]  *do_return

OK for Trunk ?

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-05-13  Przemyslaw Wirkus  

* internal-fn.def (SIGNBIT): New.
* config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
defined.
(signbitv4sf2): Likewise.

gcc/testsuite/ChangeLog:

2019-05-13  Przemyslaw Wirkus  

* gcc.target/aarch64/signbitv4sf.c: New test.
* gcc.target/aarch64/signbitv2sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
e3852c5d182b70978d7603225fce55c0b8ee2894..8f7227327cb960fb34c7b88e1bf283f8f17a3be9
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbit<mode>2"
+  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+   (use (match_operand:VDQSF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                                        shift_amount);
+  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+                                                 shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 
e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e
 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
 DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
 DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
 DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
new file mode 100644
ind

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-03 Thread Przemyslaw Wirkus
Hi Richard,
New patch adds a new IFN_SIGNBIT internal function that maps
to signbit_optab.

gcc/ChangeLog:

2019-05-05  Przemyslaw Wirkus  

* gcc/internal-fn.def (SIGNBIT): New.
* gcc/config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.

gcc/testsuite/ChangeLog:

2019-05-05  Przemyslaw Wirkus  

* gcc/testsuite/gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
e3852c5d182b70978d7603225fce55c0b8ee2894..3374ce95b912cceaca49660df0579467f758974d
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbitv4sf2"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (use (match_operand:V4SF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+  shift_amount);
+  operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+  shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 
e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e
 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
 DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
 DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
 DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 
..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+  for (int i = 0; i < N; i++)
+out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+  foo ();
+
+  for (int i = 0; i < N; i++)
+  {
+if (in[i] >= 0.0 && out[i])
+  abort ();
+if (in[i] < 0.0 && !out[i])
+  abort ();
+  }
+
+  return 0;
+}
+


[PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-03-21 Thread Przemyslaw Wirkus
Hi all,

Vectorise __builtin_signbit (v4sf) with unsigned shift right vector
instruction.

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
adrpx3, in  // 37   [c=4 l=4]  *movdi_aarch64/12
adrpx2, out // 40   [c=4 l=4]  *movdi_aarch64/12
add x3, x3, :lo12:in// 39   [c=4 l=4]  add_losym_di
add x2, x2, :lo12:out   // 42   [c=4 l=4]  add_losym_di
mov x0, 0   // 3[c=4 l=4]  *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr w1, [x3, x0]// 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
and w1, w1, -2147483648 // 11   [c=4 l=4]  andsi3/1
str w1, [x2, x0]// 16   [c=4 l=4]  *movsi_aarch64/8
add x0, x0, 4   // 17   [c=4 l=4]  *adddi3_aarch64/0
cmp x0, 4096// 19   [c=4 l=4]  cmpdi/1
bne .L2 // 20   [c=4 l=4]  condjump
ret // 50   [c=0 l=4]  *do_return

After patch:

foo:
adrpx2, in  // 36   [c=4 l=4]  *movdi_aarch64/12
adrpx1, out // 39   [c=4 l=4]  *movdi_aarch64/12
add x2, x2, :lo12:in// 38   [c=4 l=4]  add_losym_di
add x1, x1, :lo12:out   // 41   [c=4 l=4]  add_losym_di
mov x0, 0   // 3[c=4 l=4]  *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr q0, [x2, x0]// 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
ushr v0.4s, v0.4s, 31// 11   [c=12 l=4]  aarch64_simd_lshrv4si
str q0, [x1, x0]// 15   [c=4 l=4]  *aarch64_simd_movv4si/2
add x0, x0, 16  // 16   [c=4 l=4]  *adddi3_aarch64/0
cmp x0, 4096// 18   [c=4 l=4]  cmpdi/1
bne .L2 // 19   [c=4 l=4]  condjump
ret // 49   [c=0 l=4]  *do_return

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-03-20  Przemyslaw Wirkus  

* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Added CASE_CFN_SIGNBIT.
* config/aarch64/aarch64-simd-builtins.def: (signbit)
Extend to V4SF mode.
* config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.

gcc/testsuite/ChangeLog:

2019-02-28  Przemyslaw Wirkus  

* gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index 
04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb
 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, 
tree type_out,
 
return aarch64_builtin_decls[builtin];
   }
+CASE_CFN_SIGNBIT:
+  {
+   if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf];
+   else
+ return NULL_TREE;
+  }
 case CFN_BUILT_IN_BSWAP16:
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index 
17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd
 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -324,6 +324,9 @@
   VAR1 (UNOP, rint, 2, hf)
   VAR1 (UNOP, round, 2, hf)
 
+  /* Implemented by signbit<mode>2 pattern */
+  VAR1 (UNOP, signbit, 2, v4sf)
+
   /* Implemented by l2.  */
   VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
   VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -915,6 +915,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbitv4sf2"
+[(use (match_operand:V4SI 0 "register_operand"))
+ (use (match_operand:V4SF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+  shift_amount);
+  operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+  shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 
..aa06a5df1dbb3e295355d485b3

Re: [libstdc++-,doc] Mislocated

2017-11-21 Thread Przemyslaw Wirkus
On 21/11/17 15:27 +0000, Jonathan Wakely wrote:
>>OK for trunk?

>OK, thanks.

I don't have privileges to commit. Could you please commit it on my behalf?





[libstdc++-,doc] Mislocated

2017-11-21 Thread Przemyslaw Wirkus
Hello,

A misplaced </variablelist> closing tag causes the libstdc++-v3
'make doc-pdf-docbook' documentation build to fail.

PDF documentation generation for libstdc++-v3 is broken for the
'make doc-pdf-docbook' rule.  The pdflatex compilation fails because the LaTeX
is not generated correctly from wrongly formatted variablelist elements in the
XML docs: one varlistentry ends up outside its enclosing variablelist:

<variablelist>
  <varlistentry/>
  <varlistentry/>
  <varlistentry/>
</variablelist>
  <varlistentry/>

Commit that caused the regression is:

  Author: redi
  Date: Fri Jul 21 16:05:10 2017
  New Revision: 250430
  URL: https://gcc.gnu.org/viewcvs?rev=250430&root=gcc&view=rev

Tested by regenerating libstdc++v3 docs with 'make doc-pdf-docbook'.

OK for trunk?

Kind regards,
Przemyslaw Wirkus

libstdc++-v3/ChangeLog:

2017-11-08  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

* doc/xml/manual/using.xml (manual.intro.using.macros): Move
variablelist element at the end of its list.
diff --git a/libstdc++-v3/doc/xml/manual/using.xml 
b/libstdc++-v3/doc/xml/manual/using.xml
index 
6ce29fd30be74fcc5273ec0971a3b72115aaba73..fdbaa5730072189b828ca2ca120ff752f58da2d3
 100644
--- a/libstdc++-v3/doc/xml/manual/using.xml
+++ b/libstdc++-v3/doc/xml/manual/using.xml
@@ -989,7 +989,6 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 hello.cc 
-o test.exe
enables support for ISO/IEC 29124 Special Math Functions.
   
 
-
 
 _GLIBCXX_SANITIZE_VECTOR
 
@@ -1008,6 +1007,7 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 
hello.cc -o test.exe
 destroy or modify vectors.
   
 
+