https://gcc.gnu.org/g:d228af5cbc2f635d0837ed67fe95641d6e567aff

commit r14-10906-gd228af5cbc2f635d0837ed67fe95641d6e567aff
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Fri Nov 8 14:07:46 2024 +0000

    aarch64: Restrict FCLAMP to SME2
    
    There are two sets of patterns for FCLAMP: one set for single registers
    and one set for multiple registers.  The multiple-register set was
    correctly gated on SME2, but the single-register set only required SME.
    This doesn't matter for ACLE usage, since the intrinsic definitions
    are correctly gated.  But it does matter for automatic generation of
    FCLAMP from separate minimum and maximum operations (either ACLE
    intrinsics or autovectorised code).
    
    gcc/
            * config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp<mode>)
            (*aarch64_sve_fclamp<mode>_x): Require TARGET_STREAMING_SME2
            rather than TARGET_STREAMING_SME.
    
    gcc/testsuite/
            * gcc.target/aarch64/sme/clamp_3.c: Force sme2.
            * gcc.target/aarch64/sme/clamp_4.c: Likewise.
            * gcc.target/aarch64/sme/clamp_5.c: New test.
    
    (cherry picked from commit f5962839d6e0c3115931e68d938d9a0cd7a383b1)

Diff:
---
 gcc/config/aarch64/aarch64-sve2.md             |  4 ++--
 gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c |  2 ++
 gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c | 24 ++++++++++++++++++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 934e57055d34..bae153b2c8c3 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1117,7 +1117,7 @@
             UNSPEC_FMAXNM)
           (match_operand:SVE_FULL_F 3 "register_operand")]
          UNSPEC_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -1137,7 +1137,7 @@
             UNSPEC_COND_FMAXNM)
           (match_operand:SVE_FULL_F 3 "register_operand")]
          UNSPEC_COND_FMINNM))]
-  "TARGET_STREAMING_SME"
+  "TARGET_STREAMING_SME2"
   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
      [       w, %0, w, w; *             ] #
      [     ?&w,  w, w, w; yes           ] #
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
index 44959f794909..162de6224d58 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
@@ -2,6 +2,8 @@
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)                                                     \
   TYPE                                                                 \
   tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
index 643b2635b90e..453c82cd8605 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
@@ -2,6 +2,8 @@
 
 #include <arm_sme.h>
 
+#pragma GCC target "+sme2"
+
 #define TEST(TYPE)                                                     \
   TYPE                                                                 \
   untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming                \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644
index 000000000000..7c5464bdc366
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
@@ -0,0 +1,24 @@
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE)                                                     \
+  TYPE                                                                 \
+  tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
+  {                                                                    \
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c);  \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming                 \
+  {                                                                    \
+    return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c);  \
+  }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */

Reply via email to