Hi Richard,

> -----Original Message-----
> From: Richard Sandiford <richard.sandif...@arm.com>
> Sent: Monday, May 10, 2021 5:49 PM
> To: Tamar Christina <tamar.christ...@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; Richard Earnshaw
> <richard.earns...@arm.com>; Marcus Shawcroft
> <marcus.shawcr...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Subject: Re: [PATCH 2/4]AArch64: Add support for sign differing dot-product
> usdot for NEON and SVE.
> 
> Tamar Christina <tamar.christ...@arm.com> writes:
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index
> >
> 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de1
> 7126
> > 432e2a63a32b 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>"
> >    DONE;
> >  })
> >
> > +;; Auto-vectorizer pattern for usdot
> > +(define_expand "usdot_prod<vsi2qi>"
> > +  [(set (match_operand:VS 0 "register_operand")
> > +   (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1
> "register_operand")
> > +                       (match_operand:<VSI2QI> 2 "register_operand")]
> > +            UNSPEC_USDOT)
> > +           (match_operand:VS 3 "register_operand")))]
> > +  "TARGET_I8MM"
> > +{
> > +  emit_insn (
> > +    gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1],
> > +                          operands[2]));
> > +  emit_move_insn (operands[0], operands[3]);
> > +  DONE;
> > +})
> 
> We can't modify operands[3] here; it's an input rather than an output.

Sorry, I should have noticed this.  I had blindly copied the existing pattern
for dot-product, and it looks like that one is wrong as well.
I'll send a separate patch to fix that one.

> 
> It looks like this would work with just the {…} removed though.
> The pattern will match aarch64_usdot<vsi2qi> on its own accord.
> 
> Even better would be to rename __builtin_aarch64_usdot… to
> __builtin_usdot_prod…, change its arguments so that they line up with the
> optabs, and change arm_neon.h to match.
> 
> > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
> > b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f
> 09
> > 496dd038410d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
> > +
> > +#define N 480
> > +#define SIGNEDNESS_1 unsigned
> > +#define SIGNEDNESS_2 signed
> > +#define SIGNEDNESS_3 signed
> > +#define SIGNEDNESS_4 unsigned
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict a,
> > +   SIGNEDNESS_4 char *restrict b)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict b,
> > +   SIGNEDNESS_4 char *restrict a)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec35056
> 57b
> > f05320e5fdbb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
> > +
> > +#define N 480
> > +#define SIGNEDNESS_1 unsigned
> > +#define SIGNEDNESS_2 signed
> > +#define SIGNEDNESS_3 signed
> > +#define SIGNEDNESS_4 unsigned
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict a,
> > +   SIGNEDNESS_4 char *restrict b)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res,
> > +SIGNEDNESS_3 char *restrict b,
> > +   SIGNEDNESS_4 char *restrict a)
> > +{
> > +  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > +    {
> > +      int av = a[i];
> > +      int bv = b[i];
> > +      SIGNEDNESS_2 short mult = av * bv;
> > +      res += mult;
> > +    }
> > +  return res;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
> 
> Guess this is personal preference, but I don't think the SIGNEDNESS_*
> macros add anything when used like this.  I remember doing something
> similar in the past when including .c files from other .c files(!) in order to
> avoid cut-&-paste, but there doesn't seem much benefit for standalone files
> like these.

If it's all the same to you, I do prefer this version, since it's identical to
the mid-end tests.  It allows anyone familiar with those tests to quickly see
what it's testing.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (aarch64_usdot<vsi2qi>): Rename to...
        (usdot_prod<vsi2qi>): ...This.
        * config/aarch64/aarch64-simd-builtins.def (usdot): Rename to...
        (usdot_prod): ...This.
        * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Update to use
        the renamed builtin.
        * config/aarch64/aarch64-sve.md (@aarch64_<sur>dot_prod<vsi2qi>):
        Rename to...
        (@<sur>dot_prod<vsi2qi>): ...This.
        * config/aarch64/aarch64-sve-builtins-base.cc
        (svusdot_impl::expand): Use it.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/simd/vusdot-autovec.c: New test.
        * gcc.target/aarch64/sve/vusdot-autovec.c: New test.

> 
> Thanks,
> Richard
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b885bd5b38bf7ad83eb9d801284bf9b34db17210..c869ed9a6ab7d63f0e3d5fe393a93c1cc9142e78 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -361,10 +361,11 @@
   BUILTIN_VSDQ_I_DI (BINOP, srshl, 0, NONE)
   BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
 
-  /* Implemented by aarch64_<sur><dotprod>{_lane}{q}<dot_mode>.  */
+  /* Implemented by <sur><dotprod>_prod<dot_mode>.  */
   BUILTIN_VB (TERNOP, sdot, 0, NONE)
   BUILTIN_VB (TERNOPU, udot, 0, NONE)
-  BUILTIN_VB (TERNOP_SSUS, usdot, 0, NONE)
+  BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE)
+  /* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>.  */
   BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
   BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE)
   BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4edee99051c4e2112b546becca47da32aae21df2..253ddbe25d3a86af4b40b056132e6a86a0392ea6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -601,7 +601,7 @@ (define_insn "aarch64_<sur>dot<vsi2qi>"
 
 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
 ;; (vector) Dot Product operation.
-(define_insn "aarch64_usdot<vsi2qi>"
+(define_insn "usdot_prod<vsi2qi>"
   [(set (match_operand:VS 0 "register_operand" "=w")
 	(plus:VS
 	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index dfdf0e2fd186389cbddcff51ef52f8778d7fdb24..50adcd5404e97e610485140fdbfe4c8ebbf2f602 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2366,7 +2366,7 @@ public:
        Hence we do the same rotation on arguments as svdot_impl does.  */
     e.rotate_inputs_left (0, 3);
     machine_mode mode = e.vector_mode (0);
-    insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
+    insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
     return e.use_exact_insn (icode);
   }
 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..1278f6f12fadf8eec693cd47fd545ff3277f08f1 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6870,7 +6870,7 @@ (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
   [(set_attr "movprfx" "*,yes")]
 )
 
-(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
+(define_insn "@<sur>dot_prod<vsi2qi>"
   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
         (plus:VNx4SI_ONLY
 	  (unspec:VNx4SI_ONLY
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index baa30bd5a9d96c1bf04a37fb105091ea56a6444a..373f06a24ea6ce686d7e0cdf53dd364041c61092 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34384,14 +34384,14 @@ __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
 {
-  return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
 {
-  return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
+  return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b);
 }
 
 __extension__ extern __inline int32x2_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+   SIGNEDNESS_4 char *restrict b)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+   SIGNEDNESS_4 char *restrict a)
+{
+  for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+    {
+      int av = a[i];
+      int bv = b[i];
+      SIGNEDNESS_2 short mult = av * bv;
+      res += mult;
+    }
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */

Reply via email to