Hi Segher,

on 2019/9/27 下午3:27, Segher Boessenkool wrote:
> Hi Kewen,
> 
>> +;; Support signed/unsigned long long to float conversion vectorization.
>> +(define_expand "vec_pack<su>_float_v2di"
>> +  [(match_operand:V4SF 0 "vfloat_operand")
>> +   (any_float:V4SF (parallel [(match_operand:V2DI 1 "vint_operand")
>> +     (match_operand:V2DI 2 "vint_operand")]))]
> 
> To concatenate two vectors, the syntax is vec_concat.  So
> 
>   [(set (match_operand:V4SF 0 "vfloat_operand")
>       (any_float:V4SF
>         (vec_concat:V4DI (match_operand:V2DI 1 "vint_operand")
>                          (match_operand:V2DI 2 "vint_operand"))))]
> 
> It is of course a define_expand here, and it always calls DONE, so the
> only thing the RTL template is used for is the match_operands; but also
> important here is that you use an iterator (any_float), so you need to
> work that into the template some way.
> 
> Your code would work, but it is a bit misleading, an unsuspecting reader
> (*cough* me *cough*) might think this is the actual insn this expander
> will create.
> 
>> +;; Support float to signed/unsigned long long conversion vectorization.
>> +(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
>> +  [(match_operand:V2DI 0 "vint_operand")
>> +   (any_fix:V2DI (match_operand:V4SF 1 "vfloat_operand"))]
> 
> Similarly here: the pattern as you wrote it isn't valid RTL.
> 
>   [(set (match_operand:V2DI 0 "vint_operand")
>       (any_fix:V2DI (vec_select:V2SF ...
> uh-oh, we do not have a mode V2SF.
> Let's go with what you have then, add a comment that the template isn't
> valid RTL, but you need it for the iterator?
> 
> Or can you think of a different way of putting an iterator like this in
> the template?  Maybe something like
> 
> (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
>   [(match_operand:V2DI 0 "vint_operand")
>    (match_operand:V4SF 1 "vfloat_operand")
>    (any_fix (pc))]
> 
> works?  If it does, please do that; if you cannot find a reasonably clear
> syntax, go with what you had, but please add a comment saying the template
> won't ever be inserted as instruction.
> 

Thanks for your advice on "any_fix (pc)"; testing shows it works perfectly.
The attached patch adopts this form and adds a comment saying it is there
just for the code attribute.  It is bootstrapped; I'll commit it once it has
been regression tested.  Thanks!

> (Maybe one of the gen* tools complains any_fix needs a mode? :QI will do
> if so, or :P if you like that better).

I didn't encounter any errors, so it seems a mode-less any_fix is allowed now?


Thanks,
Kewen
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7633171..dc6a6f6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5538,3 +5538,48 @@
   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
+
+;; Support signed/unsigned long long to float conversion vectorization.
+;; (any_float (pc)) is never emitted; it only provides code attribute <su>.
+(define_expand "vec_pack<su>_float_v2di"
+  [(match_operand:V4SF 0 "vfloat_operand")
+   (match_operand:V2DI 1 "vint_operand")
+   (match_operand:V2DI 2 "vint_operand")
+   (any_float (pc))]
+  "TARGET_VSX"
+{
+  rtx r1 = gen_reg_rtx (V4SFmode);  /* Receives converted operand 1.  */
+  rtx r2 = gen_reg_rtx (V4SFmode);  /* Receives converted operand 2.  */
+  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
+  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
+  rs6000_expand_extract_even (operands[0], r1, r2);  /* Combine into op 0.  */
+  DONE;
+})
+
+;; Support float to signed/unsigned long long conversion vectorization.
+;; (any_fix (pc)) is never emitted; it only provides code attribute <su>.
+(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
+  [(match_operand:V2DI 0 "vint_operand")
+   (match_operand:V4SF 1 "vfloat_operand")
+   (any_fix (pc))]
+  "TARGET_VSX"
+{
+  rtx reg = gen_reg_rtx (V4SFmode);  /* Self-interleaved operand 1.  */
+  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
+  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
+  DONE;
+})
+
+;; "lo" counterpart.  (any_fix (pc)) here is just for code attribute <su>.
+(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
+  [(match_operand:V2DI 0 "vint_operand")
+   (match_operand:V4SF 1 "vfloat_operand")
+   (any_fix (pc))]
+  "TARGET_VSX"
+{
+  rtx reg = gen_reg_rtx (V4SFmode);  /* Self-interleaved operand 1.  */
+  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
+  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
+  DONE;
+})
+
diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c
new file mode 100644
index 0000000..d96db14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -ftree-vectorize -mvsx" } */
+
+/* Test vectorizer can exploit vector conversion instructions to convert
+   unsigned/signed long long to float.  */
+
+#include <stddef.h>
+
+#define SIZE 32
+#define ALIGN 16
+
+float sflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+float uflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+
+unsigned long long ulong_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+signed long long slong_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+
+void
+convert_slong_to_float (void)
+{
+  size_t i;
+  /* Per-element signed long long -> float; test scans for xvcvsxdsp.  */
+  for (i = 0; i < SIZE; i++)
+    sflt_array[i] = (float) slong_array[i];
+}
+
+void
+convert_ulong_to_float (void)
+{
+  size_t i;
+  /* Per-element unsigned long long -> float; test scans for xvcvuxdsp.  */
+  for (i = 0; i < SIZE; i++)
+    uflt_array[i] = (float) ulong_array[i];
+}
+
+/* { dg-final { scan-assembler {\mxvcvsxdsp\M} } } */
+/* { dg-final { scan-assembler {\mxvcvuxdsp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c
new file mode 100644
index 0000000..5dd5dea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -ftree-vectorize -mvsx" } */
+
+/* Test vectorizer can exploit vector conversion instructions to convert
+   float to unsigned/signed long long.  */
+
+#include <stddef.h>
+
+#define SIZE 32
+#define ALIGN 16
+
+float sflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+float uflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+
+unsigned long long ulong_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+signed long long slong_array[SIZE] __attribute__ ((__aligned__ (ALIGN)));
+
+void
+convert_float_to_slong (void)
+{
+  size_t i;
+  /* Per-element float -> signed long long; test scans for xvcvspsxds.  */
+  for (i = 0; i < SIZE; i++)
+    slong_array[i] = (signed long long) sflt_array[i];
+}
+
+void
+convert_float_to_ulong (void)
+{
+  size_t i;
+  /* Per-element float -> unsigned long long; test scans for xvcvspuxds.  */
+  for (i = 0; i < SIZE; i++)
+    ulong_array[i] = (unsigned long long) uflt_array[i];
+}
+
+/* { dg-final { scan-assembler {\mxvcvspsxds\M} } } */
+/* { dg-final { scan-assembler {\mxvcvspuxds\M} } } */

Reply via email to