On 10/17/18, Richard Sandiford <richard.sandif...@arm.com> wrote:
> "H.J. Lu" <hjl.to...@gmail.com> writes:
>> We may simplify
>>
>>   (subreg (vec_merge (vec_duplicate X) (vector) (const_int 1)) 0)
>>
>> to X when the mode of X is the same as the mode of the subreg.
>>
>> gcc/
>>
>>      PR target/87537
>>      * simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge
>>      of vec_duplicate.
>>
>> gcc/testsuite/
>>
>>      PR target/87537
>>      * gcc.target/i386/pr87537-1.c: New test.
>> ---
>>  gcc/simplify-rtx.c                        | 11 +++++++++++
>>  gcc/testsuite/gcc.target/i386/pr87537-1.c | 12 ++++++++++++
>>  2 files changed, 23 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-1.c
>>
>> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
>> index 9bc53866b9f..e2a0533b23d 100644
>> --- a/gcc/simplify-rtx.c
>> +++ b/gcc/simplify-rtx.c
>> @@ -6601,6 +6601,17 @@ simplify_subreg (machine_mode outermode, rtx op,
>>        return NULL_RTX;
>>      }
>>
>> +  /* Return X for
>> +    (subreg (vec_merge (vec_duplicate X) (vector) (const_int 1)) 0)
>> +   */
>> +  if (known_eq (byte, 0U)
>> +      && GET_CODE (op) == VEC_MERGE
>> +      && GET_CODE (XEXP (op, 0)) == VEC_DUPLICATE
>> +      && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode
>> +      && CONST_INT_P (XEXP (op, 2))
>> +      && INTVAL (XEXP (op, 2)) == 1)
>> +    return XEXP (XEXP (op, 0), 0);
>
> Would be good to handle the more general case of:
>
>   unsigned int idx;
>   if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx)
>       ...
>       && (INTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0)
>
> (untested!)  Please also add some selftests to test_vector_ops.
>

Like this?  OK for trunk?

Thanks.

-- 
H.J.
From 11baf753696ebf080270b445df701c716f786b76 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Wed, 10 Oct 2018 13:42:21 -0700
Subject: [PATCH] Simplify subreg of vec_merge of vec_duplicate

We can simplify

  (subreg (vec_merge (vec_duplicate X)
		     (vector)
		     (const_int ((1 << N) | M)))
	  (N * sizeof (X)))

to X when the mode of X is the same as the mode of the subreg.

gcc/

	PR target/87537
	* simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge
	of vec_duplicate.
	(test_vector_ops_duplicate): Add test for a scalar subreg of a
	VEC_MERGE of a VEC_DUPLICATE.

gcc/testsuite/

	PR target/87537
	* gcc.target/i386/pr87537-1.c: New test.
---
 gcc/simplify-rtx.c                        | 29 ++++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/pr87537-1.c | 12 ++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 9bc53866b9f..b0cf3bbb2a9 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -6601,6 +6601,21 @@ simplify_subreg (machine_mode outermode, rtx op,
       return NULL_RTX;
     }
 
+  /* Return X for
+	(subreg (vec_merge (vec_duplicate X)
+			   (vector)
+			   (const_int ((1 << N) | M)))
+		(N * sizeof (X)))
+   */
+  unsigned int idx;
+  if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx)
+      && GET_CODE (op) == VEC_MERGE
+      && GET_CODE (XEXP (op, 0)) == VEC_DUPLICATE
+      && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode
+      && CONST_INT_P (XEXP (op, 2))
+      && (UINTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0)
+    return XEXP (XEXP (op, 0), 0);
+
   /* A SUBREG resulting from a zero extension may fold to zero if
      it extracts higher bits that the ZERO_EXTEND's source bits.  */
   if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode))
@@ -6831,15 +6846,27 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
 		     simplify_binary_operation (VEC_SELECT, inner_mode,
 						duplicate, zero_par));
 
-  /* And again with the final element.  */
   unsigned HOST_WIDE_INT const_nunits;
   if (nunits.is_constant (&const_nunits))
     {
+      /* And again with the final element.  */
       rtx last_index = gen_int_mode (const_nunits - 1, word_mode);
       rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index));
       ASSERT_RTX_PTR_EQ (scalar_reg,
 			 simplify_binary_operation (VEC_SELECT, inner_mode,
 						    duplicate, last_par));
+
+      /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE.  */
+      rtx vector_reg = make_test_reg (mode);
+      for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+	{
+	  rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
+	  rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+	  poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+	  ASSERT_RTX_EQ (scalar_reg,
+			 simplify_gen_subreg (inner_mode, vm,
+					      mode, offset));
+	}
     }
 
   /* Test a scalar subreg of a VEC_DUPLICATE.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr87537-1.c b/gcc/testsuite/gcc.target/i386/pr87537-1.c
new file mode 100644
index 00000000000..df849b032e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87537-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+
+#include <immintrin.h>
+
+__m128
+foo (float *x)
+{
+  return _mm_broadcastss_ps(_mm_load_ss(x));
+}
-- 
2.17.2

Reply via email to