On Wed, Oct 21, 2020 at 5:07 AM Segher Boessenkool
<seg...@kernel.crashing.org> wrote:
>
> On Tue, Oct 20, 2020 at 11:20:48AM +0800, Hongtao Liu wrote:
> > +       unsigned HOST_WIDE_INT subreg_offset = 0;
> > +       if (GET_CODE (trueop0) == SUBREG
> > +           && GET_MODE_INNER (mode)
> > +              == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
> > +           && (GET_MODE_NUNITS (mode)).is_constant (&l1)
> > +           && constant_multiple_p (SUBREG_BYTE (trueop0),
> > +                                   GET_MODE_UNIT_BITSIZE (mode),
> > +                                   &subreg_offset))
> > +         {
> > +           gcc_assert (XVECLEN (trueop1, 0) == l1);
>
> Why?  If we want to check that, it should be in RTL checking (and maybe
> it already is!)
>

Yes, RTL checking already guarantees that, so the assert should be removed.

> > +           bool success = true;
> > +           poly_uint64 nunits
> > +             = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
> > +           for (int i = 0; i != l1; i++)
> > +             {
> > +               rtx idx = XVECEXP (trueop1, 0, i);
> > +               if (!CONST_INT_P (idx)
> > +                   || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
>
> Can that ever happen in valid code?  This seems to just hide problems.
>

Yes. An rtx like (vec_select:v4di (subreg:v8di (reg:v2di))
 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
selects indices beyond the units of the inner register, yet it seems to be valid for RTL checking.

> > +                 {
> > +                   success = false;
> > +                   break;
> > +                 }
> > +             }
> > +           if (success)
>
> If you have a huge piece of code like this, factor it?  Esp. if you now
> need to have all kinds of booleans where you really just want to do
> early returns.
>

I wanted to jump out of this if branch. Since the later code in this function
won't simplify VEC_SELECT any further once it matches my if condition,
it's OK to use early returns.

>
> Segher

Updated patch below.

-- 
BR,
Hongtao
From e4e9c256efc636e994b0994c69cb0b4e7edc25a0 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Tue, 13 Oct 2020 15:35:29 +0800
Subject: [PATCH] Simplify vec_select of a subreg of X to just a vec_select of
 X.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of a subreg of X to a vec_select of X.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
---
 gcc/simplify-rtx.c                        | 34 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr97249-1.c | 30 ++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr97249-1.c

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 869f0d11b2e..947a9f37241 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4170,6 +4170,40 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		    return subop1;
 		}
 	    }
+
+	  /* Simplify vec_select of a subreg of X to just a vec_select of X
+	     when X has same component mode as vec_select.  */
+	  unsigned HOST_WIDE_INT subreg_offset = 0;
+	  if (GET_CODE (trueop0) == SUBREG
+	      && GET_MODE_INNER (mode)
+		 == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
+	      && GET_MODE_NUNITS (mode).is_constant (&l1)
+	      && constant_multiple_p (subreg_memory_offset (trueop0),
+				      GET_MODE_UNIT_BITSIZE (mode),
+				      &subreg_offset))
+	    {
+	      poly_uint64 nunits
+		= GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
+	      rtx par = trueop1;
+	      for (int i = 0; i != l1; i++)
+		{
+		  rtx idx = XVECEXP (trueop1, 0, i);
+		  if (!CONST_INT_P (idx)
+		      || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
+		    return 0;
+		}
+
+	      if (subreg_offset)
+		{
+		  rtvec vec = rtvec_alloc (l1);
+		  for (int i = 0; i < l1; i++)
+		    RTVEC_ELT (vec, i)
+		      = GEN_INT (INTVAL (XVECEXP (trueop1, 0, i))
+				 + subreg_offset);
+		  par = gen_rtx_PARALLEL (VOIDmode, vec);
+		}
+	      return gen_rtx_VEC_SELECT (mode, SUBREG_REG (trueop0), par);
+	    }
 	}
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644
index 00000000000..4478a34a9f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97249-1.c
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxbw[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxwd[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxdq[ \t]+\(.*%xmm[0-9]} 2 } } */
+
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++)
+     p3[i] = p1[i] + p2[i];
+     return;
+}
+
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++)
+      p3[i] = (long long)p1[i] + (long long)p2[i];
+     return;
+}
-- 
2.18.1

Reply via email to