From 12cf9b362576735e4c584c48cd6db3d2b0f2e14b Mon Sep 17 00:00:00 2001
From: Kaipeng Zhou <zhoukaipeng3@huawei.com>
Date: Fri, 12 Jun 2020 00:54:15 +0800
Subject: [PATCH] vect: Remove extra variable created for memory reference in
 loop vectorization.

Vectorization create two copies of the same IV and IVOPTs does not perform
CSE.  But vinfo->ivexpr_map is supposed to catch those "IV base and/or step
expressions".  Just call cse_and_gimplify_to_preheader to handle
gathering/scattering to avoid the extra variable.

2020-06-12  Bin Cheng <bin.cheng@linux.alibaba.com>
	    Kaipeng Zhou  <zhoukaipeng3@huawei.com>

	PR tree-optimization/95199
	* tree-vect-stmts.c: Use CSE map to catch the IV step and eliminate
	extra variable.

2020-06-12  Bin Cheng <bin.cheng@linux.alibaba.com>
	    Kaipeng Zhou  <zhoukaipeng3@huawei.com>

	PR tree-optimization/95199
	* gcc.dg/vect/pr95199.c: New test.
---
 gcc/ChangeLog                       |  7 +++++++
 gcc/testsuite/ChangeLog             |  6 ++++++
 gcc/testsuite/gcc.dg/vect/pr95199.c | 17 +++++++++++++++++
 gcc/tree-vect-stmts.c               |  1 +
 4 files changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr95199.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c92582df7fe..753d70fc94b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2020-06-12  Bin Cheng <bin.cheng@linux.alibaba.com>
+	    Kaipeng Zhou  <zhoukaipeng3@huawei.com>
+
+	PR tree-optimization/95199
+	* tree-vect-stmts.c: Use CSE map to catch the IV step and eliminate
+	extra variable.
+
 2020-06-08  Tobias Burnus  <tobias@codesourcery.com>
 
 	PR lto/94848
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 60d9ecca3ed..a27dd3fa072 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-06-12  Bin Cheng <bin.cheng@linux.alibaba.com>
+	    Kaipeng Zhou  <zhoukaipeng3@huawei.com>
+
+	PR tree-optimization/95199
+	* gcc.dg/vect/pr95199.c: New test.
+
 2020-06-08  Harald Anlauf  <anlauf@gmx.de>
 
 	PR fortran/95195
diff --git a/gcc/testsuite/gcc.dg/vect/pr95199.c b/gcc/testsuite/gcc.dg/vect/pr95199.c
new file mode 100644
index 00000000000..ec201feaec8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr95199.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -march=armv8.2-a+fp+sve" } */
+
+void
+foo (double *a, double *b, double m, int inc_x, int inc_y)
+{
+  int ix = 0, iy = 0;
+  for (int i = 0; i < 1000; ++i)
+    {
+      a[ix] += m * b[iy];
+      ix += inc_x;
+      iy += inc_y;
+    }
+  return ;
+}
+
+/* { dg-final { scan-assembler-times "add" 12 } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b24b0fe4304..26a2b143b01 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2969,6 +2969,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
   tree bump = size_binop (MULT_EXPR,
 			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
 			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
+  bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
   if (stmts)
     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
-- 
2.19.1

