Hi:
  This patch simplifies (view_convert:type ~a) < 0 to
(view_convert:type a) >= 0 when type is a signed integer type. Similarly,
(view_convert:type ~a) >= 0 is simplified to (view_convert:type a) < 0.
  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for the trunk?

gcc/ChangeLog:

        PR middle-end/100738
        * match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0,
        (view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE
        simplification.

gcc/testsuite/ChangeLog:

        PR middle-end/100738
        * g++.target/i386/avx2-pr100738-1.C: New test.
        * g++.target/i386/sse4_1-pr100738-1.C: New test.

-- 
BR,
Hongtao
From 8c13f61c25821aca63ef2920fddce9704bfadeec Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Thu, 27 May 2021 15:21:06 +0800
Subject: [PATCH] Optimize (view_convert:type ~a) < 0 to (view_convert:type a)
 >= 0 when type is signed integer. Similar for (view_convert:type ~a) >= 0.

gcc/ChangeLog:

	PR middle-end/100738
	* match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0,
	(view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE
	simplification.

gcc/testsuite/ChangeLog:

	PR middle-end/100738
	* g++.target/i386/avx2-pr100738-1.C: New test.
	* g++.target/i386/sse4_1-pr100738-1.C: New test.
---
 gcc/match.pd                                  |   9 ++
 .../g++.target/i386/avx2-pr100738-1.C         | 120 ++++++++++++++++++
 .../g++.target/i386/sse4_1-pr100738-1.C       | 120 ++++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr100738-1.C
 create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C

diff --git a/gcc/match.pd b/gcc/match.pd
index cdb87636951..d1c6b4ea2b4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3983,6 +3983,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 				  wide_int_to_tree (TREE_TYPE (cst),
 				  wi::to_wide (cst) - 1)); })))))
 
+/* ((view_convert:signed_type)~a) < 0 is just (view_convert) a >= 0.  */
+(for cmp  (lt ge)
+     acmp (ge lt)
+ (simplify
+  (cmp (view_convert (bit_not @0)) integer_zerop@1)
+  (if (!TYPE_UNSIGNED (TREE_TYPE (@1)))
+    (with { tree stype = TREE_TYPE (@1); }
+    (acmp (view_convert:stype @0) @1)))))
+
 /* We can simplify a logical negation of a comparison to the
    inverted comparison.  As we cannot compute an expression
    operator using invert_tree_comparison we have to simulate
diff --git a/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C
new file mode 100644
index 00000000000..80fdad3e5f0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C
@@ -0,0 +1,120 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -std=c++14 -O2 -mno-avx512f -mno-xop" } */
+/* { dg-final { scan-assembler-not "pxor" } } */
+/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "pblendvb" 6 } } */
+/* { dg-final { scan-assembler-times "blendvps" 6 } } */
+/* { dg-final { scan-assembler-times "blendvpd" 6 } } */
+
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+v8si
+f1 (v32qi a, v8si b, v8si c)
+{
+  return ((v8si)~a) < 0 ? b : c;
+}
+
+v4di
+f2 (v32qi a, v4di b, v4di c)
+{
+  return ((v4di)~a) < 0 ? b : c;
+}
+
+v32qi
+f3 (v16hi a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) < 0 ? b : c;
+}
+
+v8si
+f4 (v16hi a, v8si b, v8si c)
+{
+  return ((v8si)~a) < 0 ? b : c;
+}
+
+v4di
+f5 (v16hi a, v4di b, v4di c)
+{
+  return ((v4di)~a) < 0 ? b : c;
+}
+
+v32qi
+f6 (v8si a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) < 0 ? b : c;
+}
+
+v4di
+f7 (v8si a, v4di b, v4di c)
+{
+  return ((v4di)~a) < 0 ? b : c;
+}
+
+v32qi
+f8 (v4di a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) < 0 ? b : c;
+}
+
+v8si
+f9 (v4di a, v8si b, v8si c)
+{
+  return ((v8si)~a) < 0 ? b : c;
+}
+
+v8si
+f10 (v32qi a, v8si b, v8si c)
+{
+  return ((v8si)~a) >= 0 ? b : c;
+}
+
+v4di
+f11 (v32qi a, v4di b, v4di c)
+{
+  return ((v4di)~a) >= 0 ? b : c;
+}
+
+v32qi
+f12 (v16hi a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) >= 0 ? b : c;
+}
+
+v8si
+f13 (v16hi a, v8si b, v8si c)
+{
+  return ((v8si)~a) >= 0 ? b : c;
+}
+
+v4di
+f14 (v16hi a, v4di b, v4di c)
+{
+  return ((v4di)~a) >= 0 ? b : c;
+}
+
+v32qi
+f15 (v8si a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) >= 0 ? b : c;
+}
+
+v4di
+f16 (v8si a, v4di b, v4di c)
+{
+  return ((v4di)~a) >= 0 ? b : c;
+}
+
+v32qi
+f17 (v4di a, v32qi b, v32qi c)
+{
+  return ((v32qi)~a) >= 0 ? b : c;
+}
+
+v8si
+f18 (v4di a, v8si b, v8si c)
+{
+  return ((v8si)~a) >= 0 ? b : c;
+}
diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C
new file mode 100644
index 00000000000..d3454c264cd
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C
@@ -0,0 +1,120 @@
+/* { dg-do compile } */
+/* { dg-options "-msse4 -std=c++14 -mno-avx2 -O2 -mno-xop" } */
+/* { dg-final { scan-assembler-not "pxor" } } */
+/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "pblendvb" 6 } } */
+/* { dg-final { scan-assembler-times "blendvps" 6 } } */
+/* { dg-final { scan-assembler-times "blendvpd" 6 } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+v4si
+f1 (v16qi a, v4si b, v4si c)
+{
+  return ((v4si)~a) < 0 ? b : c;
+}
+
+v2di
+f2 (v16qi a, v2di b, v2di c)
+{
+  return ((v2di)~a) < 0 ? b : c;
+}
+
+v16qi
+f3 (v8hi a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) < 0 ? b : c;
+}
+
+v4si
+f4 (v8hi a, v4si b, v4si c)
+{
+  return ((v4si)~a) < 0 ? b : c;
+}
+
+v2di
+f5 (v8hi a, v2di b, v2di c)
+{
+  return ((v2di)~a) < 0 ? b : c;
+}
+
+v16qi
+f6 (v4si a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) < 0 ? b : c;
+}
+
+v2di
+f7 (v4si a, v2di b, v2di c)
+{
+  return ((v2di)~a) < 0 ? b : c;
+}
+
+v16qi
+f8 (v2di a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) < 0 ? b : c;
+}
+
+v4si
+f9 (v2di a, v4si b, v4si c)
+{
+  return ((v4si)~a) < 0 ? b : c;
+}
+
+v4si
+f10 (v16qi a, v4si b, v4si c)
+{
+  return ((v4si)~a) >= 0 ? b : c;
+}
+
+v2di
+f11 (v16qi a, v2di b, v2di c)
+{
+  return ((v2di)~a) >= 0 ? b : c;
+}
+
+v16qi
+f12 (v8hi a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) >= 0 ? b : c;
+}
+
+v4si
+f13 (v8hi a, v4si b, v4si c)
+{
+  return ((v4si)~a) >= 0 ? b : c;
+}
+
+v2di
+f14 (v8hi a, v2di b, v2di c)
+{
+  return ((v2di)~a) >= 0 ? b : c;
+}
+
+v16qi
+f15 (v4si a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) >= 0 ? b : c;
+}
+
+v2di
+f16 (v4si a, v2di b, v2di c)
+{
+  return ((v2di)~a) >= 0 ? b : c;
+}
+
+v16qi
+f17 (v2di a, v16qi b, v16qi c)
+{
+  return ((v16qi)~a) >= 0 ? b : c;
+}
+
+v4si
+f18 (v2di a, v4si b, v4si c)
+{
+  return ((v4si)~a) >= 0 ? b : c;
+}
-- 
2.18.1

Reply via email to