Hi: This patch simplifies (view_convert:type ~a) < 0 to (view_convert:type a) >= 0 when type is a signed integer type, and likewise simplifies (view_convert:type ~a) >= 0 to (view_convert:type a) < 0. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for the trunk?
gcc/ChangeLog: PR middle-end/100738 * match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0, (view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE simplification. gcc/testsuite/ChangeLog: PR middle-end/100738 * g++.target/i386/avx2-pr100738-1.C: New test. * g++.target/i386/sse4_1-pr100738-1.C: New test. -- BR, Hongtao
From 8c13f61c25821aca63ef2920fddce9704bfadeec Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Thu, 27 May 2021 15:21:06 +0800 Subject: [PATCH] Optimize (view_convert:type ~a) < 0 to (view_convert:type a) >= 0 when type is signed integer. Similar for (view_convert:type ~a) >= 0. gcc/ChangeLog: PR middle-end/100738 * match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0, (view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE simplification. gcc/testsuite/ChangeLog: PR middle-end/100738 * g++.target/i386/avx2-pr100738-1.C: New test. * g++.target/i386/sse4_1-pr100738-1.C: New test. --- gcc/match.pd | 9 ++ .../g++.target/i386/avx2-pr100738-1.C | 120 ++++++++++++++++++ .../g++.target/i386/sse4_1-pr100738-1.C | 120 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr100738-1.C create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C diff --git a/gcc/match.pd b/gcc/match.pd index cdb87636951..d1c6b4ea2b4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3983,6 +3983,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wide_int_to_tree (TREE_TYPE (cst), wi::to_wide (cst) - 1)); }))))) +/* ((view_convert:signed_type)~a) < 0 is just (view_convert) a >= 0. */ +(for cmp (lt ge) + acmp (ge lt) + (simplify + (cmp (view_convert (bit_not @0)) integer_zerop@1) + (if (!TYPE_UNSIGNED (TREE_TYPE (@1))) + (with { tree stype = TREE_TYPE (@1); } + (acmp (view_convert:stype @0) @1))))) + /* We can simplify a logical negation of a comparison to the inverted comparison. 
As we cannot compute an expression operator using invert_tree_comparison we have to simulate diff --git a/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C new file mode 100644 index 00000000000..80fdad3e5f0 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C @@ -0,0 +1,120 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -std=c++14 -O2 -mno-avx512f -mno-xop" } */ +/* { dg-final { scan-assembler-not "pxor" } } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "pblendvb" 6 } } */ +/* { dg-final { scan-assembler-times "blendvps" 6 } } */ +/* { dg-final { scan-assembler-times "blendvpd" 6 } } */ + +typedef char v32qi __attribute__ ((vector_size (32))); +typedef short v16hi __attribute__ ((vector_size (32))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef long long v4di __attribute__ ((vector_size (32))); + +v8si +f1 (v32qi a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v4di +f2 (v32qi a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f3 (v16hi a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v8si +f4 (v16hi a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v4di +f5 (v16hi a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f6 (v8si a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v4di +f7 (v8si a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f8 (v4di a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v8si +f9 (v4di a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v8si +f10 (v32qi a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? b : c; +} + +v4di +f11 (v32qi a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? b : c; +} + +v32qi +f12 (v16hi a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v8si +f13 (v16hi a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? 
b : c; +} + +v4di +f14 (v16hi a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? b : c; +} + +v32qi +f15 (v8si a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v4di +f16 (v8si a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? b : c; +} + +v32qi +f17 (v4di a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v8si +f18 (v4di a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? b : c; +} diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C new file mode 100644 index 00000000000..d3454c264cd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C @@ -0,0 +1,120 @@ +/* { dg-do compile } */ +/* { dg-options "-msse4 -std=c++14 -mno-avx2 -O2 -mno-xop" } */ +/* { dg-final { scan-assembler-not "pxor" } } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "pblendvb" 6 } } */ +/* { dg-final { scan-assembler-times "blendvps" 6 } } */ +/* { dg-final { scan-assembler-times "blendvpd" 6 } } */ + +typedef char v16qi __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef long long v2di __attribute__ ((vector_size (16))); + +v4si +f1 (v16qi a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? b : c; +} + +v2di +f2 (v16qi a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f3 (v8hi a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v4si +f4 (v8hi a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? b : c; +} + +v2di +f5 (v8hi a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f6 (v4si a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v2di +f7 (v4si a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f8 (v2di a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v4si +f9 (v2di a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? 
b : c; +} + +v4si +f10 (v16qi a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? b : c; +} + +v2di +f11 (v16qi a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f12 (v8hi a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v4si +f13 (v8hi a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? b : c; +} + +v2di +f14 (v8hi a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f15 (v4si a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v2di +f16 (v4si a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f17 (v2di a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v4si +f18 (v2di a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? b : c; +} -- 2.18.1