Hi, As it's now stage 1, I'd like to gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html
Gui Haochen Thanks 在 2023/4/24 13:35, HAO CHEN GUI 写道: > Hi, > Gently ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html > > Thanks > Gui Haochen > > 在 2023/2/20 10:10, HAO CHEN GUI 写道: >> Hi, >> Gently ping this: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html >> >> Gui Haochen >> Thanks >> >> 在 2023/2/8 13:08, HAO CHEN GUI 写道: >>> Hi, >>> The logical operations for TImode are split after the reload pass right now. >>> Some >>> potential optimizations are missed as the split is too late. This patch removes >>> TImode from "AND", "IOR", "XOR" and "NOT" expanders so that these logical >>> operations can be split at the expand pass. The new test case illustrates the >>> optimization. >>> >>> Two test cases of pr92398 are merged into one as all sub-targets generate >>> the same sequence of instructions with the patch. >>> >>> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >>> >>> Thanks >>> Gui Haochen >>> >>> >>> ChangeLog >>> 2023-02-08 Haochen Gui <guih...@linux.ibm.com> >>> >>> gcc/ >>> PR target/100694 >>> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit >>> vector types. >>> (and<mode>3): Replace BOOL_128 with BOOL_128_V. >>> (ior<mode>3): Likewise. >>> (xor<mode>3): Likewise. >>> (one_cmpl<mode>2 expander): New expander with BOOL_128_V. >>> (one_cmpl<mode>2 insn_and_split): Rename to ... >>> (*one_cmpl<mode>2): ... this. >>> >>> gcc/testsuite/ >>> PR target/100694 >>> * gcc.target/powerpc/pr100694.c: New. >>> * gcc.target/powerpc/pr92398.c: New. >>> * gcc.target/powerpc/pr92398.h: Remove. >>> * gcc.target/powerpc/pr92398.p9-.c: Remove. >>> * gcc.target/powerpc/pr92398.p9+.c: Remove. 
>>> >>> >>> patch.diff >>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>> index 4bd1dfd3da9..455b7329643 100644 >>> --- a/gcc/config/rs6000/rs6000.md >>> +++ b/gcc/config/rs6000/rs6000.md >>> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >>> (V2DF "TARGET_ALTIVEC") >>> (V1TI "TARGET_ALTIVEC")]) >>> >>> +;; Mode iterator for logical operations on 128-bit vector types >>> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") >>> + (V8HI "TARGET_ALTIVEC") >>> + (V4SI "TARGET_ALTIVEC") >>> + (V4SF "TARGET_ALTIVEC") >>> + (V2DI "TARGET_ALTIVEC") >>> + (V2DF "TARGET_ALTIVEC") >>> + (V1TI "TARGET_ALTIVEC")]) >>> + >>> ;; For the GPRs we use 3 constraints for register outputs, two that are the >>> ;; same as the output register, and a third where the output register is an >>> ;; early clobber, so we don't have to deal with register overlaps. For the >>> @@ -7135,23 +7144,23 @@ (define_expand "subti3" >>> ;; 128-bit logical operations expanders >>> >>> (define_expand "and<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - (match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> (define_expand "ior<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - (match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> (define_expand "xor<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - 
(match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" >>> (const_string "16")))]) >>> >>> ;; 128-bit one's complement >>> -(define_insn_and_split "one_cmpl<mode>2" >>> +(define_expand "one_cmpl<mode>2" >>> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >>> + (not:BOOL_128_V >>> + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >>> + "" >>> + "") >>> + >>> +(define_insn_and_split "*one_cmpl<mode>2" >>> [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >>> (not:BOOL_128 >>> (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c >>> b/gcc/testsuite/gcc.target/powerpc/pr100694.c >>> new file mode 100644 >>> index 00000000000..96a895d6c44 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c >>> @@ -0,0 +1,14 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target int128 } */ >>> +/* { dg-options "-O2" } */ >>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ >>> + >>> +/* It just needs two std and one blr. 
*/ >>> +void foo (unsigned __int128* res, unsigned long long hi, unsigned long >>> long lo) >>> +{ >>> + unsigned __int128 i = hi; >>> + i <<= 64; >>> + i |= lo; >>> + *res = i; >>> +} >>> + >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c >>> b/gcc/testsuite/gcc.target/powerpc/pr92398.c >>> new file mode 100644 >>> index 00000000000..7d6201cc5bb >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c >>> @@ -0,0 +1,12 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target int128 } */ >>> +/* { dg-options "-O2" } */ >>> +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ >>> +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ >>> + >>> +/* All platforms should generate the same instructions: not;not;std;std. >>> */ >>> +void bar (__int128_t *dst, __int128_t src) >>> +{ >>> + *dst = ~src; >>> +} >>> + >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h >>> b/gcc/testsuite/gcc.target/powerpc/pr92398.h >>> deleted file mode 100644 >>> index 5a4a8bcab80..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h >>> +++ /dev/null >>> @@ -1,17 +0,0 @@ >>> -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. >>> - The two files have the tests for the number of instructions generated >>> for >>> - P9- versus P9+. >>> - >>> - store generates difference instructions as below: >>> - P9+: mtvsrdd;xxlnot;stxv. >>> - P8/P7/P6 LE: not;not;std;std. >>> - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. >>> - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. >>> - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. 
*/ >>> - >>> -void >>> -bar (__int128_t *dst, __int128_t src) >>> -{ >>> - *dst = ~src; >>> -} >>> - >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >>> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >>> deleted file mode 100644 >>> index 72dd1d9a274..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >>> +++ /dev/null >>> @@ -1,12 +0,0 @@ >>> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ >>> -/* { dg-require-effective-target powerpc_vsx_ok } */ >>> -/* { dg-options "-O2 -mvsx" } */ >>> - >>> -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ >>> -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ >>> -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ >>> -/* { dg-final { scan-assembler-not {\mld\M} } } */ >>> -/* { dg-final { scan-assembler-not {\mnot\M} } } */ >>> - >>> -/* Source code for the test in pr92398.h */ >>> -#include "pr92398.h" >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >>> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >>> deleted file mode 100644 >>> index bd7fa98af51..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >>> +++ /dev/null >>> @@ -1,10 +0,0 @@ >>> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */ >>> -/* { dg-require-effective-target powerpc_vsx_ok } */ >>> -/* { dg-options "-O2 -mvsx" } */ >>> - >>> -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ >>> -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! >>> has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ >>> - >>> -/* Source code for the test in pr92398.h */ >>> -#include "pr92398.h" >>> -