Hi, This patch creates an insn_and_split pattern which helps the duplicated constant vector replace the source pseudo of store insn in fwprop pass. Thus the store can be implemented by a single stxvd2x and it eliminates the unnecessary byte swap insn on P8 LE. The test case shows the optimization.
The patch depends on the first generic patch which uses insn cost in fwprop. Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store gcc/ PR target/113325 * config/rs6000/predicates.md (duplicate_easy_altivec_constant): New. * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_<mode>): New. gcc/testsuite/ PR target/113325 * gcc.target/powerpc/pr113325.c: New. patch.diff diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ef7d3f214c4..8ab6db630b7 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant" return false; }) +;; Return 1 if it's a duplicated easy_altivec_constant. +(define_predicate "duplicate_easy_altivec_constant" + (and (match_code "const_vector") + (match_test "easy_altivec_constant (op, mode)")) +{ + return const_vec_duplicate_p (op); +}) + ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. (define_predicate "easy_vector_constant_add_self" (and (match_code "const_vector") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 26fa32829af..98e4be26f64 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) +(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>" + [(set (match_operand:VSX_W 0 "memory_operand" "=Z") + (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))] + "!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (<MODE>mode) + && !TARGET_P9_VECTOR" + "#" + "&& 1" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 0) + (vec_select:VSX_W + (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)])))] +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) + : operands[1]; + +} + [(set_attr "type" "vecstore") + (set_attr "length" "8")]) + (define_insn "*vsx_stxvd2x8_le_V8HI" [(set (match_operand:V8HI 0 "memory_operand" "=Z") (vec_select:V8HI diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c b/gcc/testsuite/gcc.target/powerpc/pr113325.c new file mode 100644 index 00000000000..dff68ac0a51 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */ + +void* foo (void* s1) +{ + return __builtin_memset (s1, 0, 32); +}