Unnecessary moves around dpadd and dpsub are caused by different pseudos being assigned to the input-output operands which correspond to the same register.
This patch forces the same pseudo to be used for the input-output operands, which removes unnecessary moves. Tested on mips-mti-linux-gnu. gcc/ChangeLog: * gcc/config/mips/mips.c (mips_expand_builtin_insn): Force the operands which correspond to the same input-output register to have the same pseudo assigned to them. gcc/testsuite/ChangeLog: * gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c: New test. --- gcc/config/mips/mips.c | 20 ++++++++++++++++++ gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c | 28 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 7f6a0db..3a77097 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -16960,6 +16960,26 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops, std::swap (ops[1], ops[3]); break; + case CODE_FOR_msa_dpadd_s_w: + case CODE_FOR_msa_dpadd_s_h: + case CODE_FOR_msa_dpadd_s_d: + case CODE_FOR_msa_dpadd_u_w: + case CODE_FOR_msa_dpadd_u_h: + case CODE_FOR_msa_dpadd_u_d: + case CODE_FOR_msa_dpsub_s_w: + case CODE_FOR_msa_dpsub_s_h: + case CODE_FOR_msa_dpsub_s_d: + case CODE_FOR_msa_dpsub_u_w: + case CODE_FOR_msa_dpsub_u_h: + case CODE_FOR_msa_dpsub_u_d: + /* Force the operands which correspond to the same in-out register + to have the same pseudo assigned to them. If the input operand + is not REG, create one for it. 
*/ + if (!REG_P (ops[1].value)) + ops[1].value = copy_to_mode_reg (ops[1].mode, ops[1].value); + create_output_operand (&ops[0], ops[1].value, ops[1].mode); + break; + default: break; } diff --git a/gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c b/gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c new file mode 100644 index 0000000..c665bdf --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/msa-dpadd-dpsub.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mfp64 -mhard-float -mmsa" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */ + +typedef short v8i16 __attribute__ ((vector_size (16))); +typedef int v4i32 __attribute__ ((vector_size (16))); + +void foo (int *x, v8i16 *y, v8i16 *z) +{ + v4i32 acc[4]; + + acc[0] = __builtin_msa_ld_w(x, 0); + acc[1] = __builtin_msa_ld_w(x, 16); + acc[2] = __builtin_msa_ld_w(x, 32); + acc[3] = __builtin_msa_ld_w(x, 48); + + acc[0] = __builtin_msa_dpadd_s_w(acc[0], y[0], z[0]); + acc[1] = __builtin_msa_dpadd_s_w(acc[1], y[1], z[0]); + acc[2] = __builtin_msa_dpsub_s_w(acc[2], y[0], z[1]); + acc[3] = __builtin_msa_dpsub_s_w(acc[3], y[1], z[1]); + + __builtin_msa_st_w(acc[0], x, 0); + __builtin_msa_st_w(acc[1], x, 16); + __builtin_msa_st_w(acc[2], x, 32); + __builtin_msa_st_w(acc[3], x, 48); +} + +/* { dg-final { scan-assembler-not "move.v" } } */ -- 2.7.4