Re: [PATCH v2] AArch64: Improve immediate generation
On 24/10/2023 18:27, Wilco Dijkstra wrote: v2: Use check-function-bodies in tests Further improve immediate generation by adding support for 2-instruction MOV/EOR bitmask immediates. This reduces the number of 3/4-instruction immediates in SPECCPU2017 by ~2%. Passes regress, OK for commit? gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) Add support for immediates using MOV/EOR bitmask. gcc/testsuite: * gcc.target/aarch64/imm_choice_comparison.c: Change tests. * gcc.target/aarch64/moveor_imm.c: Add new test. * gcc.target/aarch64/pr106583.c: Change tests. --- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 578a253d6e0e133e19592553fc873b3e73f9f218..ed5be2b64c9a767d74e9d78415da964c669001aa 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -5748,6 +5748,26 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, } return 2; } + + /* Try 2 bitmask immediates which are xor'd together. */ + for (i = 0; i < 64; i += 16) + { + val2 = (val >> i) & mask; + val2 |= val2 << 16; + val2 |= val2 << 32; + if (aarch64_bitmask_imm (val2) && aarch64_bitmask_imm (val ^ val2)) + break; + } + + if (i != 64) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + emit_insn (gen_xordi3 (dest, dest, GEN_INT (val ^ val2))); + } + return 2; + } } /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c index ebc44d6dbc7287d907603d77d7b54496de177c4b..a1fc90ad73411ae8ed848fa321586afcb8d710aa 100644 --- a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c +++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c @@ -1,32 +1,64 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ /* Go from four moves to two. 
*/ +/* +** foo: +** mov w[0-9]+, 2576980377 +** movk x[0-9]+, 0x9999, lsl 32 +** ... +*/ + int foo (long long x) { - return x <= 0x1999999999999998; + return x <= 0x0000999999999998; } +/* +** GT: +** mov w[0-9]+, -16777217 +** ... +*/ + int GT (unsigned int x) { return x > 0xfefffffe; } +/* +** LE: +** mov w[0-9]+, -16777217 +** ... +*/ + int LE (unsigned int x) { return x <= 0xfefffffe; } +/* +** GE: +** mov w[0-9]+, 4278190079 +** ... +*/ + int GE (long long x) { return x >= 0xff000000; } +/* +** LT: +** mov w[0-9]+, -16777217 +** ... +*/ + int LT (int x) { @@ -35,6 +67,13 @@ LT (int x) /* Optimize the immediate in conditionals. */ +/* +** check: +** ... +** mov w[0-9]+, -16777217 +** ... +*/ + int check (int x, int y) { @@ -44,11 +83,15 @@ check (int x, int y) return x; } +/* +** tern: +** ... +** mov w[0-9]+, -16777217 +** ... +*/ + int tern (int x) { return x >= 0xff000000 ? 5 : -3; } - -/* baz produces one movk instruction. */ -/* { dg-final { scan-assembler-times "movk" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/moveor_imm.c b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c new file mode 100644 index 0000000000000000000000000000000000000000..1c0c3f3bf8c588f9661112a8b3f9a72c5ddff95c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** movx0, -6148914691236517206 +** eor x0, x0, -9223372036854775807 +** ret +*/ Some odd white space above. Also, I think it would be better to write the tests as ** f1: ** ... ** <expected instructions> ** ... Then different prologue and epilogue options (such as BTI or pac-ret) won't affect the tests.
+ +long f1 (void) +{ + return 0x2aaaaaaaaaaaaaab; +} + +/* +** f2: +** mov x0, -1085102592571150096 +** eor x0, x0, -2305843009213693951 +** ret +*/ + +long f2 (void) +{ + return 0x10f0f0f0f0f0f0f1; +} + +/* +** f3: +** mov x0, -3689348814741910324 +** eor x0, x0, -4611686018427387903 +** ret +*/ + +long f3 (void) +{ + return 0xccccccccccccccd; +} + +/* +** f4: +** mov x0, -7378697629483820647 +** eor x0, x0, -9223372036854775807 +** ret +*/ + +long f4 (void) +{ + return 0x1999999999999998; +} + +/* +** f5: +** mov x0, 3689348814741910323 +** eor x0, x0, 864691128656461824 +** ret +*/ + +long f5 (void) +{ + return 0x3f3333333f333333; +} diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c index
[PATCH v2] AArch64: Improve immediate generation
v2: Use check-function-bodies in tests Further improve immediate generation by adding support for 2-instruction MOV/EOR bitmask immediates. This reduces the number of 3/4-instruction immediates in SPECCPU2017 by ~2%. Passes regress, OK for commit? gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) Add support for immediates using MOV/EOR bitmask. gcc/testsuite: * gcc.target/aarch64/imm_choice_comparison.c: Change tests. * gcc.target/aarch64/moveor_imm.c: Add new test. * gcc.target/aarch64/pr106583.c: Change tests. --- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 578a253d6e0e133e19592553fc873b3e73f9f218..ed5be2b64c9a767d74e9d78415da964c669001aa 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -5748,6 +5748,26 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, } return 2; } + + /* Try 2 bitmask immediates which are xor'd together. */ + for (i = 0; i < 64; i += 16) + { + val2 = (val >> i) & mask; + val2 |= val2 << 16; + val2 |= val2 << 32; + if (aarch64_bitmask_imm (val2) && aarch64_bitmask_imm (val ^ val2)) + break; + } + + if (i != 64) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + emit_insn (gen_xordi3 (dest, dest, GEN_INT (val ^ val2))); + } + return 2; + } } /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c index ebc44d6dbc7287d907603d77d7b54496de177c4b..a1fc90ad73411ae8ed848fa321586afcb8d710aa 100644 --- a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c +++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c @@ -1,32 +1,64 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ /* Go from four moves to two. */ +/* +** foo: +** mov w[0-9]+, 2576980377 +** movk x[0-9]+, 0x9999, lsl 32 +** ...
+*/ + int foo (long long x) { - return x <= 0x1999999999999998; + return x <= 0x0000999999999998; } +/* +** GT: +** mov w[0-9]+, -16777217 +** ... +*/ + int GT (unsigned int x) { return x > 0xfefffffe; } +/* +** LE: +** mov w[0-9]+, -16777217 +** ... +*/ + int LE (unsigned int x) { return x <= 0xfefffffe; } +/* +** GE: +** mov w[0-9]+, 4278190079 +** ... +*/ + int GE (long long x) { return x >= 0xff000000; } +/* +** LT: +** mov w[0-9]+, -16777217 +** ... +*/ + int LT (int x) { @@ -35,6 +67,13 @@ LT (int x) /* Optimize the immediate in conditionals. */ +/* +** check: +** ... +** mov w[0-9]+, -16777217 +** ... +*/ + int check (int x, int y) { @@ -44,11 +83,15 @@ check (int x, int y) return x; } +/* +** tern: +** ... +** mov w[0-9]+, -16777217 +** ... +*/ + int tern (int x) { return x >= 0xff000000 ? 5 : -3; } - -/* baz produces one movk instruction. */ -/* { dg-final { scan-assembler-times "movk" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/moveor_imm.c b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c new file mode 100644 index 0000000000000000000000000000000000000000..1c0c3f3bf8c588f9661112a8b3f9a72c5ddff95c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** mov x0, -6148914691236517206 +** eor x0, x0, -9223372036854775807 +** ret +*/ + +long f1 (void) +{ + return 0x2aaaaaaaaaaaaaab; +} + +/* +** f2: +** mov x0, -1085102592571150096 +** eor x0, x0, -2305843009213693951 +** ret +*/ + +long f2 (void) +{ + return 0x10f0f0f0f0f0f0f1; +} + +/* +** f3: +** mov x0, -3689348814741910324 +** eor x0, x0, -4611686018427387903 +** ret +*/ + +long f3 (void) +{ + return 0xccccccccccccccd; +} + +/* +** f4: +** mov x0, -7378697629483820647 +** eor x0, x0, -9223372036854775807 +** ret +*/ + +long f4 (void) +{ + return 0x1999999999999998; +} + +/* +** f5: +** mov x0, 3689348814741910323 +** eor x0, x0, 864691128656461824 +** ret +*/ + +long f5 (void) +{ + return 0x3f3333333f333333; +} diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c
b/gcc/testsuite/gcc.target/aarch64/pr106583.c index 0f931580817d78dc1cc58f03b251bd21bec71f59..63df7395edf9491720e3601848e15aa773c51e6d 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr106583.c +++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c @@ -1,41 +1,94 @@ -/* { dg-do assemble } */ -/* { dg-options "-O2 --save-temps" } */ +/* { dg-do compile } */ +/* { dg-options "-O2" }