https://gcc.gnu.org/g:b67918fddab42c434c10bedff6c210c55ed907a0
commit r16-7666-gb67918fddab42c434c10bedff6c210c55ed907a0 Author: Wilco Dijkstra <[email protected]> Date: Thu Jan 22 12:28:36 2026 +0000 AArch64: Use anchors for vector constants [PR 121240] Enable anchors for vector constants - like FP, expand vector constants early and place them in the constdata section. Avoid unnecessary loads by expanding simple cases using DUP. Performance on SPECFP2017 is ~0.3% better, codesize increases by 0.05% due to extra const data. gcc: PR target/121240 * config/aarch64/aarch64-simd.md (mov<mode>): Expand vector constants early. * config/aarch64/aarch64.cc (aarch64_select_rtx_section): Force vector immediates <= 16 bytes to constdata. gcc/testsuite: PR target/121240 * gcc.target/aarch64/const_create_using_fmov.c: Fix test. * gcc.target/aarch64/pr121240.c: Add new test. * gcc.target/aarch64/vec-init-single-const.c: Fix test. * gcc.target/aarch64/vect-cse-codegen.c: Fix test. Diff: --- gcc/config/aarch64/aarch64-simd.md | 29 +++++++++++++++++++--- gcc/config/aarch64/aarch64.cc | 4 +-- .../gcc.target/aarch64/const_create_using_fmov.c | 4 +-- gcc/testsuite/gcc.target/aarch64/pr121240.c | 15 +++++++++++ .../gcc.target/aarch64/vec-init-single-const.c | 6 ++--- .../gcc.target/aarch64/vect-cse-codegen.c | 28 ++++++++++++--------- 6 files changed, 63 insertions(+), 23 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 505e8b1126fa..c314e85927d3 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -70,11 +70,32 @@ contains CONST_POLY_INTs), build it up from individual elements instead. We should only need to do this before RA; aarch64_legitimate_constant_p should ensure that we don't try to rematerialize the constant later. */ - if (GET_CODE (operands[1]) == CONST_VECTOR - && targetm.cannot_force_const_mem (<MODE>mode, operands[1])) + if (GET_CODE (operands[1]) == CONST_VECTOR) { - aarch64_expand_vector_init (operands[0], operands[1]); - DONE; + if (targetm.cannot_force_const_mem (<MODE>mode, operands[1])) + { + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; + } + else if (!aarch64_simd_imm_zero (operands[1], <MODE>mode) + && !aarch64_simd_special_constant_p (operands[1], <MODE>mode) + && !aarch64_simd_valid_mov_imm (operands[1])) + { + rtx x; + /* Expand into VDUP. */ + if (TARGET_SIMD && const_vec_duplicate_p (operands[1], &x)) + { + x = force_reg (GET_MODE_INNER (<MODE>mode), x); + operands[1] = gen_vec_duplicate (<MODE>mode, x); + emit_move_insn (operands[0], operands[1]); + DONE; + } + + /* Expand into a literal load using anchors. */ + operands[1] = force_const_mem (<MODE>mode, operands[1]); + emit_move_insn (operands[0], operands[1]); + DONE; + } } " ) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index ffef5ad29b3a..f2ecb0ee8cb7 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -14349,8 +14349,8 @@ aarch64_select_rtx_section (machine_mode mode, return function_section (current_function_decl); /* When using anchors for constants use the readonly section. */ - if ((CONST_INT_P (x) || CONST_DOUBLE_P (x)) - && known_le (GET_MODE_SIZE (mode), 8)) + if ((CONST_INT_P (x) || CONST_DOUBLE_P (x) || CONST_VECTOR_P (x)) + && known_le (GET_MODE_SIZE (mode), 16)) return readonly_data_section; return default_elf_select_rtx_section (mode, x, align); diff --git a/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c index e080afed8aa3..21ea91f5073d 100644 --- a/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c +++ b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c @@ -78,8 +78,8 @@ uint16x8_t f5() { /* ** f6: -** adrp x0, \.LC0 -** ldr q0, \[x0, #:lo12:\.LC0\] +** mov w0, 1333788672 +** dup v0.4s, w0 ** ret */ uint32x4_t f6() { diff --git a/gcc/testsuite/gcc.target/aarch64/pr121240.c b/gcc/testsuite/gcc.target/aarch64/pr121240.c new file mode 100644 index 000000000000..f04574d411c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr121240.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcmodel=small" } */ + +const double b[4] = {0.2435334343f, 0.2233535343f, 0.4232433f, 0.34343434f}; +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +v2df f (v2df c1, v2df c2) +{ + v2df a1 = *(v2df *)&b[0]; + v2df a2 = *(v2df *)&b[2]; + return (a1 * c1) + (a2 * c2); +} + +/* { dg-final { scan-assembler-times "adrp" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-single-const.c b/gcc/testsuite/gcc.target/aarch64/vec-init-single-const.c index 274b0b39ac43..587b7ec0e3b2 100644 --- a/gcc/testsuite/gcc.target/aarch64/vec-init-single-const.c +++ b/gcc/testsuite/gcc.target/aarch64/vec-init-single-const.c @@ -46,9 +46,9 @@ int32x4_t f_s32(int32_t x) /* ** f_s64: -** adrp x[0-9]+, .LC[0-9]+ -** ldr q0, \[x[0-9]+, #:lo12:.LC[0-9]+\] -** ins v0\.d\[0\], x0 +** fmov d0, x0 +** mov (x[0-9]+), 1 +** ins v0\.d\[1\], \1 ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-cse-codegen.c b/gcc/testsuite/gcc.target/aarch64/vect-cse-codegen.c index 2b8e64313bb4..1aa0d3edd323 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-cse-codegen.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-cse-codegen.c @@ -6,12 +6,14 @@ /* **test1: -** adrp x[0-9]+, .LC[0-9]+ -** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\] +** mov x[0-9]+, 16502 +** movk x[0-9]+, 0x1023, lsl 16 +** movk x[0-9]+, 0x4308, lsl 32 +** movk x[0-9]+, 0x942, lsl 48 +** dup v[0-9]+.2d, x[0-9]+ ** add v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d -** str q[0-9]+, \[x[0-9]+\] -** fmov x[0-9]+, d[0-9]+ ** orr x[0-9]+, x[0-9]+, x[0-9]+ +** str q[0-9]+, \[x[0-9]+\] ** ret */ @@ -27,12 +29,14 @@ test1 (uint64_t a, uint64x2_t b, uint64x2_t* rt) /* **test2: -** adrp x[0-9]+, .LC[0-1]+ -** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\] +** mov x[0-9]+, 16502 +** movk x[0-9]+, 0x4223, lsl 16 +** movk x[0-9]+, 0x3032, lsl 32 +** movk x[0-9]+, 0x424, lsl 48 +** dup v[0-9]+.2d, x[0-9]+ ** add v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d -** str q[0-9]+, \[x[0-9]+\] -** fmov x[0-9]+, d[0-9]+ ** orr x[0-9]+, x[0-9]+, x[0-9]+ +** str q[0-9]+, \[x[0-9]+\] ** ret */ @@ -48,12 +52,12 @@ test2 (uint64_t a, uint64x2_t b, uint64x2_t* rt) /* **test3: -** adrp x[0-9]+, .LC[0-9]+ -** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\] +** mov w[0-9]+, 16963 +** movk w[0-9]+, 0x9, lsl 16 +** dup v[0-9]+.4s, w[0-9]+ ** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s -** str q[0-9]+, \[x1\] -** fmov w[0-9]+, s[0-9]+ ** orr w[0-9]+, w[0-9]+, w[0-9]+ +** str q[0-9]+, \[x1\] ** ret */
