LGTM :)
On Wed, Jul 24, 2024 at 9:31 PM Christoph Müllner <christoph.muell...@vrull.eu> wrote: > > When enabling XTheadMemIdx, we enable the pre- and post-modify > addressing modes in the RISC-V backend. > Unfortunately, the auto_inc_dec pass will then attempt to utilize > this feature regardless of the mode class (i.e. scalar or vector). > The assumption seems to be, that an enabled addressing mode for > scalar instructions will also be available for vector instructions. > > In case of XTheadMemIdx and RVV, this is ovbiously not the case. > Still, auto_inc_dec (-O3) performs optimizations like the following: > > (insn 23 20 27 3 (set (mem:V4QI (reg:DI 136 [ ivtmp.13 ]) [0 MEM <vector(4) > char> [(char *)_39]+0 S4 A32]) > (reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 > 3183 {*movv4qi} > (nil)) > (insn 40 39 41 3 (set (reg:DI 136 [ ivtmp.13 ]) > (plus:DI (reg:DI 136 [ ivtmp.13 ]) > (const_int 20 [0x14]))) 5 {adddi3} > (nil)) > ====> > (insn 23 20 27 3 (set (mem:V4QI (post_modify:DI (reg:DI 136 [ ivtmp.13 ]) > (plus:DI (reg:DI 136 [ ivtmp.13 ]) > (const_int 20 [0x14]))) [0 MEM <vector(4) char> [(char > *)_39]+0 S4 A32]) > (reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 > 3183 {*movv4qi} > (expr_list:REG_INC (reg:DI 136 [ ivtmp.13 ]) > (nil))) > > The resulting memory-store with post-modify addressing cannot be > lowered to an existing instruction (see PR116033). > At a lower optimization level (-O2) this is currently fine, > but we can't rely on that. > > One solution would be to introduce a target hook to check if a certain > type can be used for pre-/post-modify optimizations. > However, it will be hard to justify such a hook, if only a single > RISC-V vendor extension requires that. > Therefore, this patch takes a more drastic approach and disables > pre-/post-modify addressing if TARGET_VECTOR is set. > This results in not emitting pre-/post-modify instructions from > XTheadMemIdx if RVV is enabled. > > Note, that this is not an issue with XTheadVector, because > we currently don't have auto-vectorization for that extension. > To ensure this won't changed without being noticed, an additional > test is added. > > PR target/116033 > > gcc/ChangeLog: > > * config/riscv/riscv.h (HAVE_POST_MODIFY_DISP): Disable for RVV. > (HAVE_PRE_MODIFY_DISP): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/pr116033-1.c: New test. > * gcc.target/riscv/pr116033-2.c: New test. > * gcc.target/riscv/pr116033-3.c: New test. > > Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> > --- > gcc/config/riscv/riscv.h | 6 ++-- > gcc/testsuite/gcc.target/riscv/pr116033-1.c | 40 +++++++++++++++++++++ > gcc/testsuite/gcc.target/riscv/pr116033-2.c | 40 +++++++++++++++++++++ > gcc/testsuite/gcc.target/riscv/pr116033-3.c | 38 ++++++++++++++++++++ > 4 files changed, 122 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/pr116033-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/pr116033-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/pr116033-3.c > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index 6f040011864..e5760294506 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -1254,8 +1254,10 @@ extern void riscv_remove_unneeded_save_restore_calls > (void); > e.g. RVVMF64BI vs RVVMF1BI on zvl512b, which is [1, 1] vs [64, 64]. */ > #define MAX_POLY_VARIANT 64 > > -#define HAVE_POST_MODIFY_DISP TARGET_XTHEADMEMIDX > -#define HAVE_PRE_MODIFY_DISP TARGET_XTHEADMEMIDX > +#define HAVE_POST_MODIFY_DISP \ > + (TARGET_XTHEADMEMIDX && (!TARGET_VECTOR || TARGET_XTHEADVECTOR)) > +#define HAVE_PRE_MODIFY_DISP \ > + (TARGET_XTHEADMEMIDX && (!TARGET_VECTOR || TARGET_XTHEADVECTOR)) > > /* Check TLS Descriptors mechanism is selected. */ > #define TARGET_TLSDESC (riscv_tls_dialect == TLS_DESCRIPTORS) > diff --git a/gcc/testsuite/gcc.target/riscv/pr116033-1.c > b/gcc/testsuite/gcc.target/riscv/pr116033-1.c > new file mode 100644 > index 00000000000..8dcbe6cc2b8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/pr116033-1.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O2" "-Og" "-Os" "-Oz" } } */ > +/* { dg-options "-march=rv64gv_xtheadmemidx" { target { rv64 } } } */ > +/* { dg-options "-march=rv32gv_xtheadmemidx" { target { rv32 } } } */ > + > +char arr_3[20][20]; > +void init() > +{ > + for (int i_0 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_1) > + arr_3[i_0][i_1] = i_1; > +} > + > +long > +lr_reg_imm_upd_char_1 (long *rs1, long rs2) > +{ > + /* Register+register addressing still works. */ > + *rs1 = *rs1 + (rs2 << 1); > + return *(char*)(*rs1); > +} > + > +void > +char_pre_dec_load_1 (char *p) > +{ > + /* Missed optimization for V+XTheadMemIdx. */ > + extern void fchar1(char*, long); > + p = p - 1; > + char x = *p; > + fchar1 (p, x); > +} > + > +/* { dg-final { scan-assembler "vse8.v\tv\[0-9\]+,\[0-9\]+\\(\[a-x0-9\]+\\)" > } } */ > +/* { dg-final { scan-assembler-not > "vse8.v\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),\[0-9\]+,\[0-9\]+" } } */ > + > +/* { dg-final { scan-assembler "th.lrbu\t" } } */ > + > +/* Missed optimization for V+XTheadMemIdx. */ > +/* { dg-final { scan-assembler "th.lbuib\t" { xfail "*-*-*" } } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/pr116033-2.c > b/gcc/testsuite/gcc.target/riscv/pr116033-2.c > new file mode 100644 > index 00000000000..92e69c17e65 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/pr116033-2.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O3" "-Og" "-Os" "-Oz" } } */ > +/* { dg-options "-march=rv64gv_xtheadmemidx" { target { rv64 } } } */ > +/* { dg-options "-march=rv32gv_xtheadmemidx" { target { rv32 } } } */ > + > +char arr_3[20][20]; > +void init() > +{ > + for (int i_0 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_1) > + arr_3[i_0][i_1] = i_1; > +} > + > +long > +lr_reg_imm_upd_char_1 (long *rs1, long rs2) > +{ > + *rs1 = *rs1 + (rs2 << 1); > + return *(char*)(*rs1); > +} > + > +void > +char_pre_dec_load_1 (char *p) > +{ > + /* Missed optimization for V+XTheadMemIdx. */ > + extern void fchar1(char*, long); > + p = p - 1; > + char x = *p; > + fchar1 (p, x); > +} > + > +/* No auto-vectorization for XTheadVector. */ > +/* { dg-final { scan-assembler > "th.srb\t\[a-x0-9\]+,\[a-x0-9\]+,\[a-x0-9\]+,\[0-9\]+" } } */ > +/* { dg-final { scan-assembler-not "vse8.v" } } */ > + > +/* { dg-final { scan-assembler "th.lrbu\t" } } */ > + > +/* Missed optimization for V+XTheadMemIdx. */ > +/* { dg-final { scan-assembler "th.lbuib\t" { xfail "*-*-*" } } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/pr116033-3.c > b/gcc/testsuite/gcc.target/riscv/pr116033-3.c > new file mode 100644 > index 00000000000..cc89a6a0da5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/pr116033-3.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */ > +/* { dg-options "-march=rv64g_xtheadvector_xtheadmemidx" { target { rv64 } } > } */ > +/* { dg-options "-march=rv32g_xtheadvector_xtheadmemidx" { target { rv32 } } > } */ > + > +char arr_3[20][20]; > +void init() > +{ > + for (int i_0 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_0 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_0) > + for (int i_1 = 0; i_1 < 20; ++i_1) > + arr_3[i_0][i_1] = i_1; > +} > + > +long > +lr_reg_imm_upd_char_1 (long *rs1, long rs2) > +{ > + *rs1 = *rs1 + (rs2 << 1); > + return *(char*)(*rs1); > +} > + > +void > +char_pre_dec_load_1 (char *p) > +{ > + extern void fchar1(char*, long); > + p = p - 1; > + char x = *p; > + fchar1 (p, x); > +} > + > +/* No auto-vectorization for XTheadVector. */ > +/* { dg-final { scan-assembler > "th.srb\t\[a-x0-9\]+,\[a-x0-9\]+,\[a-x0-9\]+,\[0-9\]+" } } */ > +/* { dg-final { scan-assembler-not "vse8.v" } } */ > + > +/* { dg-final { scan-assembler "th.lrbu\t" } } */ > + > +/* { dg-final { scan-assembler "th.lbuib\t" } } */ > -- > 2.45.2 >