https://gcc.gnu.org/g:34d9e6df85f888c707c6b3b069ccfc6ea49efd56
commit r17-905-g34d9e6df85f888c707c6b3b069ccfc6ea49efd56 Author: Jin Ma <[email protected]> Date: Tue May 26 11:25:57 2026 +0800 RISC-V: Fix REGNO_REG_CLASS for FP hard registers The GCC Internals Manual, section 19.8 "Register Classes", documents REGNO_REG_CLASS as: REGNO_REG_CLASS (regno) [Macro] A C expression whose value is a register class containing hard register regno. In general there is more than one such class; choose a class which is minimal, meaning that no smaller class also contains the register. riscv_regno_to_class[] currently maps every FP hard register to RVC_FP_REGS, but RVC_FP_REGS only contains f8-f15. The entries for f0-f7 and f16-f31 therefore violate the "containing hard register regno" half of the contract: the returned class does not contain the register at all. The mismatch corrupts IRA's cost model. setup_allocno_cost_vector indexes the per-hard-reg cost slot via REGNO_REG_CLASS: rclass = REGNO_REG_CLASS (hard_regno); num = cost_classes_ptr->index[rclass]; ... reg_costs[j] = COSTS (costs, i)->cost[num]; After setup_regno_cost_classes_by_mode adds RVC_FP_REGS to the cost classes, the cost for e.g. f16 is silently read from the RVC_FP_REGS slot. The new fp-reg-class.c testcase puts eight "cf"- and sixteen "f"-constrained doubles live across a call. In the buggy state IRA places the cf pseudos outside the cf class and LRA recovers with sixteen fmv.d to fs* registers; with the fix IRA spills those values honestly and the IRA "+++Costs" line reports a non-zero "mem" component. Fix it by giving each FP hard register its minimal class: FP_REGS for f0-f7 and f16-f31, RVC_FP_REGS for f8-f15. As a companion change, switch riscv_secondary_memory_needed from class-equality tests to reg_class_subset_p so it still recognises the FP side regardless of which subclass the table returns. 
gcc/ChangeLog: * config/riscv/riscv.cc (riscv_regno_to_class): Use the minimal class containing each FP hard register: FP_REGS for f0-f7 and f16-f31, RVC_FP_REGS for f8-f15. (riscv_secondary_memory_needed): Use reg_class_subset_p to detect FP classes. gcc/testsuite/ChangeLog: * gcc.target/riscv/fp-reg-class.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 16 ++++---- gcc/testsuite/gcc.target/riscv/fp-reg-class.c | 59 +++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8a737bb41b66..abce8f5f8a54 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -351,14 +351,14 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = { JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, - RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, RVC_FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, FRAME_REGS, FRAME_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, NO_REGS, @@ -10962,8 +10962,8 @@ static bool riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1, reg_class_t class2) { - bool class1_is_fpr = class1 == FP_REGS || class1 == RVC_FP_REGS; - bool class2_is_fpr = class2 == FP_REGS || class2 == RVC_FP_REGS; + bool class1_is_fpr = reg_class_subset_p (class1, FP_REGS); 
+ bool class2_is_fpr = reg_class_subset_p (class2, FP_REGS); return (!riscv_vector_mode_p (mode) && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD && (class1_is_fpr != class2_is_fpr) diff --git a/gcc/testsuite/gcc.target/riscv/fp-reg-class.c b/gcc/testsuite/gcc.target/riscv/fp-reg-class.c new file mode 100644 index 000000000000..e40fcfcde92a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/fp-reg-class.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O3" "-Og" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O2 -fdump-rtl-ira" } */ + +/* When riscv_regno_to_class[] mapped every FP hard register to + RVC_FP_REGS, ira-costs.cc:setup_allocno_cost_vector read the cost + slot for f0-f7 / f16-f31 from the wrong bucket and IRA mis-allocated + FP pseudos. Eight "cf"- and sixteen "f"-constrained doubles live + across a call expose this: in the buggy state IRA picks an + all-in-hardreg coloring with no spills ("+++Costs" shows "mem 0") + and LRA recovers with fmv.d to fs* registers; with the fix IRA + spills the cf values honestly and the "mem" component is non-zero. 
*/ + +extern void use (double, double, double, double, + double, double, double, double); + +double +test (double *p, int n) +{ + double f0 = p[0], f1 = p[1], f2 = p[2], f3 = p[3]; + double f4 = p[4], f5 = p[5], f6 = p[6], f7 = p[7]; + double f8 = p[8], f9 = p[9], f10 = p[10], f11 = p[11]; + double f12 = p[12], f13 = p[13], f14 = p[14], f15 = p[15]; + + double c0, c1, c2, c3, c4, c5, c6, c7; + asm ("fadd.d %0,%1,%1" : "=cf" (c0) : "cf" (p[16])); + asm ("fadd.d %0,%1,%1" : "=cf" (c1) : "cf" (p[17])); + asm ("fadd.d %0,%1,%1" : "=cf" (c2) : "cf" (p[18])); + asm ("fadd.d %0,%1,%1" : "=cf" (c3) : "cf" (p[19])); + asm ("fadd.d %0,%1,%1" : "=cf" (c4) : "cf" (p[20])); + asm ("fadd.d %0,%1,%1" : "=cf" (c5) : "cf" (p[21])); + asm ("fadd.d %0,%1,%1" : "=cf" (c6) : "cf" (p[22])); + asm ("fadd.d %0,%1,%1" : "=cf" (c7) : "cf" (p[23])); + + for (int i = 0; i < n; ++i) + { + f0 = f0 * f1 + f2; f1 = f1 * f2 + f3; + f2 = f2 * f3 + f4; f3 = f3 * f4 + f5; + f4 = f4 * f5 + f6; f5 = f5 * f6 + f7; + f6 = f6 * f7 + f8; f7 = f7 * f8 + f9; + f8 = f8 * f9 + f10; f9 = f9 * f10 + f11; + f10 = f10 * f11 + f12; f11 = f11 * f12 + f13; + f12 = f12 * f13 + f14; f13 = f13 * f14 + f15; + f14 = f14 * f15 + f0; f15 = f15 * f0 + f1; + asm ("fadd.d %0,%0,%1" : "+cf" (c0) : "f" (f0)); + asm ("fadd.d %0,%0,%1" : "+cf" (c1) : "f" (f1)); + asm ("fadd.d %0,%0,%1" : "+cf" (c2) : "f" (f2)); + asm ("fadd.d %0,%0,%1" : "+cf" (c3) : "f" (f3)); + asm ("fadd.d %0,%0,%1" : "+cf" (c4) : "f" (f4)); + asm ("fadd.d %0,%0,%1" : "+cf" (c5) : "f" (f5)); + asm ("fadd.d %0,%0,%1" : "+cf" (c6) : "f" (f6)); + asm ("fadd.d %0,%0,%1" : "+cf" (c7) : "f" (f7)); + use (f8, f9, f10, f11, f12, f13, f14, f15); + } + return f0+f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15 + + c0+c1+c2+c3+c4+c5+c6+c7; +} + +/* { dg-final { scan-rtl-dump {\+\+\+Costs:[^\n]* mem [1-9]} "ira" } } */
