https://gcc.gnu.org/g:fd6d39208ba4edd9527171cc0f5426a48e63a6a2
commit r17-1049-gfd6d39208ba4edd9527171cc0f5426a48e63a6a2
Author: John David Anglin <[email protected]>
Date:   Sat May 30 20:43:04 2026 -0400

    hppa: Fix clear_cache pattern and use it in pa_trampoline_init

    The clear_cache pattern was broken and only flushed the instruction
    cache.  On PA-RISC, both the data and instruction caches need to be
    flushed and these flushes need to be separated by a sync instruction.

    The code is reworked and simplified.

    2026-05-30  John David Anglin  <[email protected]>

    gcc/ChangeLog:

        * config/pa/pa.cc (pa_trampoline_init): Rework to use
        clear_cache pattern.
        * config/pa/pa.md (dcacheflush): Use "<<" condition instead
        of "<<=".
        (icacheflush): Remove.
        (icacheflush1, icacheflush2, icacheflush3): New flush patterns
        for PA 1.x targets, PA 2.0 targets, and PA 1.x no space
        register targets.
        (clear_cache): Rework to flush data and instruction caches.
        Skip flush if the start address is greater than or equal to
        the end address.  Don't align the end address to a cacheline
        boundary.  Handle instruction flushes for PA 1.x targets,
        PA 2.0 targets, and PA 1.x no space register targets.

Diff: --- gcc/config/pa/pa.cc | 54 +++++++++----------------------- gcc/config/pa/pa.md | 89 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 89 insertions(+), 54 deletions(-) diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc index beb5bba30b88..a4aed9e28813 100644 --- a/gcc/config/pa/pa.cc +++ b/gcc/config/pa/pa.cc @@ -10651,10 +10651,7 @@ static void pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) { rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); - rtx start_addr = gen_reg_rtx (Pmode); - rtx end_addr = gen_reg_rtx (Pmode); - rtx line_length = gen_reg_rtx (Pmode); - rtx r_tramp, tmp; + rtx start, end, r_tramp, tmp; emit_block_move (m_tramp, assemble_trampoline_template (), GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); @@ -10662,6 +10659,9 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) if (!TARGET_64BIT) { + /* Start of trampoline code. */ + start = r_tramp; + tmp = adjust_address (m_tramp, Pmode, 48); emit_move_insn (tmp, fnaddr); tmp = adjust_address (m_tramp, Pmode, 52); @@ -10669,28 +10669,15 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) /* Create a fat pointer for the trampoline. */ tmp = adjust_address (m_tramp, Pmode, 56); - emit_move_insn (tmp, r_tramp); + emit_move_insn (tmp, start); tmp = adjust_address (m_tramp, Pmode, 60); emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); - - /* fdc and fic only use registers for the address to flush, - they do not accept integer displacements. We align the - start and end addresses to the beginning of their respective - cache lines to minimize the number of lines flushed. 
*/ - emit_insn (gen_andsi3 (start_addr, r_tramp, - GEN_INT (-MIN_CACHELINE_SIZE))); - tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, - TRAMPOLINE_CODE_SIZE-1)); - emit_insn (gen_andsi3 (end_addr, tmp, - GEN_INT (-MIN_CACHELINE_SIZE))); - emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); - emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length)); - emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length, - gen_reg_rtx (Pmode), - gen_reg_rtx (Pmode))); } else { + /* Start of trampoline code. */ + start = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32)); + tmp = adjust_address (m_tramp, Pmode, 56); emit_move_insn (tmp, fnaddr); tmp = adjust_address (m_tramp, Pmode, 64); @@ -10698,29 +10685,16 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) /* Create a fat pointer for the trampoline. */ tmp = adjust_address (m_tramp, Pmode, 16); - emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode, - r_tramp, 32))); + emit_move_insn (tmp, start); tmp = adjust_address (m_tramp, Pmode, 24); emit_move_insn (tmp, gen_rtx_REG (Pmode, 27)); - - /* fdc and fic only use registers for the address to flush, - they do not accept integer displacements. We align the - start and end addresses to the beginning of their respective - cache lines to minimize the number of lines flushed. */ - tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32)); - emit_insn (gen_anddi3 (start_addr, tmp, - GEN_INT (-MIN_CACHELINE_SIZE))); - tmp = force_reg (Pmode, plus_constant (Pmode, tmp, - TRAMPOLINE_CODE_SIZE - 1)); - emit_insn (gen_anddi3 (end_addr, tmp, - GEN_INT (-MIN_CACHELINE_SIZE))); - emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); - emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length)); - emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length, - gen_reg_rtx (Pmode), - gen_reg_rtx (Pmode))); } + end = force_reg (Pmode, plus_constant (Pmode, start, TRAMPOLINE_CODE_SIZE)); + + /* Flush trampoline. 
*/ + emit_insn (gen_clear_cache (start, end)); + #ifdef HAVE_ENABLE_EXECUTE_STACK emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index d87f3047cdaf..04a239806d26 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -9947,10 +9947,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(set_attr "type" "fpalu") (set_attr "length" "4")]) -;; The following two patterns are used by the trampoline code for nested +;; The following four patterns are used by the trampoline code for nested ;; functions. They flush the I and D cache lines from the start address -;; (operand0) to the end address (operand1). No lines are flushed if the -;; end address is less than the start address (unsigned). +;; (operand0) to the end address (operand1). ;; ;; Because the range of memory flushed is variable and the size of a MEM ;; can only be a CONST_INT, the patterns specify that they perform an @@ -9959,9 +9958,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" ;; The address range for an icache flush must lie within a single ;; space on targets with non-equivalent space registers. ;; -;; Operand 0 contains the start address. +;; Operand 0 contains the line aligned start address. ;; Operand 1 contains the end address. -;; Operand 2 contains the line length to use. +;; Operand 2 contains the line size to use. 
(define_insn "dcacheflush<P:mode>" [(const_int 1) (unspec_volatile [(mem:BLK (scratch))] UNSPECV_DCACHE) @@ -9970,11 +9969,11 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (use (match_operand 2 "pmode_register_operand" "r")) (clobber (match_scratch:P 3 "=&0"))] "" - "cmpb,<dwc><<=,n %3,%1,.\;fdc,m %2(%3)\;sync" + "{comb|cmpb},<dwc><<,n %3,%1,.\;fdc,m %2(%3)\;sync" [(set_attr "type" "multi") (set_attr "length" "12")]) -(define_insn "icacheflush<P:mode>" +(define_insn "icacheflush1<P:mode>" [(const_int 2) (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE) (use (match_operand 0 "pmode_register_operand" "r")) @@ -9984,24 +9983,86 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (clobber (match_operand 4 "pmode_register_operand" "=&r")) (clobber (match_scratch:P 5 "=&0"))] "" - "mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,<dwc><<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop" + "mfsp %%sr1,%4\;ldsid (%5),%3\;mtsp %3,%%sr1\;{comb|cmpb},<dwc><<,n %5,%1,.\;fic,m %2(%%sr1,%5)\;sync\;mtsp %4,%%sr1\;nop\;nop\;nop\;nop\;nop\;nop" [(set_attr "type" "multi") (set_attr "length" "52")]) +(define_insn "icacheflush2<P:mode>" + [(const_int 2) + (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) + (use (match_operand 2 "pmode_register_operand" "r")) + (clobber (match_scratch:P 3 "=&0"))] + "TARGET_PA_20" + "cmpb,<dwc><<,n %3,%1,.\;fic,m %2(%3)\;sync\;nop\;nop\;nop\;nop\;nop\;nop\;nop" + [(set_attr "type" "multi") + (set_attr "length" "40")]) + +(define_insn "icacheflush3<P:mode>" + [(const_int 3) + (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE) + (use (match_operand 0 "pmode_register_operand" "r")) + (use (match_operand 1 "pmode_register_operand" "r")) + (use (match_operand 2 "pmode_register_operand" "r")) + (clobber (match_scratch:P 3 "=&0"))] + "!TARGET_PA_20 && TARGET_NO_SPACE_REGS" + "comb,<dwc><<,n %3,%1,.\;fic,m 
%2(%%sr4,%3)\;sync\;nop\;nop\;nop\;nop\;nop\;nop\;nop" + [(set_attr "type" "multi") + (set_attr "length" "40")]) + +;; clear_cache +;; +;; Note the data and instruction cache flushes need to be separated +;; by a sync instruction (see page 7-151 in PA 2.0 architecture manual). +;; Thus, we can't combine the flush operations in a single loop. + +;; Operand 0 contains the start address (inclusive). +;; Operand 1 contains the end address (exclusive). (define_expand "clear_cache" [(match_operand 0 "pmode_register_operand") (match_operand 1 "pmode_register_operand")] "" { - rtx line_length = gen_reg_rtx (Pmode); + rtx start = gen_reg_rtx (Pmode); + rtx line_size = gen_reg_rtx (Pmode); + rtx lab = gen_label_rtx (); + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + /* Skip flush if op0 is greater than or equal to op1. */ + emit_cmp_and_jump_insns (op0, op1, GEU, NULL_RTX, Pmode, 0, lab); + + /* Load cache line size. */ + emit_move_insn (line_size, GEN_INT (MIN_CACHELINE_SIZE)); - emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); if (TARGET_64BIT) - emit_insn (gen_icacheflushdi (operands[0], operands[1], line_length, - gen_reg_rtx (Pmode), gen_reg_rtx (Pmode))); + { + /* Align the start address. */ + emit_insn (gen_anddi3 (start, op0, GEN_INT (-MIN_CACHELINE_SIZE))); + + /* Flush the cache. */ + emit_insn (gen_dcacheflushdi (start, op1, line_size)); + emit_insn (gen_icacheflush2di (start, op1, line_size)); + } else - emit_insn (gen_icacheflushsi (operands[0], operands[1], line_length, - gen_reg_rtx (Pmode), gen_reg_rtx (Pmode))); + { + /* Align the start address. */ + emit_insn (gen_andsi3 (start, op0, GEN_INT (-MIN_CACHELINE_SIZE))); + + /* Flush the cache. 
*/ + emit_insn (gen_dcacheflushsi (start, op1, line_size)); + if (TARGET_PA_20) + emit_insn (gen_icacheflush2si (start, op1, line_size)); + else if (TARGET_NO_SPACE_REGS) + emit_insn (gen_icacheflush3si (start, op1, line_size)); + else + emit_insn (gen_icacheflush1si (start, op1, line_size, + gen_reg_rtx (Pmode), + gen_reg_rtx (Pmode))); + } + + emit_label (lab); DONE; })
