https://gcc.gnu.org/g:fd6d39208ba4edd9527171cc0f5426a48e63a6a2

commit r17-1049-gfd6d39208ba4edd9527171cc0f5426a48e63a6a2
Author: John David Anglin <[email protected]>
Date:   Sat May 30 20:43:04 2026 -0400

    hppa: Fix clear_cache pattern and use it in pa_trampoline_init
    
    The clear_cache pattern was broken and only flushed the instruction
    cache.  On PA-RISC, both the data and instruction caches need to be
    flushed and these flushes need to be separated by a sync instruction.
    
    The code is reworked and simplified.
    
    2026-05-30  John David Anglin  <[email protected]>
    
    gcc/ChangeLog:
    
            * config/pa/pa.cc (pa_trampoline_init): Rework to use
            clear_cache pattern.
            * config/pa/pa.md (dcacheflush): Use "<<" condition instead
            of "<<=".
            (icacheflush): Remove.
            (icacheflush1, icacheflush2, icacheflush3): New flush patterns
            for PA 1.x targets, PA 2.0 targets, and PA 1.x no space
            register targets.
            (clear_cache): Rework to flush data and instruction caches.
            Skip flush if the start address is greater than or equal to
            the end address.  Don't align the end address to a cacheline
            boundary.  Handle instruction flushes for PA 1.x targets,
            PA 2.0 targets, and PA 1.x no space register targets.

Diff:
---
 gcc/config/pa/pa.cc | 54 +++++++++-----------------------
 gcc/config/pa/pa.md | 89 ++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 89 insertions(+), 54 deletions(-)

diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index beb5bba30b88..a4aed9e28813 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -10651,10 +10651,7 @@ static void
 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 {
   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
-  rtx start_addr = gen_reg_rtx (Pmode);
-  rtx end_addr = gen_reg_rtx (Pmode);
-  rtx line_length = gen_reg_rtx (Pmode);
-  rtx r_tramp, tmp;
+  rtx start, end, r_tramp, tmp;
 
   emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
@@ -10662,6 +10659,9 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 
   if (!TARGET_64BIT)
     {
+      /* Start of trampoline code.  */
+      start = r_tramp;
+
       tmp = adjust_address (m_tramp, Pmode, 48);
       emit_move_insn (tmp, fnaddr);
       tmp = adjust_address (m_tramp, Pmode, 52);
@@ -10669,28 +10669,15 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 
       /* Create a fat pointer for the trampoline.  */
       tmp = adjust_address (m_tramp, Pmode, 56);
-      emit_move_insn (tmp, r_tramp);
+      emit_move_insn (tmp, start);
       tmp = adjust_address (m_tramp, Pmode, 60);
       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
-
-      /* fdc and fic only use registers for the address to flush,
-        they do not accept integer displacements.  We align the
-        start and end addresses to the beginning of their respective
-        cache lines to minimize the number of lines flushed.  */
-      emit_insn (gen_andsi3 (start_addr, r_tramp,
-                            GEN_INT (-MIN_CACHELINE_SIZE)));
-      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
-                                            TRAMPOLINE_CODE_SIZE-1));
-      emit_insn (gen_andsi3 (end_addr, tmp,
-                            GEN_INT (-MIN_CACHELINE_SIZE)));
-      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
-      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
-      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
-                                   gen_reg_rtx (Pmode),
-                                   gen_reg_rtx (Pmode)));
     }
   else
     {
+      /* Start of trampoline code.  */
+      start = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
+
       tmp = adjust_address (m_tramp, Pmode, 56);
       emit_move_insn (tmp, fnaddr);
       tmp = adjust_address (m_tramp, Pmode, 64);
@@ -10698,29 +10685,16 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 
       /* Create a fat pointer for the trampoline.  */
       tmp = adjust_address (m_tramp, Pmode, 16);
-      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
-                                                           r_tramp, 32)));
+      emit_move_insn (tmp, start);
       tmp = adjust_address (m_tramp, Pmode, 24);
       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
-
-      /* fdc and fic only use registers for the address to flush,
-        they do not accept integer displacements.  We align the
-        start and end addresses to the beginning of their respective
-        cache lines to minimize the number of lines flushed.  */
-      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
-      emit_insn (gen_anddi3 (start_addr, tmp,
-                            GEN_INT (-MIN_CACHELINE_SIZE)));
-      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
-                                            TRAMPOLINE_CODE_SIZE - 1));
-      emit_insn (gen_anddi3 (end_addr, tmp,
-                            GEN_INT (-MIN_CACHELINE_SIZE)));
-      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
-      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
-      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
-                                   gen_reg_rtx (Pmode),
-                                   gen_reg_rtx (Pmode)));
     }
 
+  end = force_reg (Pmode, plus_constant (Pmode, start, TRAMPOLINE_CODE_SIZE));
+
+  /* Flush trampoline.  */
+  emit_insn (gen_clear_cache (start, end));
+
 #ifdef HAVE_ENABLE_EXECUTE_STACK
   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index d87f3047cdaf..04a239806d26 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -9947,10 +9947,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   [(set_attr "type" "fpalu")
    (set_attr "length" "4")])
 
-;; The following two patterns are used by the trampoline code for nested
+;; The following four patterns are used by the trampoline code for nested
 ;; functions.  They flush the I and D cache lines from the start address
-;; (operand0) to the end address (operand1).  No lines are flushed if the
-;; end address is less than the start address (unsigned).
+;; (operand0) to the end address (operand1).
 ;;
 ;; Because the range of memory flushed is variable and the size of a MEM
 ;; can only be a CONST_INT, the patterns specify that they perform an
@@ -9959,9 +9958,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; The address range for an icache flush must lie within a single
 ;; space on targets with non-equivalent space registers.
 ;;
-;; Operand 0 contains the start address.
+;; Operand 0 contains the line aligned start address.
 ;; Operand 1 contains the end address.
-;; Operand 2 contains the line length to use.
+;; Operand 2 contains the line size to use.
 (define_insn "dcacheflush<P:mode>"
   [(const_int 1)
    (unspec_volatile [(mem:BLK (scratch))] UNSPECV_DCACHE)
@@ -9970,11 +9969,11 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
    (use (match_operand 2 "pmode_register_operand" "r"))
    (clobber (match_scratch:P 3 "=&0"))]
   ""
-  "cmpb,<dwc><<=,n %3,%1,.\;fdc,m %2(%3)\;sync"
+  "{comb|cmpb},<dwc><<,n %3,%1,.\;fdc,m %2(%3)\;sync"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
-(define_insn "icacheflush<P:mode>"
+(define_insn "icacheflush1<P:mode>"
   [(const_int 2)
    (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE)
    (use (match_operand 0 "pmode_register_operand" "r"))
@@ -9984,24 +9983,86 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
    (clobber (match_operand 4 "pmode_register_operand" "=&r"))
    (clobber (match_scratch:P 5 "=&0"))]
   ""
-  "mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,<dwc><<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop"
+  "mfsp %%sr1,%4\;ldsid (%5),%3\;mtsp %3,%%sr1\;{comb|cmpb},<dwc><<,n %5,%1,.\;fic,m %2(%%sr1,%5)\;sync\;mtsp %4,%%sr1\;nop\;nop\;nop\;nop\;nop\;nop"
   [(set_attr "type" "multi")
    (set_attr "length" "52")])
 
+(define_insn "icacheflush2<P:mode>"
+  [(const_int 2)
+   (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE)
+   (use (match_operand 0 "pmode_register_operand" "r"))
+   (use (match_operand 1 "pmode_register_operand" "r"))
+   (use (match_operand 2 "pmode_register_operand" "r"))
+   (clobber (match_scratch:P 3 "=&0"))]
+  "TARGET_PA_20"
+  "cmpb,<dwc><<,n %3,%1,.\;fic,m %2(%3)\;sync\;nop\;nop\;nop\;nop\;nop\;nop\;nop"
+  [(set_attr "type" "multi")
+   (set_attr "length" "40")])
+
+(define_insn "icacheflush3<P:mode>"
+  [(const_int 3)
+   (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE)
+   (use (match_operand 0 "pmode_register_operand" "r"))
+   (use (match_operand 1 "pmode_register_operand" "r"))
+   (use (match_operand 2 "pmode_register_operand" "r"))
+   (clobber (match_scratch:P 3 "=&0"))]
+  "!TARGET_PA_20 && TARGET_NO_SPACE_REGS"
+  "comb,<dwc><<,n %3,%1,.\;fic,m %2(%%sr4,%3)\;sync\;nop\;nop\;nop\;nop\;nop\;nop\;nop"
+  [(set_attr "type" "multi")
+   (set_attr "length" "40")])
+
+;; clear_cache
+;;
+;; Note the data and instruction cache flushes need to be separated
+;; by a sync instruction (see page 7-151 in PA 2.0 architecture manual).
+;; Thus, we can't combine the flush operations in a single loop.
+
+;; Operand 0 contains the start address (inclusive).
+;; Operand 1 contains the end address (exclusive).
 (define_expand "clear_cache"
   [(match_operand 0 "pmode_register_operand")
    (match_operand 1 "pmode_register_operand")]
   ""
 {
-  rtx line_length = gen_reg_rtx (Pmode);
+  rtx start = gen_reg_rtx (Pmode);
+  rtx line_size = gen_reg_rtx (Pmode);
+  rtx lab = gen_label_rtx ();
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+
+  /* Skip flush if op0 is greater than or equal to op1.  */
+  emit_cmp_and_jump_insns (op0, op1, GEU, NULL_RTX, Pmode, 0, lab);
+
+  /* Load cache line size.  */
+  emit_move_insn (line_size, GEN_INT (MIN_CACHELINE_SIZE));
 
-  emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
   if (TARGET_64BIT)
-    emit_insn (gen_icacheflushdi (operands[0], operands[1], line_length,
-                                 gen_reg_rtx (Pmode), gen_reg_rtx (Pmode)));
+    {
+      /* Align the start address.  */
+      emit_insn (gen_anddi3 (start, op0, GEN_INT (-MIN_CACHELINE_SIZE)));
+
+      /* Flush the cache.  */
+      emit_insn (gen_dcacheflushdi (start, op1, line_size));
+      emit_insn (gen_icacheflush2di (start, op1, line_size));
+    }
   else
-    emit_insn (gen_icacheflushsi (operands[0], operands[1], line_length,
-                                 gen_reg_rtx (Pmode), gen_reg_rtx (Pmode)));
+    {
+      /* Align the start address.  */
+      emit_insn (gen_andsi3 (start, op0, GEN_INT (-MIN_CACHELINE_SIZE)));
+
+      /* Flush the cache.  */
+      emit_insn (gen_dcacheflushsi (start, op1, line_size));
+      if (TARGET_PA_20)
+       emit_insn (gen_icacheflush2si (start, op1, line_size));
+      else if (TARGET_NO_SPACE_REGS)
+       emit_insn (gen_icacheflush3si (start, op1, line_size));
+      else
+       emit_insn (gen_icacheflush1si (start, op1, line_size,
+                                      gen_reg_rtx (Pmode),
+                                      gen_reg_rtx (Pmode)));
+    }
+
+  emit_label (lab);
   DONE;
 })

Reply via email to