This is an automated email from the ASF dual-hosted git repository.

sanirudh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 4330c11055 [Hexagon] Fix LWP assembly handler (predicate register) 
(#17204)
4330c11055 is described below

commit 4330c110550242571da017a1b15ae0b765723ae8
Author: FranckQC <[email protected]>
AuthorDate: Sat Jul 27 23:32:22 2024 -0500

    [Hexagon] Fix LWP assembly handler (predicate register) (#17204)
    
    * Fix LWP assembly handler (predicate register) (#2216)
    
    This solved the issue with LWP that appears with maxpool.
    
    The problem was that the LWP handler was forgetting to save p0 (used by the 
handler). This predicate register needs to be saved too, just like r0-r5, as it 
had been decided that it was the responsibility of the handler to save 
everything (even these theoretically caller-saved registers).
    In other words, since it had been decided that calling the LWP handler 
would not follow the normal ABI, and that the LWP handler would save everything 
it touches (even normally caller-saved registers like r0-r15 and p0-p3), it 
absolutely needs to save the predicate registers too (in particular p0, which 
was causing the issue).
    
    The issue appeared only with maxpool because it is the only one that had 
state held in p0 before calling the LWP handler. The call destroyed that saved 
state, making the code subsequently branch to the wrong portions of the code.
    
    Fix: Allocate 32 bytes (instead of the previous 24) in order to also save 
p3:0, which is stored at the bottom of the stack frame, and restore p3:0 at the 
end of the LWP handler.
    
    * Remove trailing spaces
    
    ---------
    
    Co-authored-by: Slama, Franck <[email protected]>
---
 src/runtime/hexagon/profiler/lwp_handler.S | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/runtime/hexagon/profiler/lwp_handler.S 
b/src/runtime/hexagon/profiler/lwp_handler.S
index 611c071311..8cd02dd828 100644
--- a/src/runtime/hexagon/profiler/lwp_handler.S
+++ b/src/runtime/hexagon/profiler/lwp_handler.S
@@ -50,12 +50,17 @@ handler itself.
   .falign
   .type  lwp_handler,@function
 lwp_handler:
-  { allocframe(#24)                            // Allocate 24 bytes on the 
stack to save R0-R5 registers
+  {
+    allocframe(#32)                            // Allocate 32 bytes on the 
stack to save R0-R5 registers (6*4bytes) and P0-P3 (4*1byte) + 4 unused bytes 
as the stack has to be 8-bytes aligned
     memd(r29+#-16) = r5:4                      // Save R5,R4
+    r5 = p3:0                                  // We will save P3:0 but we 
need an intermediate usual register (R5) that has already been saved
+  }
+  {
+    memd(r29+#16) = r3:2                       // Save R3,R2
+    memd(r29+#8) = r1:0                        // Save R1, R0
   }
   {
-    memd(r29+#8) = r3:2                        // Save R3,R2
-    memd(r29+#0) = r1:0                        // Save R1, R0
+    memw(r29+#0) = r5                          // Save P3:0 (via R5)
     r2 = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) // Get GOT address
   }
   {
@@ -102,14 +107,18 @@ lwp_handler:
     memw(r5+#8) = r0                          // Save lower 32 bits
   }
   .falign
-.LBB0_3:
+.LBB0_3:                                      // Restore the registers from 
the stack
+  {
+    r1 = memw(r29+#0)                         // We will restore P3:0 but need 
an intermediate usual register (R1) that hasn't already been restored
+    r5:4 = memd(r29+#24)                      // Restore R5:4
+  }
   {
-    r5:4 = memd(r29+#16)                     // Restore the registers from the 
stack
-    r3:2 = memd(r29+#8)
+    r3:2 = memd(r29+#16)                      // Restore R3:2
+    p3:0 = r1                                 // Restore P3:0 (via R1, not yet 
restored)
   }
   {
-    r1:0 = memd(r29+#0)
-    dealloc_return                          // Deallocate the stack and return
+    r1:0 = memd(r29+#8)                       // Restore R1:0
+    dealloc_return                            // Deallocate the stack and 
return
   }
 .Lfunc_end0:
   .size  lwp_handler, .Lfunc_end0-lwp_handler

Reply via email to