This is an automated email from the ASF dual-hosted git repository.
sanirudh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 4330c11055 [Hexagon] Fix LWP assembly handler (predicate register)
(#17204)
4330c11055 is described below
commit 4330c110550242571da017a1b15ae0b765723ae8
Author: FranckQC <[email protected]>
AuthorDate: Sat Jul 27 23:32:22 2024 -0500
[Hexagon] Fix LWP assembly handler (predicate register) (#17204)
* Fix LWP assembly handler (predicate register) (#2216)
This solved the issue with LWP that appears with maxpool.
The problem was that the LWP handler was forgetting to save p0 (used by the
handler). This predicate register needs to be saved too, just like r0-r5, as it
had been decided that it was the responsibility of the handler to save
everything (even these theoretically caller-saved registers).
Said differently: it had been decided that calling the LWP handler would not
follow the normal ABI, and that the LWP handler would save everything it
touches (even normally caller-saved registers like r0-r15 and p0-p3).
Therefore it absolutely needs to save the predicate registers too (in
particular p0, which was causing the issue).
The issue appeared only with maxpool because it is the only case that had
state saved in p0 before calling the LWP handler. The call destroyed the
content of p0, making the code subsequently branch to different portions of
the code.
Fix: Allocate 32 bytes (instead of the previous 24) so that p3:0 can be saved
as well. p3:0 is saved at the bottom of the allocated frame (r29+#0) and
restored at the end of the LWP handler.
* Remove trailing spaces
---------
Co-authored-by: Slama, Franck <[email protected]>
---
src/runtime/hexagon/profiler/lwp_handler.S | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/runtime/hexagon/profiler/lwp_handler.S
b/src/runtime/hexagon/profiler/lwp_handler.S
index 611c071311..8cd02dd828 100644
--- a/src/runtime/hexagon/profiler/lwp_handler.S
+++ b/src/runtime/hexagon/profiler/lwp_handler.S
@@ -50,12 +50,17 @@ handler itself.
.falign
.type lwp_handler,@function
lwp_handler:
- { allocframe(#24) // Allocate 24 bytes on the
stack to save R0-R5 registers
+ {
+ allocframe(#32) // Allocate 32 bytes on the
stack to save R0-R5 registers (6*4bytes) and P0-P3 (4*1byte) + 4 unused bytes
as the stack has to be 8-bytes aligned
memd(r29+#-16) = r5:4 // Save R5,R4
+ r5 = p3:0 // We will save P3:0 but we
need an intermediate usual register (R5) that has already been saved
+ }
+ {
+ memd(r29+#16) = r3:2 // Save R3,R2
+ memd(r29+#8) = r1:0 // Save R1, R0
}
{
- memd(r29+#8) = r3:2 // Save R3,R2
- memd(r29+#0) = r1:0 // Save R1, R0
+ memw(r29+#0) = r5 // Save P3:0 (via R5)
r2 = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) // Get GOT address
}
{
@@ -102,14 +107,18 @@ lwp_handler:
memw(r5+#8) = r0 // Save lower 32 bits
}
.falign
-.LBB0_3:
+.LBB0_3: // Restore the registers from
the stack
+ {
+ r1 = memw(r29+#0) // We will restore P3:0 but need
an intermediate usual register (R1) that hasn't already been restored
+ r5:4 = memd(r29+#24) // Restore R5:4
+ }
{
- r5:4 = memd(r29+#16) // Restore the registers from the
stack
- r3:2 = memd(r29+#8)
+ r3:2 = memd(r29+#16) // Restore R3:2
+ p3:0 = r1 // Restore P3:0 (via R1, not yet
restored)
}
{
- r1:0 = memd(r29+#0)
- dealloc_return // Deallocate the stack and return
+ r1:0 = memd(r29+#8) // Restore R1:0
+ dealloc_return // Deallocate the stack and
return
}
.Lfunc_end0:
.size lwp_handler, .Lfunc_end0-lwp_handler