================
@@ -0,0 +1,527 @@
+# RUN: llc -mattr=+sve -aarch64-stack-hazard-in-non-streaming
-aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024
-mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve
-aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects
-aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -o - |
FileCheck %s --check-prefix=ASM
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve
-aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects
-aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -filetype=obj
-o %t
+# RUN: llvm-objdump --dwarf=frames %t | FileCheck %s --check-prefix=UNWINDINFO
+# RUN: rm -rf %t
+#
+# Test allocation and deallocation of SVE objects on the stack with
+# split-sve-objects (and hazard padding) enabled. This also tests using a
+# combination of scalable and non-scalable offsets to access the SVE on the
+# stack.
+#
+# With split-sve-objects (which implies hazard padding) the SVE area is split
+# into PPR and ZPR areas with (fixed-size) hazard padding between them. The PPR
+# area holds all scalable predicate callee saves and locals, and the ZPR area
+# holds all scalable vector callee saves and locals. Additionally, any FPR
+# callee save is promoted to a ZPR callee save (to avoid needing additional
+# hazard padding in the callee save area).
+#
+# +-------------+
+# | stack arg |
+# +-------------+ <- SP before call
+# | Callee Saves|
+# | Frame record| (if available)
+# |-------------| <- FP (if available)
+# | PPR area |
+# |-------------|
+# |/////////////| hazard padding
+# |-------------|
+# | ZPR area |
+# +-------------+
+# | : |
+# | Stack objs |
+# | : |
+# +-------------+ <- SP after call and frame-setup
+#
+--- |
+
+ define void @test_allocate_split_sve() uwtable { entry: unreachable }
+ define void @test_allocate_split_sve_realigned() uwtable { entry:
unreachable }
+ define void @test_address_split_sve() uwtable { entry: unreachable }
+ define void @test_address_split_sve_fp() uwtable { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_ppr_zpr() uwtable { entry:
unreachable }
+
+...
+---
+# +----------+
+# |scratchreg| // x29 is used as scratch reg.
+# |----------|
+# | %stack.0 | // scalable predicate of n * 12 bytes, aligned to 16 bytes
+# | | // to be materialized with 1*ADDVL (<=> n * 16 bytes)
+# |----------|
+# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
+# |//////////| // Note: This is currently not included in the "stackSize"
+# +----------+
+# | %stack.0 | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | | // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
+# +----------+
+# |//////////| // hazard padding (1024 bytes)
+# |----------|
+# | %stack.1 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_split_sve
+# CHECK: stackSize: 1056
+
+# CHECK: bb.0.entry:
+# CHECK: liveins: $z0, $p0, $fp
+# CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 ::
(store (s64) into %stack.4)
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08,
0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10,
0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10,
0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
+#
+# CHECK-NEXT: $x8 = ADDXri $sp, 1040, 0
+# CHECK-NEXT: $x8 = ADDPL_XXI $x8, 7, implicit $vg
+# CHECK-NEXT: STR_ZXI $z0, killed $x8, 0 :: (store (<vscale x 1 x s128>) into
%stack.0)
+# CHECK-NEXT: $x8 = ADDXri $sp, 2064, 0
+# CHECK-NEXT: STR_PXI $p0, killed $x8, 18 :: (store (<vscale x 1 x s16>) into
%stack.1)
+#
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0,
0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0,
0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load
(s64) from %stack.4)
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
+# CHECK-NEXT: RET_ReallyLR
+
+# ASM-LABEL: test_allocate_split_sve:
+# ASM: .cfi_def_cfa_offset 16
+# ASM-NEXT: .cfi_offset w29, -16
+# ASM: sub sp, sp, #1024
+# ASM: .cfi_def_cfa_offset 1040
+# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e,
0x22 // sp + 1040 + 8 * VG
----------------
sdesmalen-arm wrote:
nit: without the checks for the asm (at least for the instructions that the
.cfi_escape relate to) these checks don't really mean much.
https://github.com/llvm/llvm-project/pull/142392
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits