Author: Eli Friedman Date: 2020-07-31T17:27:47+02:00 New Revision: 48eb1aa387eb1d356632b82efaf6438d1fcb6640
URL: https://github.com/llvm/llvm-project/commit/48eb1aa387eb1d356632b82efaf6438d1fcb6640 DIFF: https://github.com/llvm/llvm-project/commit/48eb1aa387eb1d356632b82efaf6438d1fcb6640.diff LOG: [AArch64][SVE] Teach copyPhysReg to copy ZPR2/3/4. It's sort of tricky to hit this in practice, but not impossible. I have a synthetic C testcase if anyone is interested. The implementation is identical to the equivalent NEON register copies. Differential Revision: https://reviews.llvm.org/D84373 (cherry picked from commit 993c1a3219a8ae69f1d700183bf174d75f3815d4) Added: llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir Modified: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp Removed: ################################################################################ diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5139ae5ccaf1..08f80c9aa361 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Copy a Z register pair by copying the individual sub-registers. + if (AArch64::ZPR2RegClass.contains(DestReg) && + AArch64::ZPR2RegClass.contains(SrcReg)) { + static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1}; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, + Indices); + return; + } + + // Copy a Z register triple by copying the individual sub-registers. + if (AArch64::ZPR3RegClass.contains(DestReg) && + AArch64::ZPR3RegClass.contains(SrcReg)) { + static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, + AArch64::zsub2}; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, + Indices); + return; + } + + // Copy a Z register quad by copying the individual sub-registers. 
+ if (AArch64::ZPR4RegClass.contains(DestReg) && + AArch64::ZPR4RegClass.contains(SrcReg)) { + static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1, + AArch64::zsub2, AArch64::zsub3}; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ, + Indices); + return; + } + if (AArch64::GPR64spRegClass.contains(DestReg) && (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { diff --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir new file mode 100644 index 000000000000..83a0b5dd1c14 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: copy_zpr2 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$z0_z1' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $z0_z1 + ; CHECK-LABEL: name: copy_zpr2 + ; CHECK: liveins: $z0_z1 + ; CHECK: $z2 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z0, $z0 + ; CHECK: $z0 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z2, $z2 + ; CHECK: RET_ReallyLR + $z1_z2 = COPY $z0_z1 + $z0_z1 = COPY $z1_z2 + RET_ReallyLR + +... +--- +name: copy_zpr3 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$z0_z1_z2' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $z0_z1_z2 + ; CHECK-LABEL: name: copy_zpr3 + ; CHECK: liveins: $z0_z1_z2 + ; CHECK: $z3 = ORR_ZZZ $z2, $z2 + ; CHECK: $z2 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z0, $z0 + ; CHECK: $z0 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z2, $z2 + ; CHECK: $z2 = ORR_ZZZ $z3, $z3 + ; CHECK: RET_ReallyLR + $z1_z2_z3 = COPY $z0_z1_z2 + $z0_z1_z2 = COPY $z1_z2_z3 + RET_ReallyLR + +... 
+--- +name: copy_zpr4 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$z0_z1_z2_z3' } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + liveins: $z0_z1_z2_z3 + ; CHECK-LABEL: name: copy_zpr4 + ; CHECK: liveins: $z0_z1_z2_z3 + ; CHECK: $z4 = ORR_ZZZ $z3, $z3 + ; CHECK: $z3 = ORR_ZZZ $z2, $z2 + ; CHECK: $z2 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z0, $z0 + ; CHECK: $z0 = ORR_ZZZ $z1, $z1 + ; CHECK: $z1 = ORR_ZZZ $z2, $z2 + ; CHECK: $z2 = ORR_ZZZ $z3, $z3 + ; CHECK: $z3 = ORR_ZZZ $z4, $z4 + ; CHECK: RET_ReallyLR + $z1_z2_z3_z4 = COPY $z0_z1_z2_z3 + $z0_z1_z2_z3 = COPY $z1_z2_z3_z4 + RET_ReallyLR + +... _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
