https://github.com/gbossu closed
https://github.com/llvm/llvm-project/pull/151730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
gbossu wrote:
Closing, I'll work on supporting `movprfx` for `ext` instead.
https://github.com/llvm/llvm-project/pull/151730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-br
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
--check-prefixes=SVE
+; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
--check-prefixes=SVE2
+
+ta
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr
%uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i8_i16_vl256:
; SME: // %bb.0:
; SME-NEXT:ldr z0, [x0]
-; SME-NEXT:ldr z1, [x1]
-; SME-NEXT:ldr z2, [x2]
-; SME-NEXT:umlalb z0.h, z
https://github.com/gbossu created
https://github.com/llvm/llvm-project/pull/152553
They use extract shuffles for fixed vectors, and
llvm.vector.splice intrinsics for scalable vectors.
In the previous tests using ld+extract+st, the extract was optimized away and
replaced by a smaller load at th
https://github.com/gbossu edited
https://github.com/llvm/llvm-project/pull/152553
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
@@ -150,13 +150,14 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT:mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:ptrue p0.s, vl8
-; VBITS_GE_256-NEXT:uunpklo z1.s, z0.h
-; VBITS_GE_256-NEXT:
@@ -256,12 +256,13 @@ define
@splice_nxv2f64_last_idx( %a,
define @splice_nxv2i1_idx( %a, %b) #0 {
; CHECK-LABEL: splice_nxv2i1_idx:
; CHECK: // %bb.0:
-; CHECK-NEXT:mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT:mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT:mov z0.d
@@ -86,6 +83,13 @@ bool
AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
Changed = true;
break;
}
+ case AArch64::EXT_ZZZI:
+Register DstReg = MI.getOperand(0).getReg();
+Register SrcReg1 = MI.getOperand(1).getReg();
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
let AddedComplexity = 2 in {
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32
imm0_255:$imm))),
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1),
imm0_255:$imm)>;
+
+f
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
let AddedComplexity = 2 in {
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32
imm0_255:$imm))),
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1),
imm0_255:$imm)>;
+
+f
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
let AddedComplexity = 2 in {
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32
imm0_255:$imm))),
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1),
imm0_255:$imm)>;
+
+f
@@ -2512,9 +2507,11 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
MulAcc->getCondOp(), MulAcc->isOrdered(),
WrapFlagsTy(MulAcc->hasNoUnsignedWrap(),
MulAcc->hasNoSignedWrap()),
MulAcc->getDebugLoc()),
-ExtOp(M
@@ -2586,22 +2590,21 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
VPValue *getVecOp1() const { return getOperand(2); }
/// Return if this MulAcc recipe contains extend instructions.
- bool isExtended() const { return ExtOp != Instruction::CastOps:
@@ -2586,22 +2590,21 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
VPValue *getVecOp1() const { return getOperand(2); }
/// Return if this MulAcc recipe contains extend instructions.
- bool isExtended() const { return ExtOp != Instruction::CastOps:
@@ -2586,22 +2590,21 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
VPValue *getVecOp1() const { return getOperand(2); }
/// Return if this MulAcc recipe contains extend instructions.
- bool isExtended() const { return ExtOp != Instruction::CastOps:
@@ -2526,13 +2523,14 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
R->getCondOp(), R->isOrdered(),
WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()),
R->getDebugLoc()),
-ExtOp(Ext0->getOpcode()), Is
@@ -2586,22 +2590,21 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
VPValue *getVecOp1() const { return getOperand(2); }
/// Return if this MulAcc recipe contains extend instructions.
- bool isExtended() const { return ExtOp != Instruction::CastOps:
@@ -2586,22 +2590,21 @@ class VPMulAccumulateReductionRecipe : public
VPReductionRecipe {
VPValue *getVecOp1() const { return getOperand(2); }
/// Return if this MulAcc recipe contains extend instructions.
- bool isExtended() const { return ExtOp != Instruction::CastOps:
https://github.com/gbossu edited
https://github.com/llvm/llvm-project/pull/152554
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -250,6 +286,9 @@ struct MachineSMEABI : public MachineFunctionPass {
SmallVector BundleStates;
gbossu wrote:
We are starting to accumulate a lot of state, which makes the code harder to
follow as it allows any member function to modify it instead of havi
@@ -250,6 +286,9 @@ struct MachineSMEABI : public MachineFunctionPass {
SmallVector BundleStates;
std::optional TPIDR2Block;
std::optional AfterSMEProloguePt;
+Register AgnosticZABufferPtr = AArch64::NoRegister;
+LiveRegs PhysLiveRegsAfterSMEPrologue = LiveR
@@ -200,7 +200,7 @@ struct MachineSMEABI : public MachineFunctionPass {
/// Inserts code to handle changes between ZA states within the function.
/// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
- void insertStateChanges();
+ void insertStateChanges(
24 matches
Mail list logo