[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG3f78605a8cb1: [PowerPC] Add paired vector load and store builtins and intrinsics (authored by bsaleil). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c clang/test/Sema/ppc-mma-types.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,315 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function 
Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 18 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 18 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 1 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 1 +; 
CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 42 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE:
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
saghir accepted this revision. saghir added a comment. LGTM. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
amyk accepted this revision. amyk added a comment. This revision is now accepted and ready to land. Thanks for fixing the tests and formatting. LGTM if there are no other concerns. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil updated this revision to Diff 303973. bsaleil added a comment. Simplify control flow and improve test cases Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c clang/test/Sema/ppc-mma-types.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,315 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare 
void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 18 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 18 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 1 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 1 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = 
getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 42 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 42 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
lei added inline comments. Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:2658 + return false; +} + There are a lot of nested `if`s; would it be possible to refactor to have some early exits instead? Comment at: llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll:2 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 < %s | FileCheck %s + BE test? Can we add `-ppc-asm-full-reg-names` and update the checks to also ensure the reg info is accurate? Comment at: llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll:4 + +target datalayout = "e-m:e-i64:64-p:64:64-n32:64-v256:256:256-v512:512:512" + Is this needed since we have the triple on the run line? Comment at: llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll:3 +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s + BE test? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil updated this revision to Diff 303540. bsaleil added a comment. Fix unaligned load/store select Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c clang/test/Sema/ppc-mma-types.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,315 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare void 
@llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 18 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 18 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 1 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 1 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = 
getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li r5, 42 +; CHECK-NEXT:lxvpx vsp0, r3, r5 +; CHECK-NEXT:stxvpx vsp0, r4, r5 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:li r5, 42 +; CHECK-BE-NEXT:lxvpx vsp0, r3, r5 +; CHECK-BE-NEXT:stxvpx vsp0, r4, r5 +; CHECK-BE-N
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil updated this revision to Diff 303152. bsaleil added a comment. Rebase patch and add Sema check test case for the builtins Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c clang/test/Sema/ppc-mma-types.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,307 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind 
writeonly +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> 
@llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 42 + %2 = tail ca
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil updated this revision to Diff 303132. bsaleil added a comment. Add support for the paired load/store intrinsics in `PPCLoopInstrFormPrep` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,307 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare 
void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* 
%1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 42 + %2 = tail call <256 x i1> @llv
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil updated this revision to Diff 302983. bsaleil added a comment. Add builtin tests Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D90799/new/ https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-ppc-mma.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,307 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind writeonly +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; 
CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret 
void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 42 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 42 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8*
[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics
bsaleil created this revision. bsaleil added reviewers: nemanjai, amyk, saghir, lei. bsaleil added projects: LLVM, PowerPC. Herald added subscribers: llvm-commits, cfe-commits, shchenz, kbarton, hiraditya. Herald added a project: clang. bsaleil requested review of this revision. This patch adds the Clang builtins and LLVM intrinsics to load and store vector pairs. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D90799 Files: clang/include/clang/Basic/BuiltinsPPC.def clang/lib/CodeGen/CGBuiltin.cpp llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.td llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll === --- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -698,3 +698,307 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32) declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) + +; Function Attrs: nounwind +define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvp vsp0, 0(r3) +; CHECK-NEXT:stxvp vsp0, 0(r4) +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvp vsp0, 0(r3) +; CHECK-BE-NEXT:stxvp vsp0, 0(r4) +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0) + %2 = bitcast <256 x i1>* %vp2 to i8* + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2) + ret void +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.mma.lxvp(i8*) + +; Function Attrs: argmemonly nounwind 
writeonly +declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*) + +; Function Attrs: nounwind +define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lxvpx vsp0, r3, r4 +; CHECK-NEXT:stxvpx vsp0, r5, r4 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:lxvpx vsp0, r3, r4 +; CHECK-BE-NEXT:stxvpx vsp0, r5, r4 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 %offset + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 %offset + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 18 + %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 18 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 1 + %2 = tail call <256 x i1> 
@llvm.ppc.mma.lxvp(i8* %1) + %3 = bitcast <256 x i1>* %vp2 to i8* + %4 = getelementptr i8, i8* %3, i64 1 + tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4) + ret void +} + +; Function Attrs: nounwind +define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2) { +; CHECK-LABEL: test_ldst_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-NEXT:blr +; +; CHECK-BE-LABEL: test_ldst_5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0 +; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0 +; CHECK-BE-NEXT:blr +entry: + %0 = bitcast <256 x i1>* %vpp to i8* + %1 = getelementptr i8, i8* %0, i64 42 + %2 = tail call <2