[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-13 Thread Baptiste Saleil via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3f78605a8cb1: [PowerPC] Add paired vector load and store 
builtins and intrinsics (authored by bsaleil).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  clang/test/Sema/ppc-mma-types.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,315 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 18
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 18
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 1
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 1
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 42
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE: 

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-11 Thread Ahsan Saghir via Phabricator via cfe-commits
saghir accepted this revision.
saghir added a comment.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-10 Thread Amy Kwan via Phabricator via cfe-commits
amyk accepted this revision.
amyk added a comment.
This revision is now accepted and ready to land.

Thanks for fixing the tests and formatting. LGTM if there are no other concerns.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-09 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 303973.
bsaleil added a comment.

Simplify control flow and improve test cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  clang/test/Sema/ppc-mma-types.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,315 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 18
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 18
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 1
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 1
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 42
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 42
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-09 Thread Lei Huang via Phabricator via cfe-commits
lei added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:2658
+  return false;
+}
+

There are a lot of nested `if`s; would it be possible to refactor this to have
some early exits instead?




Comment at: llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll:2
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 < %s | FileCheck %s
+

BE test?
Can we add `-ppc-asm-full-reg-names` and update the checks to also ensure the 
reg info is accurate?



Comment at: llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll:4
+
+target datalayout = "e-m:e-i64:64-p:64:64-n32:64-v256:256:256-v512:512:512"
+

Is this needed, since we already have the triple on the RUN line?



Comment at: llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll:3
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -disable-lsr \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 < %s | FileCheck %s
+

BE test?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-06 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 303540.
bsaleil added a comment.

Fix unaligned load/store select


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  clang/test/Sema/ppc-mma-types.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,315 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 18
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 18
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 1
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 1
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li r5, 42
+; CHECK-NEXT:lxvpx vsp0, r3, r5
+; CHECK-NEXT:stxvpx vsp0, r4, r5
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:li r5, 42
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r5
+; CHECK-BE-NEXT:stxvpx vsp0, r4, r5
+; CHECK-BE-N

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-05 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 303152.
bsaleil added a comment.

Rebase patch and add Sema check test case for the builtins


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  clang/test/Sema/ppc-mma-types.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,307 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail ca

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-05 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 303132.
bsaleil added a comment.

Add support for the paired load/store intrinsics in `PPCLoopInstrFormPrep`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/loop-p10-pair-prepare.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,307 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail call <256 x i1> @llv

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-04 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil updated this revision to Diff 302983.
bsaleil added a comment.

Add builtin tests


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90799/new/

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-mma.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,307 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 42
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* 

[PATCH] D90799: [PowerPC] Add paired vector load and store builtins and intrinsics

2020-11-04 Thread Baptiste Saleil via Phabricator via cfe-commits
bsaleil created this revision.
bsaleil added reviewers: nemanjai, amyk, saghir, lei.
bsaleil added projects: LLVM, PowerPC.
Herald added subscribers: llvm-commits, cfe-commits, shchenz, kbarton, 
hiraditya.
Herald added a project: clang.
bsaleil requested review of this revision.

This patch adds the Clang builtins and LLVM intrinsics to load and store vector 
pairs.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90799

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll
  llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Index: llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
===
--- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -698,3 +698,307 @@
 
 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
+
+; Function Attrs: nounwind -- copies one <256 x i1> vector pair from %vpp to %vp2 via the lxvp/stxvp intrinsics with no offset; both check prefixes expect lxvp/stxvp with a 0 displacement.
+define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
+; CHECK-LABEL: test_ldst_1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvp vsp0, 0(r3)
+; CHECK-NEXT:stxvp vsp0, 0(r4)
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_1:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvp vsp0, 0(r3)
+; CHECK-BE-NEXT:stxvp vsp0, 0(r4)
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
+  %2 = bitcast <256 x i1>* %vp2 to i8*
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly -- paired-vector load intrinsic: takes an i8* address and returns a <256 x i1> value.
+declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)
+
+; Function Attrs: argmemonly nounwind writeonly -- paired-vector store intrinsic: takes the <256 x i1> value to store and an i8* destination address.
+declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)
+
+; Function Attrs: nounwind -- copies one vector pair with the same runtime i64 %offset added to both pointers; both check prefixes expect the register-indexed lxvpx/stxvpx forms.
+define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvpx vsp0, r3, r4
+; CHECK-NEXT:stxvpx vsp0, r5, r4
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_2:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvpx vsp0, r3, r4
+; CHECK-BE-NEXT:stxvpx vsp0, r5, r4
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 %offset
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 %offset
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind -- copies one vector pair at constant byte offset 18 on both pointers; both check prefixes expect the prefixed plxvp/pstxvp forms (NOTE(review): presumably because 18 is not encodable as a non-prefixed lxvp displacement -- confirm against the ISA).
+define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_3:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_3:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 18(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 18(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 18
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 18
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind -- copies one vector pair at constant byte offset 1 on both pointers; both check prefixes expect the prefixed plxvp/pstxvp forms (NOTE(review): presumably the odd offset rules out the non-prefixed form -- confirm against the ISA).
+define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_4:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_4:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 1(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 1(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 1
+  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
+  %3 = bitcast <256 x i1>* %vp2 to i8*
+  %4 = getelementptr i8, i8* %3, i64 1
+  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
+; CHECK-LABEL: test_ldst_5:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-NEXT:blr
+;
+; CHECK-BE-LABEL: test_ldst_5:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:plxvp vsp0, 42(r3), 0
+; CHECK-BE-NEXT:pstxvp vsp0, 42(r4), 0
+; CHECK-BE-NEXT:blr
+entry:
+  %0 = bitcast <256 x i1>* %vpp to i8*
+  %1 = getelementptr i8, i8* %0, i64 42
+  %2 = tail call <2