[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-12-02 Thread Simon Tatham via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd173fb5d2854: [ARM,MVE] Add intrinsics to deal with 
predicates. (authored by simon_tatham).

Changed prior to commit:
  https://reviews.llvm.org/D70485?vs=230627=231718#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/predicates.c
  clang/utils/TableGen/MveEmitter.cpp
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
@@ -0,0 +1,219 @@
+; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s
+
+declare <16 x i1> @llvm.arm.mve.vctp8(i32)
+declare <8 x i1> @llvm.arm.mve.vctp16(i32)
+declare <4 x i1> @llvm.arm.mve.vctp32(i32)
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+
+declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
+; CHECK-LABEL: test_vctp8q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp8q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
+  %3 = and <16 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
+; CHECK-LABEL: test_vctp16q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp16q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
+  %3 = and <8 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
+; CHECK-LABEL: test_vctp32q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp32q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
+  %3 = and <4 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
+; CHECK-LABEL: test_vctp64q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp64q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)

[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-22 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.

Thanks. I'm still happy.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-22 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham updated this revision to Diff 230627.
simon_tatham edited the summary of this revision.
simon_tatham added a comment.

Revised again to fix test failures. But I've also split off the rework of 
`int_arm_vctp` into a separate patch D70592 , 
to make it clearer what's going on.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/predicates.c
  clang/utils/TableGen/MveEmitter.cpp
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
@@ -0,0 +1,219 @@
+; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s
+
+declare <16 x i1> @llvm.arm.mve.vctp8(i32)
+declare <8 x i1> @llvm.arm.mve.vctp16(i32)
+declare <4 x i1> @llvm.arm.mve.vctp32(i32)
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+
+declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
+; CHECK-LABEL: test_vctp8q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp8q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
+  %3 = and <16 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
+; CHECK-LABEL: test_vctp16q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp16q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
+  %3 = and <8 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
+; CHECK-LABEL: test_vctp32q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp32q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
+  %3 = and <4 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
+; CHECK-LABEL: test_vctp64q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp64q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:  

[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-21 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.
This revision is now accepted and ready to land.

Thanks, LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-21 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsARM.td:780
 
 def int_arm_vctp8  : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
 def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;

dmgreen wrote:
> Should we rename int_arm_vctp8 to int_arm_mve_vctp8?
Ha! What you mean by this and the next comment is "Simon, you are totally wrong 
when you claim in the commit message that these intrinsics are for NEON as 
well". :-)

I was misled myself by the lack of "mve" in the name. So yes, now I've found 
out that they //are// MVE-specific, I agree I should put it in!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-21 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham updated this revision to Diff 230468.
simon_tatham marked an inline comment as done.
simon_tatham added a comment.

Made all the requested changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/predicates.c
  clang/utils/TableGen/MveEmitter.cpp
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/lib/Target/ARM/MVETailPredication.cpp
  llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
@@ -0,0 +1,219 @@
+; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s
+
+declare <16 x i1> @llvm.arm.vctp8(i32)
+declare <8 x i1> @llvm.arm.vctp16(i32)
+declare <4 x i1> @llvm.arm.vctp32(i32)
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+
+declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
+; CHECK-LABEL: test_vctp8q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <16 x i1> @llvm.arm.vctp8(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp8q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i1> @llvm.arm.vctp8(i32 %a)
+  %3 = and <16 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
+; CHECK-LABEL: test_vctp16q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <8 x i1> @llvm.arm.vctp16(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp16q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i1> @llvm.arm.vctp16(i32 %a)
+  %3 = and <8 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
+; CHECK-LABEL: test_vctp32q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.vctp32(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp32q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = call <4 x i1> @llvm.arm.vctp32(i32 %a)
+  %3 = and <4 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
+; CHECK-LABEL: test_vctp64q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp64q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.64 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = 

[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-21 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: llvm/include/llvm/IR/IntrinsicsARM.td:780
 
 def int_arm_vctp8  : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
 def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;

Should we rename int_arm_vctp8 to int_arm_mve_vctp8?



Comment at: llvm/include/llvm/IR/IntrinsicsARM.td:783
 def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
 def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>;
 

Is this ever used anywhere? It doesn't select as far as I can see, so I think 
you can just remove it.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:4202
 
-def MVE_VCTP8  : MVE_VCTP<"8",  0b00>;
-def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
-def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
-def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
+multiclass MVE_VCTP_and_patterns {
+  def "": MVE_VCTP;

Maybe call this MVE_VCTP and name the instruction MVE_VCTPInst, or something 
like it.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:4206
+  let Predicates = [HasMVEInt] in {
+def : Pat<(intr rGPR:$Rn), (VTI.Pred (!cast(NAME) rGPR:$Rn))>;
+def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),

Two lines on this one please, just to make it more obvious when looking at it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70485/new/

https://reviews.llvm.org/D70485



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70485: [ARM,MVE] Add intrinsics to deal with predicates.

2019-11-20 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham created this revision.
simon_tatham added reviewers: ostannard, MarkMurrayARM, dmgreen.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

This commit adds the `vpselq` intrinsics which take an MVE predicate
word and select lanes from two vectors; the `vctp` intrinsics which
create a tail predicate word suitable for processing the first m
elements of a vector (e.g. in the last iteration of a loop); and
`vpnot`, which simply complements a predicate word and is just
syntactic sugar for the `~` operator.

Most sizes of `vctp` are lowered to the existing Arm IR intrinsics
that MVE shares with NEON. We already had isel patterns for converting
those into MVE VCTP, although I've added extra ones to generate the
predicated version. But I've made a special MVE version of the 64-bit
VCTP IR intrinsic, because the NEON version of that is defined to take
a `v2i1`, whereas in MVE that's not a legal type. So the MVE version
does the usual workaround of using `v4i1` instead.

I needed one small tweak in MveEmitter to allow the `unpromoted` type
modifier to apply to predicates as well as integers, so that `vpnot`
doesn't pointlessly convert its input integer to an `` before
complementing it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70485

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/predicates.c
  clang/utils/TableGen/MveEmitter.cpp
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/predicates.ll
@@ -0,0 +1,219 @@
+; RUN: opt -instcombine %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s
+
+declare <16 x i1> @llvm.arm.vctp8(i32)
+declare <8 x i1> @llvm.arm.vctp16(i32)
+declare <4 x i1> @llvm.arm.vctp32(i32)
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+
+declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
+; CHECK-LABEL: test_vctp8q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <16 x i1> @llvm.arm.vctp8(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp8q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.8 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = call <16 x i1> @llvm.arm.vctp8(i32 %a)
+  %3 = and <16 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
+; CHECK-LABEL: test_vctp16q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <8 x i1> @llvm.arm.vctp16(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp16q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.16 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = call <8 x i1> @llvm.arm.vctp16(i32 %a)
+  %3 = and <8 x i1> %1, %2
+  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
+  %5 = trunc i32 %4 to i16
+  ret i16 %5
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
+; CHECK-LABEL: test_vctp32q:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vctp.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = call <4 x i1> @llvm.arm.vctp32(i32 %a)
+  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
+  %2 = trunc i32 %1 to i16
+  ret i16 %2
+}
+
+define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vctp32q_m:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r1
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vctpt.32 r0
+; CHECK-NEXT:vmrs r0, p0
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p