[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-12-02 Thread Mark Murray via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG510792a2e0e3: [ARM][MVE][Intrinsics] Add 
VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics. (authored by MarkMurrayARM).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp 

[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-12-02 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM marked an inline comment as done.
MarkMurrayARM added a comment.

Nit terminated with extreme prejudice.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-12-02 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 231664.
MarkMurrayARM added a comment.

Address whitespace nit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> 

[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-12-02 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham accepted this revision.
simon_tatham added a comment.

I do, but only an indentation quibble.




Comment at: clang/include/clang/Basic/arm_mve.td:205
+(select (icmp_ule $a, $b), $a, $b)>,
+NameOverride<"vminq">;
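+  def vmaxqu: Intrinsic<...
[remainder of the quoted line and the inline comment text were lost in archive extraction]


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/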

https://reviews.llvm.org/D70829



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-11-30 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.
This revision is now accepted and ready to land.

Thanks. I'm happy if Simon does not have any extra comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-11-29 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 231540.
MarkMurrayARM added a comment.

Fix non-predicated floats.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> 

[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-11-28 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll:7
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vcmp.f16 ge, q0, q1
+; CHECK-NEXT:vpsel q0, q0, q1

You may want to use llvm.minnum/llvm.maxnum directly (provided the semantics 
are indeed equivalent). Otherwise it will need fast-math flags to fuse the 
compare and select together, which isn't the same as the original intrinsic.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70829/new/

https://reviews.llvm.org/D70829



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.

2019-11-28 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM created this revision.
Herald added subscribers: llvm-commits, cfe-commits, dmgreen, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.

Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics and their predicated versions. Add 
unit tests.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70829

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmin.u32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vmint.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s