[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Mark Murray via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd3f62ceac0ce: [ARM][MVE][Intrinsics] Add VMULH/VRMULH 
intrinsics. (authored by MarkMurrayARM).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %0
+}
+
+declare <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32>, <4 x i32>) #1
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+
+declare <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+
+declare <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
+
+declare <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> 

[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 232094.
MarkMurrayARM marked an inline comment as done.
MarkMurrayARM added a comment.

Fix incorrect argument order error.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %0
+}
+
+declare <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32>, <4 x i32>) #1
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+
+declare <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+
+declare <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
+
+declare <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmulhq_u8(<16 x 

[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:3645
+Intrinsic pred_int, bit round> {
+  def "" : MVE_VxMULH;
+

Should this be `VTI.Suffix, VTI.Unsigned, VTI.Size, round` now?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 232093.
MarkMurrayARM added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %0
+}
+
+declare <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32>, <4 x i32>) #1
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+
+declare <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+
+declare <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
+
+declare <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; 

[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM marked 3 inline comments as done.
MarkMurrayARM added inline comments.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:3644
+multiclass MVE_VxMULH_m {

dmgreen wrote:
> Formatting.
I took a guess as to what you wanted here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-04 Thread Dave Green via Phabricator via cfe-commits
dmgreen added a comment.

Looks good to me, with a couple of minor nitpicks (that I'm not sure matter or 
not).




Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:3626
 
-class MVE_VxMULH size,
+class MVE_VxMULH size, bit U,
  bit round, list pattern=[]>

Any strong reason to switch these around? From what I can tell most of the 
patterns are (suffix, unsigned, size) in that order. Although that doesn't seem 
to be universal. Might as well try to keep it consistent, if you do not have a 
reason to swap them.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:3644
+multiclass MVE_VxMULH_m {

Formatting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70948/new/

https://reviews.llvm.org/D70948



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70948: [ARM][MVE][Intrinsics] Add VMULH/VRMULH intrinsics.

2019-12-03 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM created this revision.
MarkMurrayARM added reviewers: simon_tatham, ostannard, dmgreen.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

Add MVE VMULH/VRMULH intrinsics and unit tests.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70948

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vrmulhq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vrmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s8 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %0
+}
+
+declare <16 x i8> @llvm.arm.mve.vrmulh.v16i8(<16 x i8>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_s16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s16 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %0
+}
+
+declare <8 x i16> @llvm.arm.mve.vrmulh.v8i16(<8 x i16>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vrmulh.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vrmulh.v4i32(<4 x i32>, <4 x i32>) #1
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+
+declare <16 x i8> @llvm.arm.mve.rmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_u16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+
+declare <8 x i16> @llvm.arm.mve.rmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vrmulhq_m_s32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vrmulht.s32 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
+
+declare <4 x i32> @llvm.arm.mve.rmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulhq.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc