[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-27 Thread Mark Murray via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf4bba07b87ce: [ARM][MVE][Intrinsics] Add MVE VABD 
intrinsics. Add unit tests. (authored by MarkMurrayARM).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vabdq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vabdq_f32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.f32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>)
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
+
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_f16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.f16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+
+declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1664,7 +1664,8 @@
 }
 
 
-class MVE_VABD_int size, list pattern=[]>
+class MVE_VABD_int size,
+ list pattern=[]>
   : MVE_int<"vabd", suffix, size, pattern> {
 
   let Inst{28} = U;
@@ -1676,12 +1677,35 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m {
+  def "" : MVE_VABD_int;
+
+  let Predicates = [HasMVEInt] in {
+// Unpredicated absolute difference
+def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+// Predicated absolute difference
+def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(i32 1), (VTI.Pred VCCR:$mask),
+(VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD
+  : MVE_VABD_m;
+
+defm MVE_VABDs8  : MVE_VABD;
+defm MVE_VABDs16 : MVE_VABD;
+defm MVE_VABDs32 : MVE_VABD;
+defm MVE_VABDu8  : MVE_VABD;
+defm MVE_VABDu16 : MVE_VABD;
+defm MVE_VABDu32 : MVE_VABD;
 
 class MVE_VRHADD size, list pattern=[]>
   : MVE_int<"vrhadd", suffix, size, p

[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-26 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 231055.
MarkMurrayARM added a comment.

Respond to review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vabdq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vabdq_f32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.f32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>)
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
+
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_f16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.f16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+
+declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1664,7 +1664,8 @@
 }
 
 
-class MVE_VABD_int size, list pattern=[]>
+class MVE_VABD_int size,
+ list pattern=[]>
   : MVE_int<"vabd", suffix, size, pattern> {
 
   let Inst{28} = U;
@@ -1676,12 +1677,35 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m {
+  def "" : MVE_VABD_int;
+
+  let Predicates = [HasMVEInt] in {
+// Unpredicated absolute difference
+def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+// Predicated absolute difference
+def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(i32 1), (VTI.Pred VCCR:$mask),
+(VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD
+  : MVE_VABD_m;
+
+defm MVE_VABDs8  : MVE_VABD;
+defm MVE_VABDs16 : MVE_VABD;
+defm MVE_VABDs32 : MVE_VABD;
+defm MVE_VABDu8  : MVE_VABD;
+defm MVE_VABDu16 : MVE_VABD;
+defm MVE_VABDu32 : MVE_VABD;
 
 class MVE_VRHADD size, list pattern=[]>
   : MVE_int<"vrhadd", suffix, size, pattern> {
@@ -2950,8 +2974,28 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDf

[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-26 Thread Simon Tatham via Phabricator via cfe-commits
simon_tatham added a comment.

This mostly LGTM: only a handful of nits.




Comment at: clang/include/clang/Basic/arm_mve.td:45
 let params = T.Usual in {
+def vabdq: Intrinsic $a, $b)>;
+}

Can you wrap this line to 80 columns, please? I've been trying to fit the rest 
of the file in that width.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:1669
+ list pattern=[]>
+  : MVE_int {
 

This new template parameter `iname` seems to be redundant, since I can't see 
anywhere you set it to anything other than `"vabd"`.



Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:2958
+class MVE_VABD_fp
+  : MVE_float {

Similarly here: `iname` is an unnecessary extra template parameter.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-26 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 231034.
MarkMurrayARM added a comment.

Rebase and reupload patches.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vabdq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vabdq_f32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.f32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>)
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
+
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_f16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.f16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+
+declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1664,8 +1664,9 @@
 }
 
 
-class MVE_VABD_int size, list pattern=[]>
-  : MVE_int<"vabd", suffix, size, pattern> {
+class MVE_VABD_int size,
+ list pattern=[]>
+  : MVE_int {
 
   let Inst{28} = U;
   let Inst{25-23} = 0b110;
@@ -1676,12 +1677,35 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m {
+  def "" : MVE_VABD_int;
+
+  let Predicates = [HasMVEInt] in {
+// Unpredicated absolute difference
+def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+// Predicated absolute difference
+def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(i32 1), (VTI.Pred VCCR:$mask),
+(VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD
+  : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDs8  : MVE_VABD;
+defm MVE_VABDs16 : MVE_VABD;
+defm MVE_VABDs32 : MVE_VABD;
+defm MVE_VABDu8  : MVE_VABD;
+defm MVE_VABDu16 : MVE_VABD;
+defm MVE_VABDu32 : MVE_VABD;
 
 class MVE_VRHADD size, list pattern=[]>
   : MVE_int<"vrhadd", 

[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-25 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM updated this revision to Diff 230890.
MarkMurrayARM added a comment.

Merge all VABD intrinis types under T.Usual instead of doing the floats
separately.

Add more tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vabdq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vabdq_f32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.f32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>)
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
+
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_f16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.f16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+
+declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1664,8 +1664,9 @@
 }
 
 
-class MVE_VABD_int size, list pattern=[]>
-  : MVE_int<"vabd", suffix, size, pattern> {
+class MVE_VABD_int size,
+ list pattern=[]>
+  : MVE_int {
 
   let Inst{28} = U;
   let Inst{25-23} = 0b110;
@@ -1676,12 +1677,35 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m {
+  def "" : MVE_VABD_int;
+
+  let Predicates = [HasMVEInt] in {
+// Unpredicated absolute difference
+def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+// Predicated absolute difference
+def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(i32 1), (VTI.Pred VCCR:$mask),
+(VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD
+  : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDs8  : MVE_VABD;
+defm MVE_VABDs16 : MVE_VABD;
+defm MVE_VABDs32 : MVE_VABD;
+defm MVE_VABDu8  : MVE_VABD;
+defm MVE_VABDu16 : MVE_VABD;
+defm MVE_VABDu32 : MVE_

[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-25 Thread Dave Green via Phabricator via cfe-commits
dmgreen added inline comments.



Comment at: clang/include/clang/Basic/arm_mve.td:33
 let params = T.Int in {
+def vabdq: Intrinsic $a, $b)>;
 def vaddq: Intrinsic;

Can this and vadbqf below be combined into one using T.Usual?

I believe the differences only usually come from "fadd" being different to 
"add". If they are both intrinsics (which sounds good to me for abd), then they 
can more happily live together.



Comment at: clang/test/CodeGen/arm-mve-intrinsics/vabdq.c:13
+uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC

More tests please.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70545/new/

https://reviews.llvm.org/D70545



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.

2019-11-21 Thread Mark Murray via Phabricator via cfe-commits
MarkMurrayARM created this revision.
MarkMurrayARM added reviewers: simon_tatham, ostannard, dmgreen.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

Add MVE VABD intrinsics.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70545

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/test/CodeGen/arm-mve-intrinsics/vabdq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdq_u32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.s32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b)
+  ret <4 x i32> %0
+}
+
+declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>)
+
+define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vabdq_f32:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vabd.f32 q0, q0, q1
+; CHECK-NEXT:bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>)
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_s8:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.s8 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>)
+
+define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
+; CHECK-LABEL: test_vabdq_m_f16:
+; CHECK:   @ %bb.0: @ %entry
+; CHECK-NEXT:vmsr p0, r0
+; CHECK-NEXT:vpst
+; CHECK-NEXT:vabdt.f16 q0, q1, q2
+; CHECK-NEXT:bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+
+declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1664,8 +1664,9 @@
 }
 
 
-class MVE_VABD_int size, list pattern=[]>
-  : MVE_int<"vabd", suffix, size, pattern> {
+class MVE_VABD_int size,
+ list pattern=[]>
+  : MVE_int {
 
   let Inst{28} = U;
   let Inst{25-23} = 0b110;
@@ -1676,12 +1677,35 @@
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m {
+  def "" : MVE_VABD_int;
+
+  let Predicates = [HasMVEInt] in {
+// Unpredicated absolute difference
+def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+// Predicated absolute difference
+def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+  (VTI.Vec (!cast(NAME)
+(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+(i32 1), (VTI.Pred VCCR:$mask),
+(VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD
+  : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDs8  : MVE_VABD;
+defm MVE_VABDs16 : MVE_VABD;
+defm MVE_VABDs32 : MVE_VABD;
+defm MVE_VABDu8  : MVE_VABD;
+defm MVE_VABDu16 : MVE_VABD;
+defm MVE_VABDu32 : MVE_VABD;