[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
This revision was automatically updated to reflect the committed changes. Closed by commit rGf4bba07b87ce: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics. Add unit tests. (authored by MarkMurrayARM). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vabdq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.s32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.f32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.f16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td === --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,7 +1664,8 @@ } -class MVE_VABD_int size, list pattern=[]> +class MVE_VABD_int size, + list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { let Inst{28} = U; @@ -1676,12 +1677,35 @@ let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { +// Unpredicated absolute difference +def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + +// Predicated 
absolute difference +def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(i32 1), (VTI.Pred VCCR:$mask), +(VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD size, list pattern=[]> : MVE_int<"vrhadd", suffix, size, p
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
MarkMurrayARM updated this revision to Diff 231055. MarkMurrayARM added a comment. Respond to review comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vabdq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.s32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.f32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, 
<16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.f16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td === --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,7 +1664,8 @@ } -class MVE_VABD_int size, list pattern=[]> +class MVE_VABD_int size, + list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { let Inst{28} = U; @@ -1676,12 +1677,35 @@ let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { +// Unpredicated absolute difference +def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + +// Predicated absolute difference +def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(VTI.Pred VCCR:$mask), (VTI.Vec 
MQPR:$inactive))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(i32 1), (VTI.Pred VCCR:$mask), +(VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD size, list pattern=[]> : MVE_int<"vrhadd", suffix, size, pattern> { @@ -2950,8 +2974,28 @@ let validForTailPredication = 1; } -def MVE_VABDf
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
simon_tatham added a comment. This mostly LGTM: only a handful of nits. Comment at: clang/include/clang/Basic/arm_mve.td:45 let params = T.Usual in { +def vabdq: Intrinsic $a, $b)>; +} Can you wrap this line to 80 columns, please? I've been trying to fit the rest of the file in that width. Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:1669 + list pattern=[]> + : MVE_int { This new template parameter `iname` seems to be redundant, since I can't see anywhere you set it to anything other than `"vabd"`. Comment at: llvm/lib/Target/ARM/ARMInstrMVE.td:2958 +class MVE_VABD_fp + : MVE_float { Similarly here: `iname` is an unnecessary extra template parameter. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
MarkMurrayARM updated this revision to Diff 231034. MarkMurrayARM added a comment. Rebase and reupload patches. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vabdq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.s32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.f32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, 
<16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.f16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td === --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,8 +1664,9 @@ } -class MVE_VABD_int size, list pattern=[]> - : MVE_int<"vabd", suffix, size, pattern> { +class MVE_VABD_int size, + list pattern=[]> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1676,12 +1677,35 @@ let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { +// Unpredicated absolute difference +def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + +// Predicated absolute difference +def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec 
MQPR:$Qn), +(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(i32 1), (VTI.Pred VCCR:$mask), +(VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD size, list pattern=[]> : MVE_int<"vrhadd",
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
MarkMurrayARM updated this revision to Diff 230890. MarkMurrayARM added a comment. Merge all VABD intrinsic types under T.Usual instead of doing the floats separately. Add more tests. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vabdq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.s32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.f32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail 
call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.f16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td === --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,8 +1664,9 @@ } -class MVE_VABD_int size, list pattern=[]> - : MVE_int<"vabd", suffix, size, pattern> { +class MVE_VABD_int size, + list pattern=[]> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1676,12 +1677,35 @@ let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { +// Unpredicated absolute difference +def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + +// Predicated absolute 
difference +def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(i32 1), (VTI.Pred VCCR:$mask), +(VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
dmgreen added inline comments. Comment at: clang/include/clang/Basic/arm_mve.td:33 let params = T.Int in { +def vabdq: Intrinsic $a, $b)>; def vaddq: Intrinsic; Can this and vabdqf below be combined into one using T.Usual? I believe the differences only usually come from "fadd" being different to "add". If they are both intrinsics (which sounds good to me for abd), then they can more happily live together. Comment at: clang/test/CodeGen/arm-mve-intrinsics/vabdq.c:13 +uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC More tests please. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70545/new/ https://reviews.llvm.org/D70545 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70545: [ARM][MVE][Intrinsics] Add MVE VABD intrinsics.
MarkMurrayARM created this revision. MarkMurrayARM added reviewers: simon_tatham, ostannard, dmgreen. Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls. Herald added projects: clang, LLVM. Add MVE VABD intrinsics. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D70545 Files: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vabdq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.s32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vabd.f32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail 
call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vabdt.f16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) Index: llvm/lib/Target/ARM/ARMInstrMVE.td === --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,8 +1664,9 @@ } -class MVE_VABD_int size, list pattern=[]> - : MVE_int<"vabd", suffix, size, pattern> { +class MVE_VABD_int size, + list pattern=[]> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1676,12 +1677,35 @@ let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { +// Unpredicated absolute difference +def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + +// Predicated absolute 
difference +def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) +(VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), +(i32 1), (VTI.Pred VCCR:$mask), +(VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD + : MVE_VABD_m<"vabd", VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD;