[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
This revision was automatically updated to reflect the committed changes. Closed by commit rG510792a2e0e3: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics. (authored by MarkMurrayARM). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 Files: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c clang/test/CodeGen/arm-mve-intrinsics/vminq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u8 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.s16 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr 
#0 { +; CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s32 q0, q1, q2 +; 
CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
MarkMurrayARM marked an inline comment as done. MarkMurrayARM added a comment. Nit terminated with extreme prejudice. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
MarkMurrayARM updated this revision to Diff 231664. MarkMurrayARM added a comment. Address whitespace nit. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 Files: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c clang/test/CodeGen/arm-mve-intrinsics/vminq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u8 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.s16 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u32 q0, q0, 
q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s32 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half>
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
simon_tatham accepted this revision. simon_tatham added a comment. I do, but only an indentation quibble. Comment at: clang/include/clang/Basic/arm_mve.td:205 +(select (icmp_ule $a, $b), $a, $b)>, +NameOverride<"vminq">; + def vmaxqu: Intrinsic<... [remainder of the quoted snippet and the inline comment were lost in archive extraction] https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
dmgreen accepted this revision. dmgreen added a comment. This revision is now accepted and ready to land. Thanks. I'm happy if simon does not have any extra comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
MarkMurrayARM updated this revision to Diff 231540. MarkMurrayARM added a comment. Fix non-predicated floats. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 Files: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c clang/test/CodeGen/arm-mve-intrinsics/vminq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u8 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.s16 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u32 q0, 
q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s32 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half>
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
dmgreen added inline comments. Comment at: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll:7 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vcmp.f16 ge, q0, q1 +; CHECK-NEXT:vpsel q0, q0, q1 You may want to use llvm.minnum directly (providing the semantics are indeed equivalent). Otherwise it will need fastmath to fuse these together, which isn't the same as the original intrinsic. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D70829/new/ https://reviews.llvm.org/D70829 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D70829: [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics.
MarkMurrayARM created this revision. Herald added subscribers: llvm-commits, cfe-commits, dmgreen, hiraditya, kristof.beyls. Herald added projects: clang, LLVM. Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics and their predicated versions. Add unit tests. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D70829 Files: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c clang/test/CodeGen/arm-mve-intrinsics/vminq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u8 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.s16 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; 
CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmin.u32 q0, q0, q1 +; CHECK-NEXT:bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s8 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s16 q0, q1, q2 +; CHECK-NEXT:bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT:vmsr p0, r0 +; CHECK-NEXT:vpst +; CHECK-NEXT:vmint.s32 q0, q1, q2 +; CHECK-NEXT:bx lr 
+entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll === --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s