MarkMurrayARM created this revision.
Herald added subscribers: llvm-commits, cfe-commits, dmgreen, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

Add the VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics and their predicated versions, with Clang and LLVM unit tests.
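
A minimal usage sketch of the new intrinsics, for reviewers' reference (the wrapper function names below are illustrative only and not part of the patch):

  #include <arm_mve.h>

  // Unpredicated: lane-wise unsigned minimum of two 8-bit vectors.
  uint8x16_t min_u8(uint8x16_t a, uint8x16_t b)
  {
      return vminq_u8(a, b);
  }

  // Predicated: lanes whose predicate bit is clear take their value
  // from 'inactive' instead of the lane-wise maximum.
  int32x4_t max_masked_s32(int32x4_t inactive, int32x4_t a, int32x4_t b,
                           mve_pred16_t p)
  {
      return vmaxq_m_s32(inactive, a, b, p);
  }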


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70829

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
  clang/test/CodeGen/arm-mve-intrinsics/vminq.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
  llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll

Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminq_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmint.s8 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmint.s16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmint.s32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.f16 ge, q1, q0
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fcmp ole <8 x half> %a, %b
+  %1 = select <8 x i1> %0, <8 x half> %a, <8 x half> %b
+  ret <8 x half> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vminnmq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.f32 ge, q1, q0
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fcmp ole <4 x float> %a, %b
+  %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminnmq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vminnmt.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vminnmq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vminnmt.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %a, %b
+  %1 = select <16 x i1> %0, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %a, %b
+  %1 = select <8 x i1> %0, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxq_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %a, %b
+  %1 = select <4 x i1> %0, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %1
+}
+
+define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmaxt.s8 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
+
+declare <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
+
+define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmaxt.s16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmaxt.s32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.f16 ge, q0, q1
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fcmp oge <8 x half> %a, %b
+  %1 = select <8 x i1> %0, <8 x half> %a, <8 x half> %b
+  ret <8 x half> %1
+}
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vmaxnmq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.f32 ge, q0, q1
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fcmp oge <4 x float> %a, %b
+  %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %1
+}
+
+define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxnmq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmaxnmt.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
+
+declare <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2
+
+define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
+; CHECK-LABEL: test_vmaxnmq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmaxnmt.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
+
+declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1015,6 +1015,16 @@
             (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
   def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
             (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+            (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          (i32 1), (v4i1 VCCR:$mask),
+                          (v4f32 MQPR:$inactive)))>;
+  def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+            (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          (i32 1), (v8i1 VCCR:$mask),
+                          (v8f16 MQPR:$inactive)))>;
 }
 
 def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
@@ -1025,6 +1035,16 @@
             (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
   def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
             (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+            (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          (i32 1), (v4i1 VCCR:$mask),
+                          (v4f32 MQPR:$inactive)))>;
+  def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+            (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          (i32 1), (v8i1 VCCR:$mask),
+                          (v8f16 MQPR:$inactive)))>;
 }
 
 
@@ -1042,48 +1062,45 @@
   let Inst{4} = bit_4;
 }
 
-multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
-  def s8  : MVE_VMINMAX<iname, "s8",  0b0, 0b00, bit_4>;
-  def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
-  def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
-  def u8  : MVE_VMINMAX<iname, "u8",  0b1, 0b00, bit_4>;
-  def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
-  def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
-}
+multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
+                      SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
 
-defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
-defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated min/max
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (!cast<Instruction>(NAME)
+                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+    // Predicated min/max
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                            (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (!cast<Instruction>(NAME)
+                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                            (i32 1), (VTI.Pred VCCR:$mask),
+                            (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
+multiclass MVE_VMAX<MVEVectorVTInfo VTI>
+  : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
+multiclass MVE_VMIN<MVEVectorVTInfo VTI>
+  : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
+
+defm MVE_VMINs8   : MVE_VMIN<MVE_v16s8>;
+defm MVE_VMINs16  : MVE_VMIN<MVE_v8s16>;
+defm MVE_VMINs32  : MVE_VMIN<MVE_v4s32>;
+defm MVE_VMINu8   : MVE_VMIN<MVE_v16u8>;
+defm MVE_VMINu16  : MVE_VMIN<MVE_v8u16>;
+defm MVE_VMINu32  : MVE_VMIN<MVE_v4u32>;
+
+defm MVE_VMAXs8   : MVE_VMAX<MVE_v16s8>;
+defm MVE_VMAXs16  : MVE_VMAX<MVE_v8s16>;
+defm MVE_VMAXs32  : MVE_VMAX<MVE_v4s32>;
+defm MVE_VMAXu8   : MVE_VMAX<MVE_v16u8>;
+defm MVE_VMAXu16  : MVE_VMAX<MVE_v8u16>;
+defm MVE_VMAXu32  : MVE_VMAX<MVE_v4u32>;
+
 // end of mve_comp instructions
 
 // start of mve_bit instructions
Index: llvm/include/llvm/IR/IntrinsicsARM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsARM.td
+++ llvm/include/llvm/IR/IntrinsicsARM.td
@@ -796,6 +796,12 @@
   def _u: Intrinsic<rets, params, props, name, sdprops>;
 }
 
+def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+   [IntrNoMem]>;
+def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
+   [IntrNoMem]>;
 def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
    [IntrNoMem]>;
Index: clang/test/CodeGen/arm-mve-intrinsics/vminq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/vminq.c
@@ -0,0 +1,98 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vminq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vminq(a, b);
+#else /* POLYMORPHIC */
+    return vminq_u8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vminq(a, b);
+#else /* POLYMORPHIC */
+    return vminq_s16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vminq(a, b);
+#else /* POLYMORPHIC */
+    return vminq_u32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vminq_m_s8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vminq_m_u16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vminq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vminq_m_s32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
Index: clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
@@ -0,0 +1,67 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vminnmq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x half> [[A]], <8 x half> [[B]]
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vminnmq(a, b);
+#else /* POLYMORPHIC */
+    return vminnmq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[A]], <4 x float> [[B]]
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vminnmq(a, b);
+#else /* POLYMORPHIC */
+    return vminnmq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vminnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vminnmq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vminnmq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vminnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vminnmq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
Index: clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
@@ -0,0 +1,98 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmaxq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vmaxq(a, b);
+#else /* POLYMORPHIC */
+    return vmaxq_s8(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vmaxq(a, b);
+#else /* POLYMORPHIC */
+    return vmaxq_u16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vmaxq(a, b);
+#else /* POLYMORPHIC */
+    return vmaxq_s32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vmaxq_m_u8(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vmaxq_m_s16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxq_m_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmaxq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vmaxq_m_u32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
Index: clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
@@ -0,0 +1,67 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmaxnmq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x half> [[A]], <8 x half> [[B]]
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+    return vmaxnmq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[A]], <4 x float> [[B]]
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vmaxnmq(a, b);
+#else /* POLYMORPHIC */
+    return vmaxnmq_f32(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vmaxnmq_m_f16(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmaxnmq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmaxnmq_m(inactive, a, b, p);
+#else /* POLYMORPHIC */
+    return vmaxnmq_m_f32(inactive, a, b, p);
+#endif /* POLYMORPHIC */
+}
Index: clang/include/clang/Basic/arm_mve_defs.td
===================================================================
--- clang/include/clang/Basic/arm_mve_defs.td
+++ clang/include/clang/Basic/arm_mve_defs.td
@@ -107,6 +107,7 @@
 def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
 def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
+def select: IRBuilder<"CreateSelect">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.
Index: clang/include/clang/Basic/arm_mve.td
===================================================================
--- clang/include/clang/Basic/arm_mve.td
+++ clang/include/clang/Basic/arm_mve.td
@@ -105,6 +105,26 @@
 defm vorrq_m: predicated_bit_op_fp<"orr_predicated">;
 }
 
+// Predicated intrinsics - Int types only
+let params = T.Int in {
+def vminq_m: Intrinsic<
+    Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+    (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+def vmaxq_m: Intrinsic<
+    Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+    (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+}
+
+// Predicated intrinsics - Float types only
+let params = T.Float in {
+def vminnmq_m: Intrinsic<
+    Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+    (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+def vmaxnmq_m: Intrinsic<
+    Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
+    (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
+}
+
 let params = T.Int in {
 def vminvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),
     (Scalar (IRInt<"minv", [Vector], 1> $prev, $vec))>;
@@ -173,6 +193,28 @@
   defm: compare<"le", fcmp_le>;
 }
 
+let params = T.Signed in {
+  def vminq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                               (select (icmp_sle $a, $b), $a, $b)>;
+  def vmaxq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                               (select (icmp_sge $a, $b), $a, $b)>;
+}
+let params = T.Unsigned in {
+  def vminqu: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                                (select (icmp_ule $a, $b), $a, $b)>,
+                                    NameOverride<"vminq">;
+  def vmaxqu: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                                (select (icmp_uge $a, $b), $a, $b)>,
+                                    NameOverride<"vmaxq">;
+}
+let params = T.Float in {
+  def vminnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                               (select (fcmp_le $a, $b), $a, $b)>;
+  def vmaxnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
+                               (select (fcmp_ge $a, $b), $a, $b)>;
+}
+
+
 multiclass contiguous_load<string mnemonic, PrimitiveType memtype,
                            list<Type> same_size, list<Type> wider> {
   // Intrinsics named with explicit memory and element sizes that match: