[clang] [llvm] [ARM] Introduce intrinsics for MVE fma under strict-fp. (PR #169795)

David Green via cfe-commits Sun, 30 Nov 2025 03:19:47 -0800

https://github.com/davemgreen updated 
https://github.com/llvm/llvm-project/pull/169795


>From dffd391f768f76ec4310a73368f01fd3360ab573 Mon Sep 17 00:00:00 2001
From: David Green <[email protected]>
Date: Sun, 30 Nov 2025 11:19:20 +0000
Subject: [PATCH] [ARM] Introduce intrinsics for MVE minnm/maxmn under
 strict-fp.

Similar to #169156 again, this is mostly for denormal handling as there is no
rounding step in a minnum/maxnum.
---
 clang/include/clang/Basic/arm_mve.td          |   8 +-
 clang/include/clang/Basic/arm_mve_defs.td     |   4 +
 .../CodeGen/arm-mve-intrinsics/vmaxnmaq.c     |  84 ++++++++-----
 .../test/CodeGen/arm-mve-intrinsics/vmaxnmq.c | 110 ++++++++++++------
 .../CodeGen/arm-mve-intrinsics/vminnmaq.c     |  84 ++++++++-----
 .../test/CodeGen/arm-mve-intrinsics/vminnmq.c | 110 ++++++++++++------
 llvm/include/llvm/IR/IntrinsicsARM.td         |   7 ++
 llvm/lib/Target/ARM/ARMInstrMVE.td            |  22 ++--
 .../mve-intrinsics/strict-intrinsics.ll       |  91 ++++++++++++++-
 9 files changed, 387 insertions(+), 133 deletions(-)

diff --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 51b7dd16e5195..77531c31538c1 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -783,15 +783,15 @@ let params = T.Unsigned in {
 }
 let params = T.Float in {
   def vminnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
-                                 (IRIntBase<"minnum", [Vector]> $a, $b)>;
+                                 (fminnm $a, $b)>;
   def vmaxnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
-                                 (IRIntBase<"maxnum", [Vector]> $a, $b)>;
+                                 (fmaxnm $a, $b)>;
   def vminnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
-                                  (IRIntBase<"minnum", [Vector]>
+                                  (fminnm
                                    (IRIntBase<"fabs", [Vector]> $a),
                                    (IRIntBase<"fabs", [Vector]> $b))>;
   def vmaxnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
-                                  (IRIntBase<"maxnum", [Vector]>
+                                  (fmaxnm
                                    (IRIntBase<"fabs", [Vector]> $a),
                                    (IRIntBase<"fabs", [Vector]> $b))>;
 }
diff --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..3210549d0cb56 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -589,6 +589,10 @@ def fsub: strictFPAlt<fsub_node,
                       IRInt<"vsub", [Vector]>>;
 def fmul: strictFPAlt<fmul_node,
                       IRInt<"vmul", [Vector]>>;
+def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>,
+                         IRInt<"vminnm", [Vector]>>;
+def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>,
+                         IRInt<"vmaxnm", [Vector]>>;
 
 // 
-----------------------------------------------------------------------------
 // Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
index 613a390bc6d36..04834ece3a4a6 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
@@ -1,17 +1,26 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vmaxnmaq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> 
[[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> 
[[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> 
[[TMP0]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 
x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 
x half> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmaq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x 
half> [[A:%.*]]) #[[ATTR3:[0-9]+]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x 
half> [[B:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b)
 {
@@ -22,12 +31,19 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmaq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> 
[[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> 
[[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x 
float> [[TMP0]], <4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 
x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 
x float> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmaq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x 
float> [[A:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x 
float> [[B:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) 
#[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b)
 {
@@ -38,12 +54,19 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmaq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 {
@@ -54,12 +77,19 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t 
b, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmaq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
 {
@@ -69,3 +99,5 @@ float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p)
     return vmaxnmaq_m_f32(a, b, p);
 #endif /* POLYMORPHIC */
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add 
tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
index bad7cd903ab16..1225353a5a9d2 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
@@ -1,15 +1,22 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vmaxnmq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> 
[[A:%.*]], <8 x half> [[B:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) 
#[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
 {
@@ -20,10 +27,15 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x 
float> [[A:%.*]], <4 x float> [[B:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) 
#[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
 {
@@ -34,12 +46,19 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, 
float16x8_t b, mve_pred16_t p)
 {
@@ -50,12 +69,19 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, 
float16x8_t a, float16x8_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, 
float32x4_t b, mve_pred16_t p)
 {
@@ -66,12 +92,19 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, 
float32x4_t a, float32x4_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmq_x_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 {
@@ -82,12 +115,19 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t 
b, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vmaxnmq_x_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
 {
@@ -97,3 +137,5 @@ float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p)
     return vmaxnmq_x_f32(a, b, p);
 #endif /* POLYMORPHIC */
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add 
tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
index 0182cf7c5b6b3..fc0dc5701e4d9 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
@@ -1,17 +1,26 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vminnmaq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> 
[[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> 
[[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> 
[[TMP0]], <8 x half> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 
x half> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 
x half> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmaq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x 
half> [[A:%.*]]) #[[ATTR3:[0-9]+]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x 
half> [[B:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b)
 {
@@ -22,12 +31,19 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmaq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> 
[[A:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> 
[[B:%.*]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x 
float> [[TMP0]], <4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 
x float> [[A:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 
x float> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmaq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x 
float> [[A:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x 
float> [[B:%.*]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vminnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) 
#[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b)
 {
@@ -38,12 +54,19 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmaq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmaq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 {
@@ -54,12 +77,19 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t 
b, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmaq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmaq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
 {
@@ -69,3 +99,5 @@ float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p)
     return vminnmaq_m_f32(a, b, p);
 #endif /* POLYMORPHIC */
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add 
tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
index b48ff9d84b8f6..7dbad94c77674 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
@@ -1,15 +1,22 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | 
opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vminnmq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> 
[[A:%.*]], <8 x half> [[B:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x half> 
@llvm.arm.mve.vminnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) 
#[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b)
 {
@@ -20,10 +27,15 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x 
float> [[A:%.*]], <4 x float> [[B:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x float> 
@llvm.arm.mve.vminnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) 
#[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b)
 {
@@ -34,12 +46,19 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmq_m_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_m_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, 
float16x8_t b, mve_pred16_t p)
 {
@@ -50,12 +69,19 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, 
float16x8_t a, float16x8_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmq_m_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_m_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, 
float32x4_t b, mve_pred16_t p)
 {
@@ -66,12 +92,19 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, 
float32x4_t a, float32x4_t
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmq_x_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NOSTRICT-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_x_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> 
[[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 {
@@ -82,12 +115,19 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t 
b, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vminnmq_x_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NOSTRICT-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vminnmq_x_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
 {
@@ -97,3 +137,5 @@ float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, 
mve_pred16_t p)
     return vminnmq_x_f32(a, b, p);
 #endif /* POLYMORPHIC */
 }
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add 
tests below this line:
+// CHECK: {{.*}}
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td 
b/llvm/include/llvm/IR/IntrinsicsARM.td
index 3787e2591a4c1..3b475c8d5614d 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -972,6 +972,13 @@ def int_arm_mve_vmaxnma_predicated: 
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
     [IntrNoMem]>;
 
+def int_arm_mve_vminnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, LLVMMatchType<0>],
+    [IntrNoMem]>;
+def int_arm_mve_vmaxnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, LLVMMatchType<0>],
+    [IntrNoMem]>;
+
 multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
                          LLVMType pred = llvm_anyvector_ty,
                          list<IntrinsicProperty> props = [IntrNoMem],
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td 
b/llvm/lib/Target/ARM/ARMInstrMVE.td
index f9aaacb7f5250..097318711d137 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -393,6 +393,12 @@ def vsub : PatFrags<(ops node:$lhs, node:$rhs),
 def vmul : PatFrags<(ops node:$lhs, node:$rhs),
                     [(fmul node:$lhs, node:$rhs),
                      (int_arm_mve_vmul node:$lhs, node:$rhs)]>;
+def vminnm : PatFrags<(ops node:$lhs, node:$rhs),
+                    [(fminnum node:$lhs, node:$rhs),
+                     (int_arm_mve_vminnm node:$lhs, node:$rhs)]>;
+def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs),
+                    [(fmaxnum node:$lhs, node:$rhs),
+                     (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>;
 
 // --------- Start of base classes for the instructions themselves
 
@@ -1489,7 +1495,7 @@ class MVE_VMINMAXNM<string iname, string suffix, bits<2> 
sz, bit bit_21,
   let validForTailPredication = 1;
 }
 
-multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, 
SDNode Op, Intrinsic PredInt> {
+multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, 
SDPatternOperator Op, Intrinsic PredInt> {
   def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>;
 
   let Predicates = [HasMVEFloat] in {
@@ -1497,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, 
MVEVectorVTInfo VTI, SDNode
   }
 }
 
-defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, 
int_arm_mve_max_predicated>;
-defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, 
int_arm_mve_max_predicated>;
-defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, 
int_arm_mve_min_predicated>;
-defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, 
int_arm_mve_min_predicated>;
+defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, 
int_arm_mve_max_predicated>;
+defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, 
int_arm_mve_max_predicated>;
+defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, 
int_arm_mve_min_predicated>;
+defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, 
int_arm_mve_min_predicated>;
 
 
 class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
@@ -4148,7 +4154,7 @@ class MVE_VMAXMINNMA<string iname, string suffix, bits<2> 
size, bit bit_12,
 }
 
 multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
-                      SDNode unpred_op, Intrinsic pred_int,
+                      SDPatternOperator unpred_op, Intrinsic pred_int,
                       bit bit_12> {
   def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>;
   defvar Inst = !cast<Instruction>(NAME);
@@ -4168,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m<string iname, 
MVEVectorVTInfo VTI,
 }
 
 multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12>
-  : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, 
bit_12>;
+  : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, 
bit_12>;
 
 defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>;
 defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>;
 
 multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12>
-  : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, 
bit_12>;
+  : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, 
bit_12>;
 
 defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>;
 defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
index d19844c683a8f..9e42f3984c24d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
@@ -240,4 +240,93 @@ entry:
   ret <8 x half> %0
 }
 
-attributes #0 = { strictfp }
\ No newline at end of file
+
+define arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> 
%b) #0 {
+; CHECK-LABEL: test_vminnmq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x 
half> %b)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x 
float> %b) #0 {
+; CHECK-LABEL: test_vminnmq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnm.f32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x 
float> %b)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> 
%b) #0 {
+; CHECK-LABEL: test_vmaxnmq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnm.f16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x 
half> %b)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x 
float> %b) #0 {
+; CHECK-LABEL: test_vmaxnmq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnm.f32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x 
float> %b)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vminnmaq_f16(<8 x half> %a, <8 x half> 
%b) #0 {
+; CHECK-LABEL: test_vminnmaq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnma.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x 
half> %1)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vminnmaq_f32(<4 x float> %a, <4 x 
float> %b) #0 {
+; CHECK-LABEL: test_vminnmaq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnma.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x 
float> %1)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vmaxnmaq_f16(<8 x half> %a, <8 x half> 
%b) #0 {
+; CHECK-LABEL: test_vmaxnmaq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnma.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x 
half> %1)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmaxnmaq_f32(<4 x float> %a, <4 x 
float> %b) #0 {
+; CHECK-LABEL: test_vmaxnmaq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmaxnma.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x 
float> %1)
+  ret <4 x float> %2
+}
+
+attributes #0 = { strictfp }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [ARM] Introduce intrinsics for MVE fma under strict-fp. (PR #169795)

Reply via email to