https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/169795
>From dffd391f768f76ec4310a73368f01fd3360ab573 Mon Sep 17 00:00:00 2001 From: David Green <[email protected]> Date: Sun, 30 Nov 2025 11:19:20 +0000 Subject: [PATCH] [ARM] Introduce intrinsics for MVE minnm/maxmn under strict-fp. Similar to #169156 again, this is mostly for denormal handling as there is no rounding step in a minnum/maxnum. --- clang/include/clang/Basic/arm_mve.td | 8 +- clang/include/clang/Basic/arm_mve_defs.td | 4 + .../CodeGen/arm-mve-intrinsics/vmaxnmaq.c | 84 ++++++++----- .../test/CodeGen/arm-mve-intrinsics/vmaxnmq.c | 110 ++++++++++++------ .../CodeGen/arm-mve-intrinsics/vminnmaq.c | 84 ++++++++----- .../test/CodeGen/arm-mve-intrinsics/vminnmq.c | 110 ++++++++++++------ llvm/include/llvm/IR/IntrinsicsARM.td | 7 ++ llvm/lib/Target/ARM/ARMInstrMVE.td | 22 ++-- .../mve-intrinsics/strict-intrinsics.ll | 91 ++++++++++++++- 9 files changed, 387 insertions(+), 133 deletions(-) diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 51b7dd16e5195..77531c31538c1 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -783,15 +783,15 @@ let params = T.Unsigned in { } let params = T.Float in { def vminnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b), - (IRIntBase<"minnum", [Vector]> $a, $b)>; + (fminnm $a, $b)>; def vmaxnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b), - (IRIntBase<"maxnum", [Vector]> $a, $b)>; + (fmaxnm $a, $b)>; def vminnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b), - (IRIntBase<"minnum", [Vector]> + (fminnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; def vmaxnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b), - (IRIntBase<"maxnum", [Vector]> + (fmaxnm (IRIntBase<"fabs", [Vector]> $a), (IRIntBase<"fabs", [Vector]> $b))>; } diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 3714262898476..3210549d0cb56 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -589,6 +589,10 @@ def fsub: strictFPAlt<fsub_node, IRInt<"vsub", [Vector]>>; def fmul: strictFPAlt<fmul_node, IRInt<"vmul", [Vector]>>; +def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>, + IRInt<"vminnm", [Vector]>>; +def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>, + IRInt<"vmaxnm", [Vector]>>; // ----------------------------------------------------------------------------- // Convenience lists of parameter types. 'T' is just a container record, so you diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c index 613a390bc6d36..04834ece3a4a6 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vmaxnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c index bad7cd903ab16..1225353a5a9d2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vmaxnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmaxnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vmaxnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c index 0182cf7c5b6b3..fc0dc5701e4d9 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c @@ -1,17 +1,26 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vminnmaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) { @@ -22,12 +31,19 @@ float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) { @@ -38,12 +54,19 @@ float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -54,12 +77,19 @@ float16x8_t test_vminnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -69,3 +99,5 @@ float32x4_t test_vminnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmaq_m_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c index b48ff9d84b8f6..7dbad94c77674 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vminnmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vminnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) { @@ -20,10 +27,15 @@ float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vminnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { @@ -34,12 +46,19 @@ float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -50,12 +69,19 @@ float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +92,19 @@ float32x4_t test_vminnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -82,12 +115,19 @@ float16x8_t test_vminnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vminnmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vminnmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -97,3 +137,5 @@ float32x4_t test_vminnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) return vminnmq_x_f32(a, b, p); #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3787e2591a4c1..3b475c8d5614d 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -972,6 +972,13 @@ def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vminnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_vmaxnm: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params, LLVMType pred = llvm_anyvector_ty, list<IntrinsicProperty> props = [IntrNoMem], diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f9aaacb7f5250..097318711d137 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -393,6 +393,12 @@ def vsub : PatFrags<(ops node:$lhs, node:$rhs), def vmul : PatFrags<(ops node:$lhs, node:$rhs), [(fmul node:$lhs, node:$rhs), (int_arm_mve_vmul node:$lhs, node:$rhs)]>; +def vminnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fminnum node:$lhs, node:$rhs), + (int_arm_mve_vminnm node:$lhs, node:$rhs)]>; +def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fmaxnum node:$lhs, node:$rhs), + (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>; // --------- Start of base classes for the instructions themselves @@ -1489,7 +1495,7 @@ class MVE_VMINMAXNM<string iname, string suffix, bits<2> sz, bit bit_21, let validForTailPredication = 1; } -multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> { +multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt> { def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>; let Predicates = [HasMVEFloat] in { @@ -1497,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode } } -defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; -defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, int_arm_mve_min_predicated>; +defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, int_arm_mve_min_predicated>; class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size, @@ -4148,7 +4154,7 @@ class MVE_VMAXMINNMA<string iname, string suffix, bits<2> size, bit bit_12, } multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI, - SDNode unpred_op, Intrinsic pred_int, + SDPatternOperator unpred_op, Intrinsic pred_int, bit bit_12> { def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>; defvar Inst = !cast<Instruction>(NAME); @@ -4168,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI, } multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12> - : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, bit_12>; defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>; defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>; multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12> - : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, bit_12>; defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>; defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>; diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index d19844c683a8f..9e42f3984c24d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -240,4 +240,93 @@ entry: ret <8 x half> %0 } -attributes #0 = { strictfp } \ No newline at end of file + +define arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vminnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +attributes #0 = { strictfp } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
