https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/169797
>From 866b55220d301821b9d578fcf30f3eb59a75e138 Mon Sep 17 00:00:00 2001 From: David Green <[email protected]> Date: Sun, 30 Nov 2025 11:20:33 +0000 Subject: [PATCH] [ARM] Introduce intrinsics for MVE vrnd under strict-fp. Similar to #169156 again, this adds intrinsics for strict-fp vrnd nodes to make sure they end up as the original instruction. --- clang/include/clang/Basic/arm_mve.td | 3 +- clang/test/CodeGen/arm-mve-intrinsics/vrnd.c | 620 ++++++++++++------ llvm/include/llvm/IR/IntrinsicsARM.td | 8 + llvm/lib/Target/ARM/ARMInstrMVE.td | 3 + .../mve-intrinsics/strict-intrinsics.ll | 213 +++++- 5 files changed, 651 insertions(+), 196 deletions(-) diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 51b7dd16e5195..71e3d0eae2a5b 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -700,7 +700,8 @@ let params = [s16, s32] in { multiclass vrnd<IRIntBase ir_int, string suffix> { let params = T.Float in { - def "": Intrinsic<Vector, (args Vector:$a), (ir_int $a)>; + def "": Intrinsic<Vector, (args Vector:$a), + (strictFPAlt<ir_int, IRInt<"vrint"#suffix, [Vector]>> $a)>; defm "": IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred), (IRInt<"vrint"#suffix#"_predicated", [Vector, Predicate]> $a, $pred, $inactive)>; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c index 4888bc8c5e98f..9490f4f65157d 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -1,15 +1,22 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target #include <arm_mve.h> -// CHECK-LABEL: @test_vrndaq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndaq_f16(float16x8_t a) { @@ -20,10 +27,15 @@ float16x8_t test_vrndaq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndaq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndaq_f32(float32x4_t a) { @@ -34,10 +46,15 @@ float32x4_t test_vrndaq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndmq_f16(float16x8_t a) { @@ -48,10 +65,15 @@ float16x8_t test_vrndmq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndmq_f32(float32x4_t a) { @@ -62,10 +84,15 @@ float32x4_t test_vrndmq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndpq_f16(float16x8_t a) { @@ -76,10 +103,15 @@ float16x8_t test_vrndpq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndpq_f32(float32x4_t a) { @@ -90,10 +122,15 @@ float32x4_t test_vrndpq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndq_f16(float16x8_t a) { @@ -104,10 +141,15 @@ float16x8_t test_vrndq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndq_f32(float32x4_t a) { @@ -118,10 +160,15 @@ float32x4_t test_vrndq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndxq_f16(float16x8_t a) { @@ -132,10 +179,15 @@ float16x8_t test_vrndxq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndxq_f32(float32x4_t a) { @@ -146,10 +198,15 @@ float32x4_t test_vrndxq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndnq_f16(float16x8_t a) { @@ -160,10 +217,15 @@ float16x8_t test_vrndnq_f16(float16x8_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndnq_f32(float32x4_t a) { @@ -174,12 +236,19 @@ float32x4_t test_vrndnq_f32(float32x4_t a) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndaq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -190,12 +259,19 @@ float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndaq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -206,12 +282,19 @@ float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -222,12 +305,19 @@ float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -238,12 +328,19 @@ float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -254,12 +351,19 @@ float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -270,12 +374,19 @@ float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -286,12 +397,19 @@ float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -302,12 +420,19 @@ float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -318,12 +443,19 @@ float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -334,12 +466,19 @@ float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_m_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { @@ -350,12 +489,19 @@ float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_m_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { @@ -366,12 +512,19 @@ float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndaq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -382,12 +535,19 @@ float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndaq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndaq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndaq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -398,12 +558,19 @@ float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -414,12 +581,19 @@ float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndmq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndmq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndmq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -430,12 +604,19 @@ float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -446,12 +627,19 @@ float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndnq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndnq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndnq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -462,12 +650,19 @@ float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -478,12 +673,19 @@ float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndpq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndpq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndpq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -494,12 +696,19 @@ float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -510,12 +719,19 @@ float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -526,12 +742,19 @@ float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_x_f16( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p) { @@ -542,12 +765,19 @@ float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vrndxq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NOSTRICT-LABEL: @test_vrndxq_x_f32( +// CHECK-NOSTRICT-NEXT: entry: +// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vrndxq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR3]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p) { @@ -558,3 +788,5 @@ float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3787e2591a4c1..09eab4b8e385e 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1325,6 +1325,8 @@ def int_arm_mve_vqdmull_predicated: DefaultAttrsIntrinsic< LLVMMatchType<0>], [IntrNoMem]>; +class MVESimpleUnary: DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>], [IntrNoMem]>; class MVESimpleUnaryPredicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; @@ -1335,6 +1337,12 @@ def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated; def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated; def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated; def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintz: MVESimpleUnary; +def int_arm_mve_vrintm: MVESimpleUnary; +def int_arm_mve_vrintp: MVESimpleUnary; +def int_arm_mve_vrinta: MVESimpleUnary; +def int_arm_mve_vrintx: MVESimpleUnary; +def int_arm_mve_vrintn: MVESimpleUnary; def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated; def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated; def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index f9aaacb7f5250..bf9a38a16e013 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3564,11 +3564,14 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode, SDPatternOperator unpred_op> { def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); + defvar unpred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix); defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated"); let Predicates = [HasMVEFloat] in { def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))), (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>; + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$val))), + (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>; def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))), (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen, diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index d19844c683a8f..9df5a9f06def4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -240,4 +240,215 @@ entry: ret <8 x half> %0 } -attributes #0 = { strictfp } \ No newline at end of file +define arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vminnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vminnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vminnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmaxnmaq_f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b) + %2 = tail call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> %0, <8 x half> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmaxnmaq_f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_vmaxnmaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnma.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b) + %2 = tail call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> %0, <4 x float> %1) + ret <4 x float> %2 +} + + +define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndnq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndnq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintn.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndxq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndxq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintx.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintx.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndxq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintx.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintx.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndaq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndaq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrinta.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrinta.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndaq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrinta.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrinta.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndzq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndzq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintz.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintz.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndzq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndzq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintz.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintz.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndmq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintm.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintm.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintm.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintm.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndpq_f16(<8 x half> %a) #0 { +; CHECK-LABEL: test_vrndpq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintp.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vrintp.v8f16(<8 x half> %a) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { +; CHECK-LABEL: test_vrndpq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintp.f32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vrintp.v4f32(<4 x float> %a) + ret <4 x float> %0 +} + + + +attributes #0 = { strictfp } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
