https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/169156
As far as I understand, the MVE fp vadd/vsub/vmul instructions will set exception flags in the same ways as scalar fadd/fsub/fmul, but will not honor flush-to-zero (for f32 they always flush, for f16 they follows the fpsrc flags) and will always use the default rounding mode. This means that we cannot convert the vadd_f23/vsub_f32/vmul_f32 intrinsics to llvm.constrained.fadd/fsub/fmul, and the fadd/fsub/fmul without changing the expected behaviour under strict-fp. This patch introduces a set in intrinsics that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD. If this is acceptable, the other intrinsics I can add will be: fma fptoi, itofp trunc/round/ceil/etc fcmp minnum/maxnum The current implementations assumes that the standard variant of a strictfp alternative will be a IRBuilder, this can be changed to take a IRBuilder or IRInt. There are also Neon intrinsics that AFAIU behave the same way. >From 4d8d1a25b53cfaac8ab9c61fcd6aee54e40017cc Mon Sep 17 00:00:00 2001 From: David Green <[email protected]> Date: Sat, 22 Nov 2025 09:39:57 +0000 Subject: [PATCH] [ARM] Introduce intrinsics for MVE add/sub/mul under strict-fp. As far as I understand, the MVE fp vadd/vsub/vmul instructions will set exception flags in the same ways as scalar fadd/fsub/fmul, but will not honor flush-to-zero (for f32 they always flush, for f16 they follows the fpsrc flags) and will always use the default rounding mode. This means that we cannot convert the vadd_f23/vsub_f32/vmul_f32 intrinsics to llvm.constrained.fadd/fsub/fmul, and the fadd/fsub/fmul without changing the expected behaviour under strict-fp. This patch introduces a set in intrinsics that we can use instead, going from vadd_f32 -> llvm.arm.mve.vadd -> MVE_VADD. If this is acceptable, the other intrinsics I can add will be: fma fptoi, itofp trunc/round/ceil/etc fcmp minnum/maxnum The current implementations assumes that the standard variant of a strictfp alternative will be a IRBuilder, this can be changed to take a IRBuilder or IRInt. There are also Neon intrinsics that AFAIU behave the same way. --- clang/include/clang/Basic/arm_mve_defs.td | 21 +- clang/test/CodeGen/arm-mve-intrinsics/vaddq.c | 214 +++++++--- clang/test/CodeGen/arm-mve-intrinsics/vmulq.c | 390 ++++++++++++------ clang/test/CodeGen/arm-mve-intrinsics/vsubq.c | 214 +++++++--- clang/utils/TableGen/MveEmitter.cpp | 39 +- llvm/include/llvm/IR/IntrinsicsARM.td | 12 + llvm/lib/Target/ARM/ARMInstrMVE.td | 33 +- .../mve-intrinsics/strict-intrinsics.ll | 143 +++++++ .../CodeGen/Thumb2/mve-intrinsics/vabdq.ll | 4 +- llvm/test/CodeGen/Thumb2/mve-pred-ext.ll | 12 +- 10 files changed, 797 insertions(+), 285 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index c1562a0c1f04c..eeca9153dd742 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -74,9 +74,9 @@ def immshr: CGHelperFn<"MVEImmediateShr"> { let special_params = [IRBuilderIntParam<1, "unsigned">, IRBuilderIntParam<2, "bool">]; } -def fadd: IRBuilder<"CreateFAdd">; -def fmul: IRBuilder<"CreateFMul">; -def fsub: IRBuilder<"CreateFSub">; +def fadd_node: IRBuilder<"CreateFAdd">; +def fmul_node: IRBuilder<"CreateFMul">; +def fsub_node: IRBuilder<"CreateFSub">; def load: IRBuilder<"CreateLoad"> { let special_params = [IRBuilderAddrParam<0>]; } @@ -212,6 +212,13 @@ def unsignedflag; // constant giving its size in bits. def bitsize; +// strictFPAlt allows a node to have different code generation under strict-fp. +// TODO: The standard node can be IRBuilderBase or IRIntBase. +class strictFPAlt<IRBuilderBase standard_, IRIntBase strictfp_> { + IRBuilderBase standard = standard_; + IRIntBase strictfp = strictfp_; +} + // If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it // indicates that the IR generation for that intrinsic is done by handwritten // C++ and not autogenerated at all. The effect in the MVE builtin codegen @@ -573,6 +580,14 @@ multiclass IntrinsicMXNameOverride<Type rettype, dag arguments, dag cg, } } +// StrictFP nodes that choose between standard fadd and llvm.arm.mve.fadd nodes +// depending on whether we are using strict-fp. +def fadd: strictFPAlt<fadd_node, + IRInt<"vadd", [Vector]>>; +def fsub: strictFPAlt<fsub_node, + IRInt<"vsub", [Vector]>>; +def fmul: strictFPAlt<fmul_node, + IRInt<"vmul", [Vector]>>; // ----------------------------------------------------------------------------- // Convenience lists of parameter types. 'T' is just a container record, so you diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c index 238cb4056d4f1..d24834951b385 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c @@ -1,6 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target @@ -20,10 +22,15 @@ uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]] +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vaddq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) { @@ -34,12 +41,19 @@ float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_m_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_m_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_m_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { @@ -50,12 +64,19 @@ int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_m_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +87,19 @@ float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_x_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_x_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_x_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { @@ -82,12 +110,19 @@ uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_x_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -114,12 +149,19 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_n_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_n_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vaddq_n_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b) { @@ -130,14 +172,23 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_m_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_m_n_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { @@ -148,14 +199,23 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_m_n_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_m_n_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_m_n_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) { @@ -166,14 +226,23 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_x_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_x_n_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_x_n_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) { @@ -184,14 +253,23 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vaddq_x_n_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vaddq_x_n_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vaddq_x_n_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vaddq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c index 9d36a2d334860..708502ddb52fc 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c @@ -1,6 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target @@ -48,10 +50,15 @@ uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]] +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmulq_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) { @@ -62,12 +69,19 @@ float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { @@ -78,12 +92,19 @@ int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { @@ -94,12 +115,19 @@ uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_s32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { @@ -110,12 +138,19 @@ int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pre #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -126,12 +161,19 @@ float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_u8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_u8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmulq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { @@ -142,12 +184,19 @@ uint8x16_t test_vmulq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_s16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmulq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) { @@ -158,12 +207,19 @@ int16x8_t test_vmulq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_u32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmulq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { @@ -174,12 +230,19 @@ uint32x4_t test_vmulq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -238,12 +301,19 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_n_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_n_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vmulq_n_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { @@ -254,14 +324,23 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_n_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { @@ -272,14 +351,23 @@ int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_n_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { @@ -290,14 +378,23 @@ uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_s32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_n_s32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { @@ -308,14 +405,23 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_m_n_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_m_n_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_m_n_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) { @@ -326,14 +432,23 @@ float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_u8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_n_u8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) { @@ -344,14 +459,23 @@ uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_s16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_n_s16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) { @@ -361,14 +485,23 @@ int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p) return vmulq_x_n_s16(a, b, p); #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_u32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NONSTRICT-NEXT: ret <4 x i32> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_n_u32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x i32> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p) { @@ -379,14 +512,23 @@ uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vmulq_x_n_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vmulq_x_n_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vmulq_x_n_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vmulq_x_n_f32(float32x4_t a, float32_t b, mve_pred16_t p) { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c index 3d87770889707..6f7b5a4f8271f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c @@ -1,6 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT +// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -frounding-math -fexperimental-strict-floating-point -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT // REQUIRES: aarch64-registered-target || arm-registered-target @@ -20,10 +22,15 @@ uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]] -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]] +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vsubq_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) { @@ -34,12 +41,19 @@ float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_m_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_m_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_m_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vsubq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { @@ -50,12 +64,19 @@ int8x16_t test_vsubq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_m_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_m_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_m_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { @@ -66,12 +87,19 @@ float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_x_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_x_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_x_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vsubq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { @@ -82,12 +110,19 @@ uint16x8_t test_vsubq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_x_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_x_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_x_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vsubq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p) { @@ -114,12 +149,19 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_n_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]] -// CHECK-NEXT: ret <8 x half> [[TMP0]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_n_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]] +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP0]] +// +// CHECK-STRICT-LABEL: @test_vsubq_n_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b) { @@ -130,14 +172,23 @@ float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_m_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_m_n_s8( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <16 x i8> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_m_n_s8( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { @@ -148,14 +199,23 @@ int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1 #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_m_n_f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x float> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_m_n_f32( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NONSTRICT-NEXT: ret <4 x float> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_m_n_f32( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) { @@ -166,14 +226,23 @@ float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b, #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_x_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_x_n_u16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x i16> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_x_n_u16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x i16> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) { @@ -184,14 +253,23 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p) #endif /* POLYMORPHIC */ } -// CHECK-LABEL: @test_vsubq_x_n_f16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 -// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) -// CHECK-NEXT: ret <8 x half> [[TMP2]] +// CHECK-NONSTRICT-LABEL: @test_vsubq_x_n_f16( +// CHECK-NONSTRICT-NEXT: entry: +// CHECK-NONSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-NONSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-NONSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NONSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NONSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NONSTRICT-NEXT: ret <8 x half> [[TMP2]] +// +// CHECK-STRICT-LABEL: @test_vsubq_x_n_f16( +// CHECK-STRICT-NEXT: entry: +// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0 +// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]] +// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.sub.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]], <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]] +// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vsubq_x_n_f16(float16x8_t a, float16_t b, mve_pred16_t p) { diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index f55a5f54bd158..d2b366779ba16 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -757,6 +757,26 @@ class IRIntrinsicResult : public Result { } }; +// Result subclass that generates +// Builder.getIsFPConstrained() ? <Standard> : <StrictFp> +class StrictFpAltResult : public Result { +public: + Ptr Standard; + Ptr StrictFp; + StrictFpAltResult(Ptr Standard,Ptr StrictFp) + : Standard(Standard), StrictFp(StrictFp) {} + void genCode(raw_ostream &OS, + CodeGenParamAllocator &ParamAlloc) const override { + OS << "!Builder.getIsFPConstrained() ? "; + Standard->genCode(OS, ParamAlloc); + OS << " : "; + StrictFp->genCode(OS, ParamAlloc); + } + void morePrerequisites(std::vector<Ptr> &output) const override { + Standard->morePrerequisites(output); + } +}; + // Result subclass that specifies a type, for use in IRBuilder operations such // as CreateBitCast that take a type argument. class TypeResult : public Result { @@ -1239,7 +1259,8 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D, std::vector<Result::Ptr> Args; for (unsigned i = 0, e = D->getNumArgs(); i < e; ++i) Args.push_back(getCodeForDagArg(D, i, Scope, Param)); - if (Op->isSubClassOf("IRBuilderBase")) { + + auto GenIRBuilderBase = [&](const Record *Op) { std::set<unsigned> AddressArgs; std::map<unsigned, std::string> IntegerArgs; for (const Record *sp : Op->getValueAsListOfDefs("special_params")) { @@ -1252,7 +1273,8 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D, } return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"), Args, AddressArgs, IntegerArgs); - } else if (Op->isSubClassOf("IRIntBase")) { + }; + auto GenIRIntBase = [&](const Record *Op) { std::vector<const Type *> ParamTypes; for (const Record *RParam : Op->getValueAsListOfDefs("params")) ParamTypes.push_back(getType(RParam, Param)); @@ -1260,7 +1282,18 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D, if (Op->getValueAsBit("appendKind")) IntName += "_" + toLetter(cast<ScalarType>(Param)->kind()); return std::make_shared<IRIntrinsicResult>(IntName, ParamTypes, Args); - } else { + }; + + if (Op->isSubClassOf("IRBuilderBase")) { + return GenIRBuilderBase(Op); + } else if (Op->isSubClassOf("IRIntBase")) { + return GenIRIntBase(Op); + } else if (Op->isSubClassOf("strictFPAlt")) { + auto Standard = GenIRBuilderBase(Op->getValueAsDef("standard")); + auto StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp")); + return std::make_shared<StrictFpAltResult>(Standard, StrictFp); + } + else { PrintFatalError("Unsupported dag node " + Op->getName()); } } diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 1219ce2f86da8..ecadb235bec36 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1138,6 +1138,18 @@ def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic< [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */, llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>; +def int_arm_mve_vadd: DefaultAttrsIntrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_vsub: DefaultAttrsIntrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_vmul: DefaultAttrsIntrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_arm_mve_vmulh: DefaultAttrsIntrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 98591fa3f5bd7..6da04c4ac6f18 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -384,6 +384,16 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin (VTI.Vec MQPR:$inactive)))>; } +def vadd : PatFrags<(ops node:$lhs, node:$rhs), + [(fadd node:$lhs, node:$rhs), + (int_arm_mve_vadd node:$lhs, node:$rhs)]>; +def vsub : PatFrags<(ops node:$lhs, node:$rhs), + [(fsub node:$lhs, node:$rhs), + (int_arm_mve_vsub node:$lhs, node:$rhs)]>; +def vmul : PatFrags<(ops node:$lhs, node:$rhs), + [(fmul node:$lhs, node:$rhs), + (int_arm_mve_vmul node:$lhs, node:$rhs)]>; + // --------- Start of base classes for the instructions themselves class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm, @@ -3605,7 +3615,7 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[ let validForTailPredication = 1; } -multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op, +multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); @@ -3616,7 +3626,7 @@ multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op, } multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>; + : MVE_VMULT_fp_m<"vmul", VTI, vmul, int_arm_mve_mul_predicated, IdentityVec>; def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half @@ -3740,7 +3750,7 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>; defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>; multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { + SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> { let validForTailPredication = 1; } @@ -3752,9 +3762,9 @@ multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, } multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>; + : MVE_VADDSUB_fp_m<"vadd", 0, VTI, vadd, int_arm_mve_add_predicated, IdentityVec>; multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>; + : MVE_VADDSUB_fp_m<"vsub", 1, VTI, vsub, int_arm_mve_sub_predicated, IdentityVec>; def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half @@ -5391,21 +5401,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>; defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>; multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { + SDPatternOperator Op, Intrinsic PredInt, + SDPatternOperator IdentityVec> { def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>; defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>; } let Predicates = [HasMVEFloat] in { - defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd, + defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, vadd, int_arm_mve_add_predicated, ARMimmMinusZeroF>; - defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd, + defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, vadd, int_arm_mve_add_predicated, ARMimmMinusZeroH>; - defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub, + defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, vsub, int_arm_mve_sub_predicated, ARMimmAllZerosV>; - defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub, + defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, vsub, int_arm_mve_sub_predicated, ARMimmAllZerosV>; } @@ -5588,7 +5599,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>; multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> { let validForTailPredication = 1 in def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>; - defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ), + defm : MVE_TwoOpPatternDup<VTI, vmul, int_arm_mve_mul_predicated, (? ), !cast<Instruction>(NAME), IdentityVec>; } diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll new file mode 100644 index 0000000000000..fa1ae5cf306d8 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vaddq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vaddq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vsubq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vsub.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vsubq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vsub.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vmulq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vmulq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + + + + +define arm_aapcs_vfpcc <8 x half> @test_vaddq_f16_splat(<8 x half> %a, half %b) { +; CHECK-LABEL: test_vaddq_f16_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 r0, s4 +; CHECK-NEXT: vadd.f16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x half> undef, half %b, i32 0 + %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.arm.mve.vadd.v8f16(<8 x half> %a, <8 x half> %s) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vaddq_f32_splat(<4 x float> %a, float %b) { +; CHECK-LABEL: test_vaddq_f32_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vadd.f32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %b, i32 0 + %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.arm.mve.vadd.v4f32(<4 x float> %a, <4 x float> %s) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16_splat(<8 x half> %a, half %b) { +; CHECK-LABEL: test_vsubq_f16_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 r0, s4 +; CHECK-NEXT: vsub.f16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x half> undef, half %b, i32 0 + %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.arm.mve.vsub.v8f16(<8 x half> %a, <8 x half> %s) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vsubq_f32_splat(<4 x float> %a, float %b) { +; CHECK-LABEL: test_vsubq_f32_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vsub.f32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %b, i32 0 + %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.arm.mve.vsub.v4f32(<4 x float> %a, <4 x float> %s) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vmulq_f16_splat(<8 x half> %a, half %b) { +; CHECK-LABEL: test_vmulq_f16_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f16 r0, s4 +; CHECK-NEXT: vmul.f16 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x half> undef, half %b, i32 0 + %s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer + %0 = tail call <8 x half> @llvm.arm.mve.vmul.v8f16(<8 x half> %a, <8 x half> %s) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32_splat(<4 x float> %a, float %b) { +; CHECK-LABEL: test_vmulq_f32_splat: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmul.f32 q0, q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x float> undef, float %b, i32 0 + %s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.arm.mve.vmul.v4f32(<4 x float> %a, <4 x float> %s) + ret <4 x float> %0 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll index 961489613b626..389ccd3491539 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -25,8 +25,8 @@ entry: declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>, i32) #1 -define arm_aapcs_vfpcc <8 x half> @test_vabdq_f32(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { -; CHECK-LABEL: test_vabdq_f32: +define arm_aapcs_vfpcc <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vabdq_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vabd.f16 q0, q0, q1 ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index 0bec2b100911c..1aea7f9849b89 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -63,8 +63,8 @@ entry: ret <8 x i16> %0 } -define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) { -; CHECK-MVE-LABEL: sext_v8i1_v8f32: +define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f16(<8 x half> %src1, <8 x half> %src2) { +; CHECK-MVE-LABEL: sext_v8i1_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .save {r4, lr} ; CHECK-MVE-NEXT: push {r4, lr} @@ -110,7 +110,7 @@ define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, <8 x half> % ; CHECK-MVE-NEXT: vmov.16 q0[7], r0 ; CHECK-MVE-NEXT: pop {r4, pc} ; -; CHECK-MVEFP-LABEL: sext_v8i1_v8f32: +; CHECK-MVEFP-LABEL: sext_v8i1_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vmov.i16 q2, #0x0 ; CHECK-MVEFP-NEXT: vmov.i8 q3, #0xff @@ -291,8 +291,8 @@ entry: ret <8 x i16> %0 } -define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) { -; CHECK-MVE-LABEL: zext_v8i1_v8f32: +define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f16(<8 x half> %src1, <8 x half> %src2) { +; CHECK-MVE-LABEL: zext_v8i1_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: .save {r4, lr} ; CHECK-MVE-NEXT: push {r4, lr} @@ -340,7 +340,7 @@ define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> % ; CHECK-MVE-NEXT: vand q0, q1, q0 ; CHECK-MVE-NEXT: pop {r4, pc} ; -; CHECK-MVEFP-LABEL: zext_v8i1_v8f32: +; CHECK-MVEFP-LABEL: zext_v8i1_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vmov.i16 q2, #0x0 ; CHECK-MVEFP-NEXT: vmov.i16 q3, #0x1 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
