[clang] [llvm] [ARM] Introduce intrinsics for MVE vcmp under strict-fp. (PR #169798)

via cfe-commits Sun, 30 Nov 2025 03:25:46 -0800

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

Similar to #169156, this adds intrinsics for strict-fp compare nodes to
make sure they end up as the original instruction.

---

Patch is 370.71 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/169798.diff


5 Files Affected:

- (modified) clang/include/clang/Basic/arm_mve_defs.td (+18-6) 
- (modified) clang/test/CodeGen/arm-mve-intrinsics/compare.c (+3030-1430) 
- (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+9) 
- (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+34-15) 
- (added) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-compare.ll (+820) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
 def icmp_sge: IRBuilder<"CreateICmpSGE">;
 def icmp_slt: IRBuilder<"CreateICmpSLT">;
 def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
                       IRInt<"vsub", [Vector]>>;
 def fmul: strictFPAlt<fmul_node,
                       IRInt<"vmul", [Vector]>>;
+def fcmp_eq  : strictFPAlt<fcmp_eq_node,
+                           IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne  : strictFPAlt<fcmp_ne_node,
+                           IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt  : strictFPAlt<fcmp_gt_node,
+                           IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge  : strictFPAlt<fcmp_ge_node,
+                           IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+                           IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+                           IRInt<"cmp_le", [Predicate, Vector]>>;
 
 // -----------------------------------------------------------------------------
 // Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c 
b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - 
%s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - 
%s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s 
--check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S 
-passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math 
-fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S 
-passes='mem2reg,sroa,early-cse<>' | FileCheck %s 
--check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> 
@llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) 
#[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 {
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> 
@llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) 
#[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 {
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x 
i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 {
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 {
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 {
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x 
i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 {
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 {
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], 
[[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 {
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, 
half [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], 
[[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> 
poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> 
@llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) 
#[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 {
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t 
b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, 
float [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> 
[[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], 
[[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> 
poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> 
@llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> 
[[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 {
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t 
b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 
[[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> 
[[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x 
i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> 
poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> 
[[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], 
[[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> 
poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> 
[[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], 
[[DOTSPLAT]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 
@llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 {
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 
[[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> 
[[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/169798
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [ARM] Introduce intrinsics for MVE vcmp under strict-fp. (PR #169798)

Reply via email to