llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jiahao Guo (E00N777) <details> <summary>Changes</summary> Part of https://github.com/llvm/llvm-project/issues/185382 Lowering: - test_vbsl_s8 - test_vbslq_s8 - test_vbsl_s16 - test_vbslq_s16 - test_vbsl_f32 - test_vbslq_f32 I reused the lowering logic from the [incubator](https://github.com/llvm/clangir/blob/main/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp) implementation and added a corresponding helper function in the upstream file, like this: ``` case NEON::BI__builtin_neon_vbsl_v: case NEON::BI__builtin_neon_vbslq_v: { cir::VectorType bitTy = vTy; if (cir::isAnyFloatingPointType(bitTy.getElementType())) bitTy = castVecOfFPTypeToVecOfIntWithSameWidth(builder, vTy); Ops[0] = builder.createBitcast(Ops[0], bitTy); Ops[1] = builder.createBitcast(Ops[1], bitTy); Ops[2] = builder.createBitcast(Ops[2], bitTy); Ops[1] = builder.createAnd(Ops[0], Ops[1]); Ops[2] = builder.createAnd(builder.createNot(Ops[0]), Ops[2]); Ops[0] = builder.createOr(Ops[1], Ops[2]); return builder.createBitcast(Ops[0], ty); } ``` and ``` static cir::VectorType castVecOfFPTypeToVecOfIntWithSameWidth(CIRGenBuilderTy &builder, cir::VectorType vecTy) { if (mlir::isa<cir::SingleType>(vecTy.getElementType())) return cir::VectorType::get(builder.getSInt32Ty(), vecTy.getSize()); if (mlir::isa<cir::DoubleType>(vecTy.getElementType())) return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize()); llvm_unreachable( "Unsupported element type in getVecOfIntTypeWithSameEltWidth"); } ``` If this is not the preferred way to structure it, I’d be happy to adjust it based on your feedback. For FileCheck coverage, I moved the relevant test cases from `clang/test/CodeGen/AArch64/neon-intrinsics.c` into `clang/test/CodeGen/AArch64/neon/intrinsics.c`. I was not entirely sure whether the bitwise select coverage should go into a separate dedicated test file, so for now I kept it in `clang/test/CodeGen/AArch64/neon/intrinsics.c`. 
--- Patch is 20.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/188449.diff 3 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+27-2) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (-107) - (modified) clang/test/CodeGen/AArch64/neon/intrinsics.c (+155) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index a3488bfcc3dec..3a0cc766478a3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -14,6 +14,7 @@ #include "CIRGenFunction.h" #include "clang/Basic/AArch64CodeGenUtils.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" // TODO(cir): once all builtins are covered, decide whether we still @@ -23,6 +24,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Value.h" #include "clang/AST/GlobalDecl.h" #include "clang/Basic/Builtins.h" @@ -169,6 +171,17 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags, llvm_unreachable("Unknown vector element type!"); } +static cir::VectorType +castVecOfFPTypeToVecOfIntWithSameWidth(CIRGenBuilderTy &builder, + cir::VectorType vecTy) { + if (mlir::isa<cir::SingleType>(vecTy.getElementType())) + return cir::VectorType::get(builder.getSInt32Ty(),vecTy.getSize()); + if (mlir::isa<cir::DoubleType>(vecTy.getElementType())) + return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize()); + llvm_unreachable( + "Unsupported element type in getVecOfIntTypeWithSameEltWidth"); +} + static mlir::Value emitCommonNeonBuiltinExpr( CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier, @@ -1677,7 +1690,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const 
CallExpr *expr, return mlir::Value{}; } - // Memory Operations (MOPS) + // Memory Operations (Mops) if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) { cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + @@ -2196,7 +2209,19 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, default: return std::nullopt; case NEON::BI__builtin_neon_vbsl_v: - case NEON::BI__builtin_neon_vbslq_v: + case NEON::BI__builtin_neon_vbslq_v: { + cir::VectorType bitTy = ty; + if(cir::isAnyFloatingPointType(bitTy.getElementType())) + bitTy = castVecOfFPTypeToVecOfIntWithSameWidth(builder, bitTy); + ops[0] = builder.createBitcast(ops[0], bitTy); + ops[1] = builder.createBitcast(ops[1], bitTy); + ops[2] = builder.createBitcast(ops[2], bitTy); + + ops[1] = builder.createAnd(loc, ops[0], ops[1]); + ops[2] = builder.createAnd(loc, builder.createNot(ops[0]), ops[2]); + ops[0] = builder.createOr(loc, ops[1], ops[2]); + return builder.createBitcast(ops[0], ty); + } case NEON::BI__builtin_neon_vfma_lane_v: case NEON::BI__builtin_neon_vfmaq_lane_v: case NEON::BI__builtin_neon_vfma_laneq_v: diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 8eb6cd86339d6..c01edc93267b7 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -1038,39 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { return vdiv_f32(v1, v2); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]] -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1) -// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]] -// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: ret <8 x 
i8> [[VBSL2_I]] -// -int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { - return vbsl_s8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> -// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1) -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> -// CHECK-NEXT: ret <8 x i8> [[TMP4]] -// -int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { - return (int8x8_t)vbsl_s16(v1, v2, v3); -} - // CHECK-LABEL: define dso_local <2 x i32> @test_vbsl_s32( // CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -1179,28 +1146,6 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { return vbsl_u64(v1, v2, v3); } -// CHECK-LABEL: define dso_local <2 x float> @test_vbsl_f32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x 
i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> -// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1) -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP5]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> -// CHECK-NEXT: ret <2 x float> [[TMP6]] -// -float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { - return vbsl_f32(v1, v2, v3); -} - // CHECK-LABEL: define dso_local <1 x double> @test_vbsl_f64( // CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]], <1 x double> noundef [[V3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -1257,37 +1202,6 @@ poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { return vbsl_p16(v1, v2, v3); } -// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_s8( -// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]] -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1) -// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]] -// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]] -// -int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { - return vbslq_s8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vbslq_s16( -// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef 
[[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8> -// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1) -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: ret <8 x i16> [[VBSL5_I]] -// -int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { - return vbslq_s16(v1, v2, v3); -} // CHECK-LABEL: define dso_local <4 x i32> @test_vbslq_s32( // CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] { @@ -1397,27 +1311,6 @@ uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { return vbslq_u64(v1, v2, v3); } -// CHECK-LABEL: define dso_local <4 x float> @test_vbslq_f32( -// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8> -// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: 
[[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[VBSL_I]], splat (i32 -1) -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP5]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> -// CHECK-NEXT: ret <4 x float> [[TMP6]] -// -float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { - return vbslq_f32(v1, v2, v3); -} // CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_p8( // CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] { diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index bf8e62feda8da..0375d3ab02647 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -982,3 +982,158 @@ int64_t test_vshld_u64(int64_t a,int64_t b) { return (int64_t)vshld_u64(a, b); } +// LLVM-LABEL: @test_vbsl_s8( +// CIR-LABEL: @vbsl_s8( +int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CIR: [[MASK_PTR:%.*]] = cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !u8i>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: [[AND:%.*]] = cir.and %{{.*}}, %{{.*}} : !cir.vector<8 x !s8i> + // CIR: [[NOT:%.*]] = cir.not %{{.*}} : !cir.vector<8 x !s8i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], %{{.*}} : !cir.vector<8 x !s8i> + // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<8 x !s8i> + + + // LLVM: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]] + // LLVM-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1) + // LLVM-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]] + // LLVM-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] + // LLVM-NEXT: ret <8 x i8> [[VBSL2_I]] + return vbsl_s8(v1, v2, v3); +} + +// LLVM-LABEL: 
@test_vbslq_s8( +// CIR-LABEL: @vbslq_s8( +int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<16 x !u8i>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: [[AND:%.*]] = cir.and %{{.*}}, %{{.*}} : !cir.vector<16 x !s8i> + // CIR: [[NOT:%.*]] = cir.not %{{.*}} : !cir.vector<16 x !s8i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], %{{.*}} : !cir.vector<16 x !s8i> + // CIR: cir.or [[AND]], [[AND2]] : !cir.vector<16 x !s8i> + + // LLVM: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]] + // LLVM-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1) + // LLVM-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]] + // LLVM-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] + // LLVM-NEXT: ret <16 x i8> [[VBSL2_I]] + return vbslq_s8(v1, v2, v3); +} + +// LLVM-LABEL: @test_vbsl_s16( +// CIR-LABEL: @vbsl_s16( +int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !u16i>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !s16i>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !s16i>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> + // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> + // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> + // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<4 x !s16i> + // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<4 x !s16i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<4 x !s16i> + // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<4 x !s16i> + + // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> 
+ // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8> + // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> + // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> + // LLVM-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]] + // LLVM-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1) + // LLVM-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] + // LLVM-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> + // LLVM-NEXT: ret <8 x i8> [[TMP4]] + return (int8x8_t)vbsl_s16(v1, v2, v3); +} + +// LLVM-LABEL: @test_vbslq_s16( +// CIR-LABEL: @vbslq_s16( +int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !u16i>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !s16i>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !s16i>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> + // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> + // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> + // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<8 x !s16i> + // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<8 x !s16i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<8 x !s16i> + // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<8 x !s16i> + + // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8> + // LLVM-NEXT: 
[[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> + // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> + // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> + // LLVM-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] + // LLVM-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1) + // LLVM-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] + // LLVM-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] + // LLVM-NEXT: ret <8 x i16> [[VBSL5_I]] + return vbslq_s16(v1, v2, v3); +} + +// LLVM-LABEL: @test_vbsl_f32( +// CIR-LABEL: @vbsl_f32( +float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !u32i>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !cir.float>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !cir.float>> -> !cir.ptr<!cir.vector<8 x !s8i>> + // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> + // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> + // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> + // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<2 x !s32i> + // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<2 x !s32i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<2 x !s32i> + // CIR: [[OR:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<2 x !s32i> + // CIR: cir.cast bitcast [[OR]] : !cir.vector<2 x !s32i> -> !cir.vector<2 x !cir.float> + + // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> + // 
LLVM-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> + // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> + // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> + // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> + // LLVM-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]] + // LLVM-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1) + // LLVM-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP5]], [[VBSL2_I]] + // LLVM-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] + // LLVM-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> + // LLVM-NEXT: ret <2 x float> [[TMP6]] + return vbsl_f32(v1, v2, v3); +} + +// LLVM-LABEL: @test_vbslq_f32( +// CIR-LABEL: @vbslq_f32( +float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !u32i>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>> -> !cir.ptr<!cir.vector<16 x !s8i>> + // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> + // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> + // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> + // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<4 x !s32i> + // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<4 x !s32i> + // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<4 x !s32i> + // CIR: [[OR:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<4 x !s32i> + // CIR: cir.cast bitcast [[OR]] : !cir.vector<4 x !s32i> -> !cir.vector<4 x !cir.float> + + // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V3]] to 
<4 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8> + // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> + // LLVM-NEXT: [[VBSL1_I:%.*]] = ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/188449 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
