https://github.com/Ko496-glitch created https://github.com/llvm/llvm-project/pull/189301
Fixes #189260. The assertion failure is fixed by truncating the scalar boolean value to i1 before it is inserted into the boolean (<N x i1>) vector.
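For context, a minimal reproducer in the spirit of #189260 (hypothetical, reconstructed from the CGExpr.cpp fix in PATCH 6/6 rather than copied from the issue): storing a scalar bool into an element of a boolean ext_vector goes through EmitStoreThroughLValue, where the vector is held as an iN integer and bitcast to <N x i1>, while the stored bool still carries its i8 memory type.

#include <stdbool.h>

/* Hypothetical reproducer; assumes bool ext_vectors take the
 * iN <-> <N x i1> path patched in CGExpr.cpp below. The names
 * bool8/set_lane are mine, for illustration only. */
typedef bool bool8 __attribute__((ext_vector_type(8)));

bool8 set_lane(bool8 v, bool b) {
  v[0] = b; /* b is i8 in LLVM IR; inserting it into <8 x i1> tripped the assertion before the fix */
  return v;
}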
>From 224a35b671b6f482dcdd0bbd923caffe44ce6bd8 Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Thu, 12 Mar 2026 22:25:51 -0400
Subject: [PATCH 1/6] migrated neon_vqshlud_s64

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5534e69b5f8bc..fc085965e1d73 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2772,6 +2772,31 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     ops[0] = builder.createFNeg(ops[0]);
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", convertType(expr->getType()), ops);
+
+  case NEON::BI__builtin_neon_vqshlud_n_s64:{
+
+    auto loc = getLoc(expr->getExprLoc());
+    const cir::IntType intType = builder.getSInt64Ty();
+
+    std::optional<llvm::APSInt> apsInt =
+        expr->getArg(1)->getIntegerConstantExpr(getContext());
+    assert(apsInt && "Expected argument to be a Constant");
+
+    ops.push_back(builder.getSInt64(apsInt->getZExtValue(), loc));
+
+    const StringRef intrinsicName = "aarch64.neon.sqshlu";
+
+    return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
+  }
+
+
+
+
+  case NEON::BI_builtin_neon_vqshld_n_u64:
+
+  case NEON::BI_builtin_neon_vqshld_n_s64:
+
+
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:
   case NEON::BI__builtin_neon_vsubd_s64:
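As a point of reference for the migration above, a usage sketch of the intrinsic itself (my example, not part of the patch). SQSHLU is the signed saturating shift-left-unsigned operation: the signed input is shifted left by an immediate and the result saturates to the unsigned 64-bit range.

#include <arm_neon.h>
#include <stdint.h>

/* Lowers to @llvm.aarch64.neon.sqshlu.i64, matching the
 * "aarch64.neon.sqshlu" intrinsic name used in the patch. */
uint64_t shift_saturate(int64_t a) {
  return vqshlud_n_s64(a, 3); /* the shift amount must be a compile-time constant */
}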
>From 6789822331625edbeb6cbbcbf468d7c63f2b296a Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Thu, 12 Mar 2026 22:35:18 -0400
Subject: [PATCH 2/6] Added vqshld_n_64

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index fc085965e1d73..30dc7368152d5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2772,8 +2772,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     ops[0] = builder.createFNeg(ops[0]);
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", convertType(expr->getType()), ops);
-
-  case NEON::BI__builtin_neon_vqshlud_n_s64:{
+  case NEON::BI__builtin_neon_vqshlud_n_s64: {
 
     auto loc = getLoc(expr->getExprLoc());
     const cir::IntType intType = builder.getSInt64Ty();
@@ -2788,13 +2787,20 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
 
     return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
   }
+  case NEON::BI__builtin_neon_vqshld_n_u64:
+  case NEON::BI__builtin_neon_vqshld_n_s64: {
+    auto loc = getLoc(expr->getExprLoc());
+    const cir::IntType intType = (builtinID == NEON::BI__builtin_neon_vqshld_n_u64) ? builder.getUInt64Ty(): builder.getSInt64Ty();
+    const StringRef intrinsicName = (builtinID == NEON::BI__builtin_neon_vqshld_n_u64) ?
+        "aarch64.neon.uqshl": "aarch64.neon.sqshl";
+    // Emit and cast the arugment and then push directly to avoid indexing issues
+    mlir::Value arg1 = emitScalarExpr(expr->getArg(1));
+    ops.push_back(builder.createIntCast(arg1,intType));
 
-  case NEON::BI_builtin_neon_vqshld_n_u64:
-
-  case NEON::BI_builtin_neon_vqshld_n_s64:
+    return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
+  }
 
   case NEON::BI__builtin_neon_vaddd_s64:
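Similarly, a usage sketch for the pair handled in this patch (my example, for illustration): vqshld_n_u64 and vqshld_n_s64 are the unsigned and signed scalar saturating left shifts by an immediate, which is why the case selects between the uqshl and sqshl intrinsic names.

#include <arm_neon.h>
#include <stdint.h>

uint64_t shl_sat_u64(uint64_t a) {
  return vqshld_n_u64(a, 7); /* -> @llvm.aarch64.neon.uqshl.i64 */
}

int64_t shl_sat_s64(int64_t a) {
  return vqshld_n_s64(a, 7); /* -> @llvm.aarch64.neon.sqshl.i64 */
}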
>From ce96e29c989979358e3e9adf982db395bf8803ef Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Thu, 12 Mar 2026 22:58:18 -0400
Subject: [PATCH 3/6] fixed the location for vqshld_*

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 30dc7368152d5..858900465ea56 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2772,6 +2772,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     ops[0] = builder.createFNeg(ops[0]);
     return emitCallMaybeConstrainedBuiltin(builder, loc, "fma", convertType(expr->getType()), ops);
+  case NEON::BI__builtin_neon_vaddd_s64:
+  case NEON::BI__builtin_neon_vaddd_u64:
+  case NEON::BI__builtin_neon_vsubd_s64:
+  case NEON::BI__builtin_neon_vsubd_u64:
+  case NEON::BI__builtin_neon_vqdmlalh_s16:
+  case NEON::BI__builtin_neon_vqdmlslh_s16:
   case NEON::BI__builtin_neon_vqshlud_n_s64: {
 
     auto loc = getLoc(expr->getExprLoc());
@@ -2795,23 +2801,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     const StringRef intrinsicName = (builtinID == NEON::BI__builtin_neon_vqshld_n_u64) ?
         "aarch64.neon.uqshl": "aarch64.neon.sqshl";
-    // Emit and cast the arugment and then push directly to avoid indexing issues
+    // Emit and cast the argument and then push directly to avoid indexing issues
     mlir::Value arg1 = emitScalarExpr(expr->getArg(1));
     ops.push_back(builder.createIntCast(arg1,intType));
 
     return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
   }
-
-
-  case NEON::BI__builtin_neon_vaddd_s64:
-  case NEON::BI__builtin_neon_vaddd_u64:
-  case NEON::BI__builtin_neon_vsubd_s64:
-  case NEON::BI__builtin_neon_vsubd_u64:
-  case NEON::BI__builtin_neon_vqdmlalh_s16:
-  case NEON::BI__builtin_neon_vqdmlslh_s16:
-  case NEON::BI__builtin_neon_vqshlud_n_s64:
-  case NEON::BI__builtin_neon_vqshld_n_u64:
-  case NEON::BI__builtin_neon_vqshld_n_s64:
   case NEON::BI__builtin_neon_vrshrd_n_u64:
   case NEON::BI__builtin_neon_vrshrd_n_s64:
   case NEON::BI__builtin_neon_vrsrad_n_u64:
>From 9a132ad46b7b6267ad9a97a63329ff6fce434579 Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Thu, 12 Mar 2026 23:43:25 -0400
Subject: [PATCH 4/6] Fixed the MLIR syntax

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 858900465ea56..e8572b2c91e9f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2780,17 +2780,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vqdmlslh_s16:
   case NEON::BI__builtin_neon_vqshlud_n_s64: {
 
-    auto loc = getLoc(expr->getExprLoc());
     const cir::IntType intType = builder.getSInt64Ty();
+    auto loc = getLoc(expr->getExprLoc());
+
+    ops.push_back(builder.getSInt64(apsInt->getZExtValue(), loc));
 
     std::optional<llvm::APSInt> apsInt =
         expr->getArg(1)->getIntegerConstantExpr(getContext());
     assert(apsInt && "Expected argument to be a Constant");
 
-    ops.push_back(builder.getSInt64(apsInt->getZExtValue(), loc));
+    ops[1] = builder.getSInt64(apsInt->getZExtValue(), loc);
 
     const StringRef intrinsicName = "aarch64.neon.sqshlu";
-
     return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
   }
   case NEON::BI__builtin_neon_vqshld_n_u64:
@@ -2799,11 +2800,11 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     const cir::IntType intType = (builtinID == NEON::BI__builtin_neon_vqshld_n_u64) ? builder.getUInt64Ty(): builder.getSInt64Ty();
+
     const StringRef intrinsicName = (builtinID == NEON::BI__builtin_neon_vqshld_n_u64) ?
         "aarch64.neon.uqshl": "aarch64.neon.sqshl";
-    // Emit and cast the argument and then push directly to avoid indexing issues
-    mlir::Value arg1 = emitScalarExpr(expr->getArg(1));
-    ops.push_back(builder.createIntCast(arg1,intType));
+    ops.push_back(emitScalarExpr(expr->getArg(1)));
+    ops[1] = builder.createIntCast(ops[1], intType);
 
     return emitNeonCall(builder, {intType, intType}, ops, intrinsicName, intType, loc);
   }
>From 9dd16a224699b584b980162d9ef807973c3a96e7 Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Fri, 13 Mar 2026 17:57:31 -0400
Subject: [PATCH 5/6] Added test to intrinsic

---
 clang/test/CodeGen/AArch64/neon/intrinsics.c | 71 +++++++++++++-------
 1 file changed, 47 insertions(+), 24 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index b740c3b5b2310..089ce7ba0ceba 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -24,6 +24,29 @@
 
 #include <arm_neon.h>
 
+// LLVM-LABEL: @test_vqshlud_n_s64
+// CIR-LABEL: @test_vqshlud_n_s64
+int64x1_t test_vqshlud_n_s64(int64x1_t a) {
+// CIR: %[[CONST:.*]] = cir.const(#cir.int<2> : !s64i)
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sqshlu" %{{.*}}, %[[CONST]] : (!cir.int<s, 64>, !cir.int<s, 64>) -> !cir.int<s, 64>
+
+// LLVM: [[RES:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %{{.*}}, i64 2)
+// LLVM: ret i64 [[RES]]
+  return vqshlud_n_s64(a, 2);
+}
+
+// LLVM-LABEL: @test_vqshld_n_u64
+// CIR-LABEL: @test_vqshld_n_u64
+uint64x1_t test_vqshld_n_u64(uint64x1_t a, int64x1_t b) {
+// CIR: %[[ARG_B:.*]] = cir.load
+// CIR: %[[CAST:.*]] = cir.cast(integral, %[[ARG_B]] : !cir.int<s, 64>), !cir.int<u, 64>
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uqshl" %{{.*}}, %[[CAST]] : (!cir.int<u, 64>, !cir.int<u, 64>) -> !cir.int<u, 64>
+
+// LLVM: [[RES:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %{{.*}}, i64 %{{.*}})
+// LLVM: ret i64 [[RES]]
+  return vqshld_n_u64(a, b);
+}
+
 // LLVM-LABEL: @test_vnegd_s64
 // CIR-LABEL: @vnegd_s64
 int64_t test_vnegd_s64(int64_t a) {
@@ -467,7 +490,7 @@ uint64_t test_vceqzd_f64(float64_t a) {
 int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
 
-// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
 // LLVM-NEXT: ret <8 x i8> [[VABD_I]]
   return vabd_s8(v1, v2);
@@ -480,7 +503,7 @@ int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -497,7 +520,7 @@ int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -512,7 +535,7 @@
 uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
 
-// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
 // LLVM-NEXT: ret <8 x i8> [[VABD_I]]
   return vabd_u8(v1, v2);
@@ -525,7 +548,7 @@ uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -542,7 +565,7 @@ uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -559,7 +582,7 @@ float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <2 x float> {{.*}} [[V1:%.*]], <2 x float> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <2 x float> {{.*}} [[V1:%.*]], <2 x float> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
 // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
@@ -597,7 +620,7 @@ float64x1_t test_vabd_f64(float64x1_t v1, float64x1_t v2) {
 int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
 
-// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
 // LLVM-NEXT: ret <16 x i8> [[VABD_I]]
   return vabdq_s8(v1, v2);
@@ -610,7 +633,7 @@ int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -627,7 +650,7 @@ int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -642,7 +665,7 @@
 uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : (!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
 
-// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
 // LLVM-NEXT: ret <16 x i8> [[VABD_I]]
   return vabdq_u8(v1, v2);
@@ -655,7 +678,7 @@ uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -672,7 +695,7 @@ uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -689,7 +712,7 @@ float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <4 x float> {{.*}} [[V1:%.*]], <4 x float> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <4 x float> {{.*}} [[V1:%.*]], <4 x float> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V1]] to <4 x i32>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
 // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
@@ -708,7 +731,7 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
 // CIR: [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
 // CIR: cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
 
-// LLVM-SAME: <2 x double> {{.*}} [[V1:%.*]], <2 x double> {{.*}} [[V2:%.*]]) 
+// LLVM-SAME: <2 x double> {{.*}} [[V1:%.*]], <2 x double> {{.*}} [[V2:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V1]] to <2 x i64>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V2]] to <2 x i64>
 // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
@@ -785,7 +808,7 @@ uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabd_u32
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <2 x i32> {{.*}} [[V1:%.*]], <2 x i32> {{.*}} [[V2:%.*]], <2 x i32> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
@@ -802,7 +825,7 @@ int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabd_s8
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <8 x i8> {{.*}} [[V1:%.*]], <8 x i8> {{.*}} [[V2:%.*]], <8 x i8> {{.*}} [[V3:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
 // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
 // LLVM-NEXT: ret <8 x i8> [[ADD_I]]
@@ -815,7 +838,7 @@ int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabd_s16
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <4 x i16> {{.*}} [[V1:%.*]], <4 x i16> {{.*}} [[V2:%.*]], <4 x i16> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
@@ -849,7 +872,7 @@ int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_s8
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
 // LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
 // LLVM-NEXT: ret <16 x i8> [[ADD_I]]
@@ -862,7 +885,7 @@ int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_s16
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -879,7 +902,7 @@ int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_s32
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
@@ -896,7 +919,7 @@ uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_u8
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <16 x i8> {{.*}} [[V1:%.*]], <16 x i8> {{.*}} [[V2:%.*]], <16 x i8> {{.*}} [[V3:%.*]])
 // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> [[V2]], <16 x i8> [[V3]])
 // LLVM-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[V1]], [[VABD_I]]
 // LLVM-NEXT: ret <16 x i8> [[ADD_I]]
@@ -909,7 +932,7 @@ uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_u16
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <8 x i16> {{.*}} [[V1:%.*]], <8 x i16> {{.*}} [[V2:%.*]], <8 x i16> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
@@ -926,7 +949,7 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
 // CIR: [[ABD:%.*]] = cir.call @vabdq_u32
 // CIR: [[RES:%.*]] = cir.add {{.*}}, [[ABD]]
 
-// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]]) 
+// LLVM-SAME: <4 x i32> {{.*}} [[V1:%.*]], <4 x i32> {{.*}} [[V2:%.*]], <4 x i32> {{.*}} [[V3:%.*]])
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
 // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V3]] to <16 x i8>
 // LLVM-NEXT: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>

>From fca4e7c46e4194a7866eba594cf05615fe15d7ed Mon Sep 17 00:00:00 2001
From: kartikohlan <[email protected]>
Date: Sun, 29 Mar 2026 23:21:44 -0400
Subject: [PATCH 6/6] Fix #189260

---
 clang/lib/CodeGen/CGExpr.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index eebb36276e0eb..7fdb105df6e6a 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2756,6 +2756,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
             Builder.getInt1Ty(), IRStoreTy->getPrimitiveSizeInBits());
         Vec = Builder.CreateBitCast(Vec, IRVecTy);
         // iN --> <N x i1>.
+
+        if(SrcVal->getType() != Builder.getInt1Ty()){
+          SrcVal = Builder.CreateTrunc(SrcVal,Builder.getInt1Ty());
+        }
       }
 
       // Allow inserting `<1 x T>` into an `<N x T>`. It can happen with scalar
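To make the effect of the CGExpr.cpp hunk concrete, a companion sketch (hypothetical, my reading of the change): the new guard only narrows values that still carry the i8 memory type of bool, while a value that is already i1 at the IR level is left untouched.

#include <stdbool.h>

/* Hypothetical illustration of the fixed code path; bool4 and
 * store_flags are mine, not from the PR. */
typedef bool bool4 __attribute__((ext_vector_type(4)));

void store_flags(bool4 *v, bool b, int x, int y) {
  (*v)[0] = b;       /* loaded bool is i8; the fix now emits trunc i8 -> i1 */
  (*v)[1] = (x < y); /* comparison result is already i1; the guard skips the trunc */
}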
