llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clangir Author: Vicky Nguyen (iamvickynguyen) <details> <summary>Changes</summary> Related to https://github.com/llvm/llvm-project/issues/185382 CIR lowering for - narrowing-addition intrinsics (https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#narrowing-addition) Port tests: - `clang/test/CodeGen/AArch64/neon-intrinsics.c` to `clang/test/CodeGen/AArch64/neon/add.c` --- Patch is 65.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/204989.diff 3 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+40-1) - (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (-724) - (modified) clang/test/CodeGen/AArch64/neon/add.c (+545) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 8b077620d2bab..779f7249c929a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -685,7 +685,28 @@ static mlir::Value emitCommonNeonBuiltinExpr( mlir::Value result = cgf.getBuilder().createXor(loc, ops[0], ops[1]); return cgf.getBuilder().createBitcast(result, ty); } - case NEON::BI__builtin_neon_vaddhn_v: + case NEON::BI__builtin_neon_vaddhn_v: { + cir::VectorType srcTy = + cgf.getBuilder().getExtendedOrTruncatedElementVectorType( + vTy, /*isExtended=*/true, /*isSigned=*/false); + + // %sum = add <4 x i32> %lhs, %rhs + ops[0] = cgf.getBuilder().createBitcast(ops[0], srcTy); + ops[1] = cgf.getBuilder().createBitcast(ops[1], srcTy); + mlir::Value result = cgf.getBuilder().createAdd(loc, ops[0], ops[1]); + + // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + auto wideEltTy = mlir::cast<cir::IntType>(srcTy.getElementType()); + mlir::Value shiftAmt = cgf.getBuilder().getConstantInt( + loc, wideEltTy, wideEltTy.getWidth() / 2); + mlir::Value shiftVec = + emitNeonShiftVector(cgf.getBuilder(), shiftAmt, srcTy, 
loc, + /*neg=*/false); + result = cgf.getBuilder().createShiftRight(loc, result, shiftVec); + + // %res = trunc <4 x i32> %high to <4 x i16> + return cgf.getBuilder().createIntCast(result, vTy); + } case NEON::BI__builtin_neon_vcale_v: case NEON::BI__builtin_neon_vcaleq_v: case NEON::BI__builtin_neon_vcalt_v: @@ -1060,6 +1081,10 @@ static mlir::Value emitCommonNeonBuiltinExpr( std::string("unimplemented AArch64 builtin call: ") + cgf.getContext().BuiltinInfo.getName(builtinID)); break; + case NEON::BI__builtin_neon_vhadd_v: + case NEON::BI__builtin_neon_vhaddq_v: + case NEON::BI__builtin_neon_vrhadd_v: + case NEON::BI__builtin_neon_vrhaddq_v: case NEON::BI__builtin_neon_vshl_v: case NEON::BI__builtin_neon_vshlq_v: { llvm::StringRef llvmIntrName = @@ -1073,6 +1098,20 @@ static mlir::Value emitCommonNeonBuiltinExpr( mlir::Type resultType = cgf.convertType(expr->getType()); return cgf.getBuilder().createBitcast(result, resultType); } + case NEON::BI__builtin_neon_vraddhn_v: { + cir::VectorType srcTy = + cgf.getBuilder().getExtendedOrTruncatedElementVectorType( + vTy, /*isExtended=*/true, /*isSigned=*/true); + + llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix( + static_cast<llvm::Intrinsic::ID>(llvmIntrinsic)); + mlir::Value result = + emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(), + /*argTypes=*/{srcTy, srcTy}, ops, llvmIntrName, + /*funcResTy=*/vTy, loc); + mlir::Type resultType = cgf.convertType(expr->getType()); + return cgf.getBuilder().createBitcast(result, resultType); + } } // NYI diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index b37ed5aa29f10..78b4495e9f8cc 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -2404,174 +2404,6 @@ uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { return vcltq_f64(v1, v2); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vhadd_s8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> 
noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]] -// -int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { - return vhadd_s8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vhadd_s16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) -// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[TMP2]] -// -int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { - return vhadd_s16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vhadd_s32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) -// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[TMP2]] 
-// -int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { - return vhadd_s32(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vhadd_u8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VHADD_V_I]] -// -uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { - return vhadd_u8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vhadd_u16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) -// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[TMP2]] -// -uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { - return vhadd_u16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vhadd_u32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> 
[[VHADD_V1_I]]) -// CHECK-NEXT: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[TMP2]] -// -uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { - return vhadd_u32(v1, v2); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vhaddq_s8( -// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) -// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]] -// -int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { - return vhaddq_s8(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vhaddq_s16( -// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> -// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) -// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { - return vhaddq_s16(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vhaddq_s32( -// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> -// CHECK-NEXT: 
[[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) -// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { - return vhaddq_s32(v1, v2); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vhaddq_u8( -// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) -// CHECK-NEXT: ret <16 x i8> [[VHADDQ_V_I]] -// -uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { - return vhaddq_u8(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vhaddq_u16( -// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> -// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) -// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { - return vhaddq_u16(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vhaddq_u32( -// 
CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> -// CHECK-NEXT: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) -// CHECK-NEXT: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { - return vhaddq_u32(v1, v2); -} - // CHECK-LABEL: define dso_local <8 x i8> @test_vhsub_s8( // CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] @@ -2740,174 +2572,6 @@ uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { return vhsubq_u32(v1, v2); } -// CHECK-LABEL: define dso_local <8 x i8> @test_vrhadd_s8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]] -// -int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { - return vrhadd_s8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vrhadd_s16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 
x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) -// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[TMP2]] -// -int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { - return vrhadd_s16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vrhadd_s32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) -// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[TMP2]] -// -int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { - return vrhadd_s32(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vrhadd_u8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]]) -// CHECK-NEXT: ret <8 x i8> [[VRHADD_V_I]] -// -uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { - return vrhadd_u8(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i16> @test_vrhadd_u16( -// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) -// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> -// CHECK-NEXT: ret <4 x i16> [[TMP2]] -// -uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { - return vrhadd_u16(v1, v2); -} - -// CHECK-LABEL: define dso_local <2 x i32> @test_vrhadd_u32( -// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8> -// CHECK-NEXT: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) -// CHECK-NEXT: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> -// CHECK-NEXT: ret <2 x i32> [[TMP2]] -// -uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { - return vrhadd_u32(v1, v2); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vrhaddq_s8( -// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) -// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]] -// -int8x16_t 
test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { - return vrhaddq_s8(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vrhaddq_s16( -// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8> -// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) -// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { - return vrhaddq_s16(v1, v2); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vrhaddq_s32( -// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8> -// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) -// CHECK-NEXT: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { - return vrhaddq_s32(v1, v2); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vrhaddq_u8( -// CHECK-SAME: 
<16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]]) -// CHECK-NEXT: ret <16 x i8> [[VRHADDQ_V_I]] -// -uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { - return vrhaddq_u8(v1, v2); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vrhaddq_u16( -// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bit... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/204989 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
