llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: SiHuaN (sihuan) <details> <summary>Changes</summary> Add the __riscv_predsum/predsumu_* header wrappers over new __builtin_riscv_* builtins, lowering to the llvm.riscv.predsum/predsumu intrinsics. Stacked on #<!-- -->206430 (its commit appears first here); please review/merge that PR first. Once it lands I will rebase so only the Clang commit remains. --- Patch is 27.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/206441.diff 8 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsRISCV.td (+18) - (modified) clang/lib/CodeGen/TargetBuiltins/RISCV.cpp (+42) - (modified) clang/lib/Headers/riscv_packed_simd.h (+25) - (modified) clang/test/CodeGen/RISCV/rvp-intrinsics.c (+254) - (modified) cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c (+114) - (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5-1) - (modified) llvm/test/CodeGen/RISCV/rvp-simd-32.ll (-4) - (modified) llvm/test/CodeGen/RISCV/rvp-simd-64.ll (-4) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td index ee20fefadd7c3..3f84528fdca70 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.td +++ b/clang/include/clang/Basic/BuiltinsRISCV.td @@ -215,6 +215,24 @@ def pabd_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>, _Ve def pabdu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">; def pabdu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; +// Packed Reduction Sum (32-bit) +def predsum_i8x4_i32 : RISCVBuiltin<"int(_Vector<4, signed char>, int)">; +def predsumu_u8x4_u32 : RISCVBuiltin<"unsigned int(_Vector<4, unsigned char>, unsigned int)">; +def predsum_i16x2_i32 : RISCVBuiltin<"int(_Vector<2, short>, int)">; +def predsumu_u16x2_u32 : RISCVBuiltin<"unsigned int(_Vector<2, 
unsigned short>, unsigned int)">; + +// Packed Reduction Sum (64-bit) +def predsum_i8x8_i32 : RISCVBuiltin<"int(_Vector<8, signed char>, int)">; +def predsumu_u8x8_u32 : RISCVBuiltin<"unsigned int(_Vector<8, unsigned char>, unsigned int)">; +def predsum_i16x4_i32 : RISCVBuiltin<"int(_Vector<4, short>, int)">; +def predsumu_u16x4_u32 : RISCVBuiltin<"unsigned int(_Vector<4, unsigned short>, unsigned int)">; +def predsum_i8x8_i64 : RISCVBuiltin<"int64_t(_Vector<8, signed char>, int64_t)">; +def predsumu_u8x8_u64 : RISCVBuiltin<"uint64_t(_Vector<8, unsigned char>, uint64_t)">; +def predsum_i16x4_i64 : RISCVBuiltin<"int64_t(_Vector<4, short>, int64_t)">; +def predsumu_u16x4_u64 : RISCVBuiltin<"uint64_t(_Vector<4, unsigned short>, uint64_t)">; +def predsum_i32x2_i64 : RISCVBuiltin<"int64_t(_Vector<2, int>, int64_t)">; +def predsumu_u32x2_u64 : RISCVBuiltin<"uint64_t(_Vector<2, unsigned int>, uint64_t)">; + } // Features = "experimental-p" //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index d5b027fe5f8fe..bb8fa86e7a564 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -1327,6 +1327,48 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, break; } + // Packed Reduction Sum + case RISCV::BI__builtin_riscv_predsum_i8x4_i32: + case RISCV::BI__builtin_riscv_predsum_i16x2_i32: + case RISCV::BI__builtin_riscv_predsum_i8x8_i32: + case RISCV::BI__builtin_riscv_predsum_i16x4_i32: + case RISCV::BI__builtin_riscv_predsum_i8x8_i64: + case RISCV::BI__builtin_riscv_predsum_i16x4_i64: + case RISCV::BI__builtin_riscv_predsum_i32x2_i64: + case RISCV::BI__builtin_riscv_predsumu_u8x4_u32: + case RISCV::BI__builtin_riscv_predsumu_u16x2_u32: + case RISCV::BI__builtin_riscv_predsumu_u8x8_u32: + case RISCV::BI__builtin_riscv_predsumu_u16x4_u32: + case 
RISCV::BI__builtin_riscv_predsumu_u8x8_u64: + case RISCV::BI__builtin_riscv_predsumu_u16x4_u64: + case RISCV::BI__builtin_riscv_predsumu_u32x2_u64: { + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID"); + case RISCV::BI__builtin_riscv_predsum_i8x4_i32: + case RISCV::BI__builtin_riscv_predsum_i16x2_i32: + case RISCV::BI__builtin_riscv_predsum_i8x8_i32: + case RISCV::BI__builtin_riscv_predsum_i16x4_i32: + case RISCV::BI__builtin_riscv_predsum_i8x8_i64: + case RISCV::BI__builtin_riscv_predsum_i16x4_i64: + case RISCV::BI__builtin_riscv_predsum_i32x2_i64: + ID = Intrinsic::riscv_predsum; + break; + case RISCV::BI__builtin_riscv_predsumu_u8x4_u32: + case RISCV::BI__builtin_riscv_predsumu_u16x2_u32: + case RISCV::BI__builtin_riscv_predsumu_u8x8_u32: + case RISCV::BI__builtin_riscv_predsumu_u16x4_u32: + case RISCV::BI__builtin_riscv_predsumu_u8x8_u64: + case RISCV::BI__builtin_riscv_predsumu_u16x4_u64: + case RISCV::BI__builtin_riscv_predsumu_u32x2_u64: + ID = Intrinsic::riscv_predsumu; + break; + } + + IntrinsicTypes = {ResultType, Ops[0]->getType()}; + break; + } + // Zk builtins // Zknh diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h index 5aa00f1519671..c61e156ca6a7f 100644 --- a/clang/lib/Headers/riscv_packed_simd.h +++ b/clang/lib/Headers/riscv_packed_simd.h @@ -103,6 +103,12 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8))); return (rty)builtin(__rs1, __rs2); \ } +#define __packed_reduction(name, rty, ty, builtin) \ + static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \ + rty __rs2) { \ + return builtin(__rs1, __rs2); \ + } + // clang-format off: macro call sites have no trailing semicolons, which // confuses clang-format into a deeply nested expression. 
@@ -424,6 +430,24 @@ __packed_binary_builtin_cast(pabd_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_ __packed_binary_builtin_cast(pabdu_u8x8, uint8x8_t, uint8x8_t, __builtin_riscv_pabdu_u8x8) __packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, __builtin_riscv_pabdu_u16x4) +/* Packed Reduction Sum (32-bit) */ +__packed_reduction(predsum_i8x4_i32, int32_t, int8x4_t, __builtin_riscv_predsum_i8x4_i32) +__packed_reduction(predsumu_u8x4_u32, uint32_t, uint8x4_t, __builtin_riscv_predsumu_u8x4_u32) +__packed_reduction(predsum_i16x2_i32, int32_t, int16x2_t, __builtin_riscv_predsum_i16x2_i32) +__packed_reduction(predsumu_u16x2_u32, uint32_t, uint16x2_t, __builtin_riscv_predsumu_u16x2_u32) + +/* Packed Reduction Sum (64-bit) */ +__packed_reduction(predsum_i8x8_i32, int32_t, int8x8_t, __builtin_riscv_predsum_i8x8_i32) +__packed_reduction(predsumu_u8x8_u32, uint32_t, uint8x8_t, __builtin_riscv_predsumu_u8x8_u32) +__packed_reduction(predsum_i16x4_i32, int32_t, int16x4_t, __builtin_riscv_predsum_i16x4_i32) +__packed_reduction(predsumu_u16x4_u32, uint32_t, uint16x4_t, __builtin_riscv_predsumu_u16x4_u32) +__packed_reduction(predsum_i8x8_i64, int64_t, int8x8_t, __builtin_riscv_predsum_i8x8_i64) +__packed_reduction(predsumu_u8x8_u64, uint64_t, uint8x8_t, __builtin_riscv_predsumu_u8x8_u64) +__packed_reduction(predsum_i16x4_i64, int64_t, int16x4_t, __builtin_riscv_predsum_i16x4_i64) +__packed_reduction(predsumu_u16x4_u64, uint64_t, uint16x4_t, __builtin_riscv_predsumu_u16x4_u64) +__packed_reduction(predsum_i32x2_i64, int64_t, int32x2_t, __builtin_riscv_predsum_i32x2_i64) +__packed_reduction(predsumu_u32x2_u64, uint64_t, uint32x2_t, __builtin_riscv_predsumu_u32x2_u64) + // clang-format on #undef __packed_splat2 @@ -443,6 +467,7 @@ __packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, __builtin_risc #undef __packed_cmp #undef __packed_pabs #undef __packed_binary_builtin_cast +#undef __packed_reduction #undef __DEFAULT_FN_ATTRS #if defined(__cplusplus) diff 
--git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c index d3f153109b904..290f61787ceff 100644 --- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -5889,3 +5889,257 @@ uint8x8_t test_pabdu_u8x8(uint8x8_t rs1, uint8x8_t rs2) { uint16x4_t test_pabdu_u16x4(uint16x4_t rs1, uint16x4_t rs2) { return __riscv_pabdu_u16x4(rs1, rs2); } + +/* Packed Reduction Sum (32-bit) */ +// RV32-LABEL: define dso_local i32 @test_predsum_i8x4_i32( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v4i8(<4 x i8> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsum_i8x4_i32( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v4i8(<4 x i8> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +int32_t test_predsum_i8x4_i32(int8x4_t rs1, int32_t rs2) { + return __riscv_predsum_i8x4_i32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsumu_u8x4_u32( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v4i8(<4 x i8> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsumu_u8x4_u32( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: 
[[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v4i8(<4 x i8> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +uint32_t test_predsumu_u8x4_u32(uint8x4_t rs1, uint32_t rs2) { + return __riscv_predsumu_u8x4_u32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsum_i16x2_i32( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v2i16(<2 x i16> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsum_i16x2_i32( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v2i16(<2 x i16> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +int32_t test_predsum_i16x2_i32(int16x2_t rs1, int32_t rs2) { + return __riscv_predsum_i16x2_i32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsumu_u16x2_u32( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v2i16(<2 x i16> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsumu_u16x2_u32( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v2i16(<2 x i16> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +uint32_t test_predsumu_u16x2_u32(uint16x2_t rs1, uint32_t rs2) { + return 
__riscv_predsumu_u16x2_u32(rs1, rs2); +} + +/* Packed Reduction Sum (64-bit) */ +// RV32-LABEL: define dso_local i32 @test_predsum_i8x8_i32( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v8i8(<8 x i8> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsum_i8x8_i32( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v8i8(<8 x i8> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +int32_t test_predsum_i8x8_i32(int8x8_t rs1, int32_t rs2) { + return __riscv_predsum_i8x8_i32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsumu_u8x8_u32( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v8i8(<8 x i8> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsumu_u8x8_u32( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v8i8(<8 x i8> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +uint32_t test_predsumu_u8x8_u32(uint8x8_t rs1, uint32_t rs2) { + return __riscv_predsumu_u8x8_u32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsum_i16x4_i32( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) 
#[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v4i16(<4 x i16> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsum_i16x4_i32( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsum.i32.v4i16(<4 x i16> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +int32_t test_predsum_i16x4_i32(int16x4_t rs1, int32_t rs2) { + return __riscv_predsum_i16x4_i32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_predsumu_u16x4_u32( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v4i16(<4 x i16> [[TMP0]], i32 [[RS2]]) +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local signext i32 @test_predsumu_u16x4_u32( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i32 noundef signext [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i32 @llvm.riscv.predsumu.i32.v4i16(<4 x i16> [[TMP0]], i32 [[RS2]]) +// RV64-NEXT: ret i32 [[TMP1]] +// +uint32_t test_predsumu_u16x4_u32(uint16x4_t rs1, uint32_t rs2) { + return __riscv_predsumu_u16x4_u32(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsum_i8x8_i64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v8i8(<8 x i8> [[TMP0]], 
i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsum_i8x8_i64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v8i8(<8 x i8> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +int64_t test_predsum_i8x8_i64(int8x8_t rs1, int64_t rs2) { + return __riscv_predsum_i8x8_i64(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsumu_u8x8_u64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v8i8(<8 x i8> [[TMP0]], i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsumu_u8x8_u64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v8i8(<8 x i8> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +uint64_t test_predsumu_u8x8_u64(uint8x8_t rs1, uint64_t rs2) { + return __riscv_predsumu_u8x8_u64(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsum_i16x4_i64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v4i16(<4 x i16> [[TMP0]], i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsum_i16x4_i64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: 
[[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v4i16(<4 x i16> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +int64_t test_predsum_i16x4_i64(int16x4_t rs1, int64_t rs2) { + return __riscv_predsum_i16x4_i64(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsumu_u16x4_u64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v4i16(<4 x i16> [[TMP0]], i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsumu_u16x4_u64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v4i16(<4 x i16> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +uint64_t test_predsumu_u16x4_u64(uint16x4_t rs1, uint64_t rs2) { + return __riscv_predsumu_u16x4_u64(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsum_i32x2_i64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v2i32(<2 x i32> [[TMP0]], i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsum_i32x2_i64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsum.i64.v2i32(<2 x i32> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +int64_t 
test_predsum_i32x2_i64(int32x2_t rs1, int64_t rs2) { + return __riscv_predsum_i32x2_i64(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_predsumu_u32x2_u64( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v2i32(<2 x i32> [[TMP0]], i64 [[RS2]]) +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_predsumu_u32x2_u64( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.riscv.predsumu.i64.v2i32(<2 x i32> [[TMP0]], i64 [[RS2]]) +// RV64-NEXT: ret i64 [[TMP1]] +// +uint64_t test_predsumu_u32x2_u64(uint32x2_t rs1, uint64_t rs2) { + return __riscv_predsumu_u32x2_u64(rs1, rs2); +} diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c index 020a6be70aadb..4b939675cbeb5 100644 --- a/cross-project-tests/intrinsic-header-tests/ri... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/206441 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
