llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-risc-v Author: TelGome <details> <summary>Changes</summary> This PR adds support for the RISC-V P extension intrinsics [Packed Merge](https://github.com/riscv/riscv-p-spec/blob/master/P-ext-intrinsics.adoc#packed-merge). --- Patch is 45.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/207110.diff 9 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsRISCV.td (+14) - (modified) clang/lib/CodeGen/TargetBuiltins/RISCV.cpp (+24-1) - (modified) clang/lib/Headers/riscv_packed_simd.h (+21) - (modified) clang/test/CodeGen/RISCV/rvp-intrinsics.c (+244) - (modified) cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c (+198) - (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+8) - (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+48-4) - (modified) llvm/test/CodeGen/RISCV/rvp-simd-32.ll (+109) - (modified) llvm/test/CodeGen/RISCV/rvp-simd-64.ll (+271) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td index 3f84528fdca70..785e41324ad78 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.td +++ b/clang/include/clang/Basic/BuiltinsRISCV.td @@ -233,6 +233,20 @@ def predsumu_u16x4_u64 : RISCVBuiltin<"uint64_t(_Vector<4, unsigned short>, uint def predsum_i32x2_i64 : RISCVBuiltin<"int64_t(_Vector<2, int>, int64_t)">; def predsumu_u32x2_u64 : RISCVBuiltin<"uint64_t(_Vector<2, unsigned int>, uint64_t)">; +// Packed Merge (32-bit) +def pmerge_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>, _Vector<4, unsigned char>)">; +def pmerge_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed char>, _Vector<4, signed char>, _Vector<4, unsigned char>)">; +def pmerge_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>, _Vector<2, unsigned short>)">; +def pmerge_i16x2 : RISCVBuiltin<"_Vector<2, 
short>(_Vector<2, short>, _Vector<2, short>, _Vector<2, unsigned short>)">; + +// Packed Merge (64-bit) +def pmerge_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>, _Vector<8, unsigned char>)">; +def pmerge_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed char>, _Vector<8, signed char>, _Vector<8, unsigned char>)">; +def pmerge_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>, _Vector<4, unsigned short>)">; +def pmerge_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>, _Vector<4, unsigned short>)">; +def pmerge_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>, _Vector<2, unsigned int>)">; +def pmerge_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>, _Vector<2, unsigned int>)">; + } // Features = "experimental-p" //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index bb8fa86e7a564..588e229499082 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -1247,7 +1247,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pabdu_u8x4: case RISCV::BI__builtin_riscv_pabdu_u16x2: case RISCV::BI__builtin_riscv_pabdu_u8x8: - case RISCV::BI__builtin_riscv_pabdu_u16x4: { + case RISCV::BI__builtin_riscv_pabdu_u16x4: + // Packed Merge + case RISCV::BI__builtin_riscv_pmerge_u8x4: + case RISCV::BI__builtin_riscv_pmerge_i8x4: + case RISCV::BI__builtin_riscv_pmerge_u16x2: + case RISCV::BI__builtin_riscv_pmerge_i16x2: + case RISCV::BI__builtin_riscv_pmerge_u8x8: + case RISCV::BI__builtin_riscv_pmerge_i8x8: + case RISCV::BI__builtin_riscv_pmerge_u16x4: + case RISCV::BI__builtin_riscv_pmerge_i16x4: + case RISCV::BI__builtin_riscv_pmerge_u32x2: + 
case RISCV::BI__builtin_riscv_pmerge_i32x2: { switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); @@ -1321,6 +1332,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pabdu_u16x4: ID = Intrinsic::riscv_pabdu; break; + case RISCV::BI__builtin_riscv_pmerge_u8x4: + case RISCV::BI__builtin_riscv_pmerge_i8x4: + case RISCV::BI__builtin_riscv_pmerge_u16x2: + case RISCV::BI__builtin_riscv_pmerge_i16x2: + case RISCV::BI__builtin_riscv_pmerge_u8x8: + case RISCV::BI__builtin_riscv_pmerge_i8x8: + case RISCV::BI__builtin_riscv_pmerge_u16x4: + case RISCV::BI__builtin_riscv_pmerge_i16x4: + case RISCV::BI__builtin_riscv_pmerge_u32x2: + case RISCV::BI__builtin_riscv_pmerge_i32x2: + ID = Intrinsic::riscv_pmerge; + break; } IntrinsicTypes = {ResultType}; diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h index c61e156ca6a7f..cdfa7ec5812ba 100644 --- a/clang/lib/Headers/riscv_packed_simd.h +++ b/clang/lib/Headers/riscv_packed_simd.h @@ -109,6 +109,12 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8))); return builtin(__rs1, __rs2); \ } +#define __packed_merge_builtin(name, ty, mask_ty, builtin) \ + static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name( \ + ty __rs1, ty __rs2, mask_ty __rd) { \ + return (ty)builtin(__rs1, __rs2, __rd); \ + } + // clang-format off: macro call sites have no trailing semicolons, which // confuses clang-format into a deeply nested expression. 
@@ -448,6 +454,20 @@ __packed_reduction(predsumu_u16x4_u64, uint64_t, uint16x4_t, __builtin_riscv_pre __packed_reduction(predsum_i32x2_i64, int64_t, int32x2_t, __builtin_riscv_predsum_i32x2_i64) __packed_reduction(predsumu_u32x2_u64, uint64_t, uint32x2_t, __builtin_riscv_predsumu_u32x2_u64) +/* Packed Merge (32-bit) */ +__packed_merge_builtin(pmerge_u8x4, uint8x4_t, uint8x4_t, __builtin_riscv_pmerge_u8x4) +__packed_merge_builtin(pmerge_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pmerge_i8x4) +__packed_merge_builtin(pmerge_u16x2, uint16x2_t, uint16x2_t, __builtin_riscv_pmerge_u16x2) +__packed_merge_builtin(pmerge_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pmerge_i16x2) + +/* Packed Merge (64-bit) */ +__packed_merge_builtin(pmerge_u8x8, uint8x8_t, uint8x8_t, __builtin_riscv_pmerge_u8x8) +__packed_merge_builtin(pmerge_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pmerge_i8x8) +__packed_merge_builtin(pmerge_u16x4, uint16x4_t, uint16x4_t, __builtin_riscv_pmerge_u16x4) +__packed_merge_builtin(pmerge_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pmerge_i16x4) +__packed_merge_builtin(pmerge_u32x2, uint32x2_t, uint32x2_t, __builtin_riscv_pmerge_u32x2) +__packed_merge_builtin(pmerge_i32x2, int32x2_t, uint32x2_t, __builtin_riscv_pmerge_i32x2) + // clang-format on #undef __packed_splat2 @@ -468,6 +488,7 @@ __packed_reduction(predsumu_u32x2_u64, uint64_t, uint32x2_t, __builtin_riscv_pre #undef __packed_pabs #undef __packed_binary_builtin_cast #undef __packed_reduction +#undef __packed_merge_builtin #undef __DEFAULT_FN_ATTRS #if defined(__cplusplus) diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c index 290f61787ceff..ec555e52d62fe 100644 --- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -6143,3 +6143,247 @@ int64_t test_predsum_i32x2_i64(int32x2_t rs1, int64_t rs2) { uint64_t test_predsumu_u32x2_u64(uint32x2_t rs1, uint64_t rs2) { return __riscv_predsumu_u32x2_u64(rs1, rs2); } + 
+// Packed Merge (32-bit) + +// RV32-LABEL: define dso_local i32 @test_pmerge_u8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32 +// RV32-NEXT: ret i32 [[TMP4]] +// +// RV64-LABEL: define dso_local i32 @test_pmerge_u8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32 +// RV64-NEXT: ret i32 [[TMP4]] +// +uint8x4_t test_pmerge_u8x4(uint8x4_t rs1, uint8x4_t rs2, uint8x4_t rd) { + return __riscv_pmerge_u8x4(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i32 @test_pmerge_i8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = 
bitcast <4 x i8> [[TMP3]] to i32 +// RV32-NEXT: ret i32 [[TMP4]] +// +// RV64-LABEL: define dso_local i32 @test_pmerge_i8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32 +// RV64-NEXT: ret i32 [[TMP4]] +// +int8x4_t test_pmerge_i8x4(int8x4_t rs1, int8x4_t rs2, uint8x4_t rd) { + return __riscv_pmerge_i8x4(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i32 @test_pmerge_u16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32 +// RV32-NEXT: ret i32 [[TMP4]] +// +// RV64-LABEL: define dso_local i32 @test_pmerge_u16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x i16> [[TMP0]], <2 x i16> 
[[TMP1]], <2 x i16> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32 +// RV64-NEXT: ret i32 [[TMP4]] +// +uint16x2_t test_pmerge_u16x2(uint16x2_t rs1, uint16x2_t rs2, uint16x2_t rd) { + return __riscv_pmerge_u16x2(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i32 @test_pmerge_i16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32 +// RV32-NEXT: ret i32 [[TMP4]] +// +// RV64-LABEL: define dso_local i32 @test_pmerge_i16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32 +// RV64-NEXT: ret i32 [[TMP4]] +// +int16x2_t test_pmerge_i16x2(int16x2_t rs1, int16x2_t rs2, uint16x2_t rd) { + return __riscv_pmerge_i16x2(rs1, rs2, rd); +} + +// Packed Merge (64-bit) + +// RV32-LABEL: define dso_local i64 @test_pmerge_u8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// 
RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +// RV32-NEXT: ret i64 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_pmerge_u8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +// RV64-NEXT: ret i64 [[TMP4]] +// +uint8x8_t test_pmerge_u8x8(uint8x8_t rs1, uint8x8_t rs2, uint8x8_t rd) { + return __riscv_pmerge_u8x8(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i64 @test_pmerge_i8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +// RV32-NEXT: ret i64 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_pmerge_i8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = 
bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +// RV64-NEXT: ret i64 [[TMP4]] +// +int8x8_t test_pmerge_i8x8(int8x8_t rs1, int8x8_t rs2, uint8x8_t rd) { + return __riscv_pmerge_i8x8(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i64 @test_pmerge_u16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 +// RV32-NEXT: ret i64 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_pmerge_u16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 +// RV64-NEXT: ret i64 [[TMP4]] +// +uint16x4_t test_pmerge_u16x4(uint16x4_t rs1, uint16x4_t rs2, uint16x4_t rd) { + return __riscv_pmerge_u16x4(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i64 
@test_pmerge_i16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 +// RV32-NEXT: ret i64 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_pmerge_i16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64 +// RV64-NEXT: ret i64 [[TMP4]] +// +int16x4_t test_pmerge_i16x4(int16x4_t rs1, int16x4_t rs2, uint16x4_t rd) { + return __riscv_pmerge_i16x4(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i64 @test_pmerge_u32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 
+// RV32-NEXT: ret i64 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_pmerge_u32x2( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +// RV64-NEXT: ret i64 [[TMP4]] +// +uint32x2_t test_pmerge_u32x2(uint32x2_t rs1, uint32x2_t rs2, uint32x2_t rd) { + return __riscv_pmerge_u32x2(rs1, rs2, rd); +} + +// RV32-LABEL: define dso_local i64 @test_pmerge_i32x2( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <2... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/207110 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
