Author: TelGome Date: 2026-06-18T22:32:00+08:00 New Revision: b02659ab5c405dd221f625085e2656a190d87a54
URL: https://github.com/llvm/llvm-project/commit/b02659ab5c405dd221f625085e2656a190d87a54 DIFF: https://github.com/llvm/llvm-project/commit/b02659ab5c405dd221f625085e2656a190d87a54.diff LOG: [RISCV][P-ext] Support Packed Absolute Value and Absolute Difference (#203840) This PR supports the RISC-V P extension intrinsics [Packed Absolute Value and Absolute Difference](https://github.com/riscv/riscv-p-spec/blob/master/P-ext-intrinsics.adoc#packed-absolute-value-and-absolute-difference) Added: Modified: clang/include/clang/Basic/BuiltinsRISCV.td clang/lib/CodeGen/TargetBuiltins/RISCV.cpp clang/lib/Headers/riscv_packed_simd.h clang/test/CodeGen/RISCV/rvp-intrinsics.c cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c llvm/include/llvm/IR/IntrinsicsRISCV.td llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/test/CodeGen/RISCV/rvp-simd-32.ll llvm/test/CodeGen/RISCV/rvp-simd-64.ll Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td index 185269bfc6d85..3a1b54763bae6 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.td +++ b/clang/include/clang/Basic/BuiltinsRISCV.td @@ -181,6 +181,18 @@ def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned ch def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">; +// Packed Absolute Value and Absolute Difference (32-bit) +def pabd_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>, _Vector<4, signed char>)">; +def pabd_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>, _Vector<2, short>)">; +def pabdu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>)">; +def pabdu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned 
short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>)">; + +// Packed Absolute Value and Absolute Difference (64-bit) +def pabd_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed char>, _Vector<8, signed char>)">; +def pabd_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>, _Vector<4, short>)">; +def pabdu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">; +def pabdu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; + } // Features = "experimental-p" //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index 8c0684110dad7..a1e9acb7ec2c8 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -1219,7 +1219,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pasubu_u16x2: case RISCV::BI__builtin_riscv_pasubu_u8x8: case RISCV::BI__builtin_riscv_pasubu_u16x4: - case RISCV::BI__builtin_riscv_pasubu_u32x2: { + case RISCV::BI__builtin_riscv_pasubu_u32x2: + // Packed Absolute Value and Absolute Difference + case RISCV::BI__builtin_riscv_pabd_i8x4: + case RISCV::BI__builtin_riscv_pabd_i16x2: + case RISCV::BI__builtin_riscv_pabd_i8x8: + case RISCV::BI__builtin_riscv_pabd_i16x4: + case RISCV::BI__builtin_riscv_pabdu_u8x4: + case RISCV::BI__builtin_riscv_pabdu_u16x2: + case RISCV::BI__builtin_riscv_pabdu_u8x8: + case RISCV::BI__builtin_riscv_pabdu_u16x4: { switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); @@ -1251,6 +1260,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pasubu_u32x2: ID = Intrinsic::riscv_pasubu; break; + case RISCV::BI__builtin_riscv_pabd_i8x4: + case RISCV::BI__builtin_riscv_pabd_i16x2: + case 
RISCV::BI__builtin_riscv_pabd_i8x8: + case RISCV::BI__builtin_riscv_pabd_i16x4: + ID = Intrinsic::riscv_pabd; + break; + case RISCV::BI__builtin_riscv_pabdu_u8x4: + case RISCV::BI__builtin_riscv_pabdu_u16x2: + case RISCV::BI__builtin_riscv_pabdu_u8x8: + case RISCV::BI__builtin_riscv_pabdu_u16x4: + ID = Intrinsic::riscv_pabdu; + break; } IntrinsicTypes = {ResultType}; diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h index 7e981c91ec3e1..56f6b108d5f14 100644 --- a/clang/lib/Headers/riscv_packed_simd.h +++ b/clang/lib/Headers/riscv_packed_simd.h @@ -92,6 +92,17 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8))); return (rty)(__rs1 op __rs2); \ } +#define __packed_pabs(name, ty, rty) \ + static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \ + return (rty)__builtin_elementwise_abs(__rs1); \ + } + +#define __packed_binary_builtin_cast(name, ty, rty, builtin) \ + static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \ + ty __rs2) { \ + return (rty)builtin(__rs1, __rs2); \ + } + // clang-format off: macro call sites have no trailing semicolons, which // confuses clang-format into a deeply nested expression. 
@@ -375,6 +386,22 @@ __packed_binary_builtin(pasubu_u8x8, uint8x8_t, __builtin_riscv_pasubu_u8x8) __packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4) __packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2) +/* Packed Absolute Value and Absolute Difference (32-bit) */ +__packed_pabs(pabs_i8x4, int8x4_t, uint8x4_t) +__packed_pabs(pabs_i16x2, int16x2_t, uint16x2_t) +__packed_binary_builtin_cast(pabd_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pabd_i8x4) +__packed_binary_builtin_cast(pabd_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pabd_i16x2) +__packed_binary_builtin_cast(pabdu_u8x4, uint8x4_t, uint8x4_t, __builtin_riscv_pabdu_u8x4) +__packed_binary_builtin_cast(pabdu_u16x2, uint16x2_t, uint16x2_t, __builtin_riscv_pabdu_u16x2) + +/* Packed Absolute Value and Absolute Difference (64-bit) */ +__packed_pabs(pabs_i8x8, int8x8_t, uint8x8_t) +__packed_pabs(pabs_i16x4, int16x4_t, uint16x4_t) +__packed_binary_builtin_cast(pabd_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pabd_i8x8) +__packed_binary_builtin_cast(pabd_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pabd_i16x4) +__packed_binary_builtin_cast(pabdu_u8x8, uint8x8_t, uint8x8_t, __builtin_riscv_pabdu_u8x8) +__packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, __builtin_riscv_pabdu_u16x4) + // clang-format on #undef __packed_splat2 @@ -392,6 +419,8 @@ __packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2) #undef __packed_sh1add #undef __packed_sh1sadd #undef __packed_cmp +#undef __packed_pabs +#undef __packed_binary_builtin_cast #undef __DEFAULT_FN_ATTRS #if defined(__cplusplus) diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c index 363bfa5f5d995..cc388d0ab0328 100644 --- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -5235,3 +5235,259 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t rs1, uint16x4_t rs2) { uint32x2_t 
test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) { return __riscv_pasubu_u32x2(rs1, rs2); } + +// RV32-LABEL: define dso_local i32 @test_pabs_i8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[TMP0]], i1 false) +// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32 +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local i32 @test_pabs_i8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[TMP0]], i1 false) +// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32 +// RV64-NEXT: ret i32 [[TMP1]] +// +uint8x4_t test_pabs_i8x4(int8x4_t rs1) { + return __riscv_pabs_i8x4(rs1); +} + +// RV32-LABEL: define dso_local i32 @test_pabs_i16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[TMP0]], i1 false) +// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32 +// RV32-NEXT: ret i32 [[TMP1]] +// +// RV64-LABEL: define dso_local i32 @test_pabs_i16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[TMP0]], i1 false) +// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32 +// RV64-NEXT: ret i32 [[TMP1]] +// +uint16x2_t test_pabs_i16x2(int16x2_t rs1) { + return __riscv_pabs_i16x2(rs1); +} + +// RV32-LABEL: define dso_local i32 @test_pabd_i8x4( +// RV32-SAME: i32 noundef 
[[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pabd_i8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint8x4_t test_pabd_i8x4(int8x4_t rs1, int8x4_t rs2) { + return __riscv_pabd_i8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pabd_i16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pabd_i16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) 
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint16x2_t test_pabd_i16x2(int16x2_t rs1, int16x2_t rs2) { + return __riscv_pabd_i16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pabdu_u8x4( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pabdu_u8x4( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint8x4_t test_pabdu_u8x4(uint8x4_t rs1, uint8x4_t rs2) { + return __riscv_pabdu_u8x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i32 @test_pabdu_u16x2( +// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pabdu_u16x2( +// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef 
[[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +uint16x2_t test_pabdu_u16x2(uint16x2_t rs1, uint16x2_t rs2) { + return __riscv_pabdu_u16x2(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pabs_i8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false) +// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64 +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_pabs_i8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false) +// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64 +// RV64-NEXT: ret i64 [[TMP1]] +// +uint8x8_t test_pabs_i8x8(int8x8_t rs1) { + return __riscv_pabs_i8x8(rs1); +} + +// RV32-LABEL: define dso_local i64 @test_pabs_i16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP0]], i1 false) +// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64 +// RV32-NEXT: ret i64 [[TMP1]] +// +// RV64-LABEL: define dso_local i64 @test_pabs_i16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: 
[[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP0]], i1 false) +// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64 +// RV64-NEXT: ret i64 [[TMP1]] +// +uint16x4_t test_pabs_i16x4(int16x4_t rs1) { + return __riscv_pabs_i16x4(rs1); +} + +// RV32-LABEL: define dso_local i64 @test_pabd_i8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pabd_i8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t test_pabd_i8x8(int8x8_t rs1, int8x8_t rs2) { + return __riscv_pabd_i8x8(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pabd_i16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// 
RV64-LABEL: define dso_local i64 @test_pabd_i16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t test_pabd_i16x4(int16x4_t rs1, int16x4_t rs2) { + return __riscv_pabd_i16x4(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pabdu_u8x8( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pabdu_u8x8( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8> +// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint8x8_t test_pabdu_u8x8(uint8x8_t rs1, uint8x8_t rs2) { + return __riscv_pabdu_u8x8(rs1, rs2); +} + +// RV32-LABEL: define dso_local i64 @test_pabdu_u16x4( +// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to 
<4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pabdu_u16x4( +// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +uint16x4_t test_pabdu_u16x4(uint16x4_t rs1, uint16x4_t rs2) { + return __riscv_pabdu_u16x4(rs1, rs2); +} diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c index edbc56ce0e199..e9f90fc17e23d 100644 --- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c +++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c @@ -1772,3 +1772,73 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t a, uint16x4_t b) { uint32x2_t test_pasubu_u32x2(uint32x2_t a, uint32x2_t b) { return __riscv_pasubu_u32x2(a, b); } + +// CHECK-LABEL: test_pabs_i8x4: +// CHECK: pabs.b +uint8x4_t test_pabs_i8x4(int8x4_t a) { return __riscv_pabs_i8x4(a); } + +// CHECK-LABEL: test_pabs_i16x2: +// CHECK: pabs.h +uint16x2_t test_pabs_i16x2(int16x2_t a) { return __riscv_pabs_i16x2(a); } + +// CHECK-LABEL: test_pabd_i8x4: +// CHECK: pabd.b +uint8x4_t test_pabd_i8x4(int8x4_t a, int8x4_t b) { + return __riscv_pabd_i8x4(a, b); +} + +// CHECK-LABEL: test_pabd_i16x2: +// CHECK: pabd.h +uint16x2_t test_pabd_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pabd_i16x2(a, b); +} + +// CHECK-LABEL: test_pabdu_u8x4: 
+// CHECK: pabdu.b +uint8x4_t test_pabdu_u8x4(uint8x4_t a, uint8x4_t b) { + return __riscv_pabdu_u8x4(a, b); +} + +// CHECK-LABEL: test_pabdu_u16x2: +// CHECK: pabdu.h +uint16x2_t test_pabdu_u16x2(uint16x2_t a, uint16x2_t b) { + return __riscv_pabdu_u16x2(a, b); +} + +// CHECK-LABEL: test_pabs_i8x8: +// RV32: pabs.db +// RV64: pabs.b +uint8x8_t test_pabs_i8x8(int8x8_t a) { return __riscv_pabs_i8x8(a); } + +// CHECK-LABEL: test_pabs_i16x4: +// RV32: pabs.dh +// RV64: pabs.h +uint16x4_t test_pabs_i16x4(int16x4_t a) { return __riscv_pabs_i16x4(a); } + +// CHECK-LABEL: test_pabd_i8x8: +// RV32: pabd.db +// RV64: pabd.b +uint8x8_t test_pabd_i8x8(int8x8_t a, int8x8_t b) { + return __riscv_pabd_i8x8(a, b); +} + +// CHECK-LABEL: test_pabd_i16x4: +// RV32: pabd.dh +// RV64: pabd.h +uint16x4_t test_pabd_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pabd_i16x4(a, b); +} + +// CHECK-LABEL: test_pabdu_u8x8: +// RV32: pabdu.db +// RV64: pabdu.b +uint8x8_t test_pabdu_u8x8(uint8x8_t a, uint8x8_t b) { + return __riscv_pabdu_u8x8(a, b); +} + +// CHECK-LABEL: test_pabdu_u16x4: +// RV32: pabdu.dh +// RV64: pabdu.h +uint16x4_t test_pabdu_u16x4(uint16x4_t a, uint16x4_t b) { + return __riscv_pabdu_u16x4(a, b); +} diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index b2add44b19a5e..a82b17591f780 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -2046,16 +2046,20 @@ def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffec // Packed SIMD extensions //===----------------------------------------------------------------------===// let TargetPrefix = "riscv" in { -// Packed Averaging Addition and Subtraction. class RVPBinaryIntrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; + // Packed Averaging Addition and Subtraction. 
def int_riscv_paadd : RVPBinaryIntrinsic; def int_riscv_paaddu : RVPBinaryIntrinsic; def int_riscv_pasub : RVPBinaryIntrinsic; def int_riscv_pasubu : RVPBinaryIntrinsic; + + // Packed Absolute Value and Absolute Difference + def int_riscv_pabd : RVPBinaryIntrinsic; + def int_riscv_pabdu : RVPBinaryIntrinsic; } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d18b52437c98d..8b1a23896ff38 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11761,7 +11761,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_paadd: case Intrinsic::riscv_paaddu: case Intrinsic::riscv_pasub: - case Intrinsic::riscv_pasubu: { + case Intrinsic::riscv_pasubu: + case Intrinsic::riscv_pabd: + case Intrinsic::riscv_pabdu: { unsigned Opc; switch (IntNo) { case Intrinsic::riscv_paadd: @@ -11776,6 +11778,12 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_pasubu: Opc = RISCVISD::ASUBU; break; + case Intrinsic::riscv_pabd: + Opc = ISD::ABDS; + break; + case Intrinsic::riscv_pabdu: + Opc = ISD::ABDU; + break; } return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(1), @@ -15688,7 +15696,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::riscv_paadd: case Intrinsic::riscv_paaddu: case Intrinsic::riscv_pasub: - case Intrinsic::riscv_pasubu: { + case Intrinsic::riscv_pasubu: + case Intrinsic::riscv_pabd: + case Intrinsic::riscv_pabdu: { EVT VT = N->getValueType(0); if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16)) return; @@ -15707,6 +15717,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::riscv_pasubu: Opc = RISCVISD::ASUBU; break; + case Intrinsic::riscv_pabd: + Opc = ISD::ABDS; + break; + case Intrinsic::riscv_pabdu: + 
Opc = ISD::ABDU; + break; } EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16; diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll index ff44c8b3cda4e..56d30878d7ac2 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll @@ -2775,3 +2775,39 @@ define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x i16> %b) { %res = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> %a, <2 x i16> %b) ret <2 x i16> %res } + +define <4 x i8> @test_pabd_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_pabd_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pabd.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <2 x i16> @test_pabd_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pabd_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pabd.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <4 x i8> @test_pabdu_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: test_pabdu_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pabdu.b a0, a0, a1 +; CHECK-NEXT: ret + %res = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> %a, <4 x i8> %b) + ret <4 x i8> %res +} + +define <2 x i16> @test_pabdu_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pabdu_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pabdu.h a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll index 470434d27c0a3..8bc93f6e3c2e3 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll @@ -5094,3 +5094,59 @@ define <2 x i16> @test_pnsra_hs_mask(<2 x i32> %a, i32 %shamt) { %trunc = trunc <2 x i32> %ashr to <2 x i16> ret <2 x i16> %trunc } + +define <8 x i8> @test_pabd_v8i8(<8 x i8> %a, <8 x i8> 
%b) { +; RV32-LABEL: test_pabd_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: pabd.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pabd_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: pabd.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <4 x i16> @test_pabd_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pabd_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: pabd.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pabd_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: pabd.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <8 x i8> @test_pabdu_v8i8(<8 x i8> %a, <8 x i8> %b) { +; RV32-LABEL: test_pabdu_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: pabdu.db a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pabdu_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: pabdu.b a0, a0, a1 +; RV64-NEXT: ret + %res = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %res +} + +define <4 x i16> @test_pabdu_v4i16(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pabdu_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: pabdu.dh a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pabdu_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: pabdu.h a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
