https://github.com/sihuan created https://github.com/llvm/llvm-project/pull/174068
This patch adds initial intrinsic support for the RISC-V P extension, introducing padd and psub operations. The implementation is based on the `Packed Addition and Subtraction` section of the P extension intrinsic specification:

>From 7bcd9d9f0f93157b37703f621c7bfda0a034ec78 Mon Sep 17 00:00:00 2001
From: SiHuaN <[email protected]>
Date: Wed, 31 Dec 2025 13:36:54 +0800
Subject: [PATCH] [RISCV] Preliminary P-ext intrinsics support

This patch adds initial intrinsic support for the RISC-V P extension,
introducing padd and psub operations.
---
 clang/include/clang/Basic/BuiltinsRISCV.td  |  17 ++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp  |  31 ++-
 clang/test/CodeGen/RISCV/rvp-intrinsics.c   | 223 ++++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsRISCV.td     |  12 ++
 llvm/lib/Target/RISCV/RISCVCallingConv.cpp  |  75 ++++---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   5 +
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td    |  36 ++++
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td  |  14 +-
 llvm/test/CodeGen/RISCV/rvp-intrinsics.ll   | 135 ++++++++++++
 9 files changed, 514 insertions(+), 34 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvp-intrinsics.c
 create mode 100644 llvm/test/CodeGen/RISCV/rvp-intrinsics.ll

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 2dad5ede2d64b..1c43371cd52fc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -137,6 +137,23 @@ def sm3p0 : RISCVBuiltin<"unsigned int(unsigned int)">;
 def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
 } // Features = "zksh"
 
+//===----------------------------------------------------------------------===//
+// P extension.
+//===----------------------------------------------------------------------===//
+let Features = "experimental-p" in {
+def padd_v4i8 : RISCVBuiltin<"_Vector<4, char>(_Vector<4, char>, _Vector<4, char>)">;
+def padd_v2i16 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">;
+def padd_v8i8 : RISCVBuiltin<"_Vector<8, char>(_Vector<8, char>, _Vector<8, char>)">;
+def padd_v4i16 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">;
+def padd_v2i32 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">;
+
+def psub_v4i8 : RISCVBuiltin<"_Vector<4, char>(_Vector<4, char>, _Vector<4, char>)">;
+def psub_v2i16 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">;
+def psub_v8i8 : RISCVBuiltin<"_Vector<8, char>(_Vector<8, char>, _Vector<8, char>)">;
+def psub_v4i16 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">;
+def psub_v2i32 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">;
+} // Features = "experimental-p"
+
 } // Attributes = [Const, NoThrow]
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 2e11037f0dcd0..8cc8b03db0137 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1143,7 +1143,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_brev8_32:
   case RISCV::BI__builtin_riscv_brev8_64:
   case RISCV::BI__builtin_riscv_zip_32:
-  case RISCV::BI__builtin_riscv_unzip_32: {
+  case RISCV::BI__builtin_riscv_unzip_32:
+  case RISCV::BI__builtin_riscv_padd_v4i8:
+  case RISCV::BI__builtin_riscv_padd_v2i16:
+  case RISCV::BI__builtin_riscv_padd_v8i8:
+  case RISCV::BI__builtin_riscv_padd_v4i16:
+  case RISCV::BI__builtin_riscv_padd_v2i32:
+  case RISCV::BI__builtin_riscv_psub_v4i8:
+  case RISCV::BI__builtin_riscv_psub_v2i16:
+  case RISCV::BI__builtin_riscv_psub_v8i8:
+  case RISCV::BI__builtin_riscv_psub_v4i16:
+  case RISCV::BI__builtin_riscv_psub_v2i32: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
     // Zbb
@@ -1187,11 +1197,26 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_unzip_32:
       ID = Intrinsic::riscv_unzip;
       break;
-    }
+    // P extension
+    case RISCV::BI__builtin_riscv_padd_v4i8:
+    case RISCV::BI__builtin_riscv_padd_v2i16:
+    case RISCV::BI__builtin_riscv_padd_v8i8:
+    case RISCV::BI__builtin_riscv_padd_v4i16:
+    case RISCV::BI__builtin_riscv_padd_v2i32:
+      ID = Intrinsic::riscv_padd;
+      break;
+    case RISCV::BI__builtin_riscv_psub_v4i8:
+    case RISCV::BI__builtin_riscv_psub_v2i16:
+    case RISCV::BI__builtin_riscv_psub_v8i8:
+    case RISCV::BI__builtin_riscv_psub_v4i16:
+    case RISCV::BI__builtin_riscv_psub_v2i32:
+      ID = Intrinsic::riscv_psub;
+      break;
+    }
+  }
     IntrinsicTypes = {ResultType};
     break;
-  }
 
   // Zk builtins
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
new file mode 100644
index 0000000000000..2d047f2438e8b
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -0,0 +1,223 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-p -emit-llvm %s -O2 -o - | FileCheck %s --check-prefix=RV32
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-p -emit-llvm %s -O2 -o - | FileCheck %s --check-prefix=RV64
+
+#include <stdint.h>
+
+typedef int8_t v4i8 __attribute__((vector_size(4)));
+typedef int16_t v2i16 __attribute__((vector_size(4)));
+typedef int8_t v8i8 __attribute__((vector_size(8)));
+typedef int16_t v4i16 __attribute__((vector_size(8)));
+typedef int32_t v2i32 __attribute__((vector_size(8)));
+
+// RV32-LABEL: @test_padd_v4i8(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v4i8(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <4 x i8>
+// RV64-NEXT:    [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v4i8 test_padd_v4i8(v4i8 a, v4i8 b) {
+  return __builtin_riscv_padd_v4i8(a, b);
+}
+
+// RV32-LABEL: @test_padd_v2i16(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v2i16(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <2 x i16>
+// RV64-NEXT:    [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v2i16 test_padd_v2i16(v2i16 a, v2i16 b) {
+  return __builtin_riscv_padd_v2i16(a, b);
+}
+
+// RV32-LABEL: @test_padd_v8i8(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v8i8(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v8i8 test_padd_v8i8(v8i8 a, v8i8 b) {
+  return __builtin_riscv_padd_v8i8(a, b);
+}
+
+// RV32-LABEL: @test_padd_v4i16(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v4i16(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v4i16 test_padd_v4i16(v4i16 a, v4i16 b) {
+  return __builtin_riscv_padd_v4i16(a, b);
+}
+
+// RV32-LABEL: @test_padd_v2i32(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v2i32(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v2i32 test_padd_v2i32(v2i32 a, v2i32 b) {
+  return __builtin_riscv_padd_v2i32(a, b);
+}
+
+// RV32-LABEL: @test_psub_v4i8(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v4i8(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <4 x i8>
+// RV64-NEXT:    [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v4i8 test_psub_v4i8(v4i8 a, v4i8 b) {
+  return __builtin_riscv_psub_v4i8(a, b);
+}
+
+// RV32-LABEL: @test_psub_v2i16(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v2i16(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <2 x i16>
+// RV64-NEXT:    [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v2i16 test_psub_v2i16(v2i16 a, v2i16 b) {
+  return __builtin_riscv_psub_v2i16(a, b);
+}
+
+// RV32-LABEL: @test_psub_v8i8(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v8i8(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v8i8 test_psub_v8i8(v8i8 a, v8i8 b) {
+  return __builtin_riscv_psub_v8i8(a, b);
+}
+
+// RV32-LABEL: @test_psub_v4i16(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v4i16(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v4i16 test_psub_v4i16(v4i16 a, v4i16 b) {
+  return __builtin_riscv_psub_v4i16(a, b);
+}
+
+// RV32-LABEL: @test_psub_v2i32(
+// RV32-NEXT:  entry:
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v2i32(
+// RV64-NEXT:  entry:
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+v2i32 test_psub_v2i32(v2i32 a, v2i32 b) {
+  return __builtin_riscv_psub_v2i32(a, b);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 9088e5e6a357b..c35e09e372e89 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1978,6 +1978,18 @@ let TargetPrefix = "riscv" in {
   defm vfncvt_sat_f_f_q_alt : RISCVConversionRoundingMode;
 } // TargetPrefix = "riscv"
 
+//===----------------------------------------------------------------------===//
+// Packed SIMD (P) Extension
+
+let TargetPrefix = "riscv" in {
+  def int_riscv_padd : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                             [LLVMMatchType<0>, LLVMMatchType<0>],
+                                             [IntrNoMem]>;
+  def int_riscv_psub : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                             [LLVMMatchType<0>, LLVMMatchType<0>],
+                                             [IntrNoMem]>;
+}
+
 // Vendor extensions
 //===----------------------------------------------------------------------===//
 include "llvm/IR/IntrinsicsRISCVXTHead.td"
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 78f47794a5b66..c8688d8aefaf3 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -545,37 +545,53 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
   unsigned StoreSizeBytes = XLen / 8;
   Align StackAlign = Align(XLen / 8);
 
+  static const MCPhysReg ArgGPRPairs[] = {RISCV::X10_X11, RISCV::X12_X13,
+                                          RISCV::X14_X15, RISCV::X16_X17};
+
   if (ValVT.isVector() || ValVT.isRISCVVectorTuple()) {
-    Reg = allocateRVVReg(ValVT, ValNo, State, TLI);
-    if (Reg) {
-      // Fixed-length vectors are located in the corresponding scalable-vector
-      // container types.
-      if (ValVT.isFixedLengthVector()) {
-        LocVT = TLI.getContainerForFixedLengthVector(LocVT);
-        State.addLoc(
-            CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-        return false;
-      }
-    } else {
-      // For return values, the vector must be passed fully via registers or
-      // via the stack.
-      // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
-      // but we're using all of them.
-      if (IsRet)
-        return true;
-      // Try using a GPR to pass the address
-      if ((Reg = State.AllocateReg(ArgGPRs))) {
-        LocVT = XLenVT;
-        LocInfo = CCValAssign::Indirect;
-      } else if (ValVT.isScalableVector()) {
-        LocVT = XLenVT;
-        LocInfo = CCValAssign::Indirect;
+    bool IsPVectorInGPR = false;
+    if (Subtarget.enablePExtSIMDCodeGen() && ValVT.isVector()) {
+      const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
+      if (RC == &RISCV::GPRRegClass || RC == &RISCV::GPRPairRegClass)
+        IsPVectorInGPR = true;
+    }
+
+    if (!IsPVectorInGPR) {
+      Reg = allocateRVVReg(ValVT, ValNo, State, TLI);
+      if (Reg) {
+        // Fixed-length vectors are located in the corresponding scalable-vector
+        // container types.
+        if (ValVT.isFixedLengthVector()) {
+          LocVT = TLI.getContainerForFixedLengthVector(LocVT);
+          State.addLoc(
+              CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+          return false;
+        }
       } else {
-        StoreSizeBytes = ValVT.getStoreSize();
-        // Align vectors to their element sizes, being careful for vXi1
-        // vectors.
-        StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
+        // For return values, the vector must be passed fully via registers or
+        // via the stack.
+        // FIXME: The proposed vector ABI only mandates v8-v15 for return
+        // values, but we're using all of them.
+        if (IsRet)
+          return true;
+        // Try using a GPR to pass the address
+        if ((Reg = State.AllocateReg(ArgGPRs))) {
+          LocVT = XLenVT;
+          LocInfo = CCValAssign::Indirect;
+        } else if (ValVT.isScalableVector()) {
+          LocVT = XLenVT;
+          LocInfo = CCValAssign::Indirect;
+        } else {
+          StoreSizeBytes = ValVT.getStoreSize();
+          // Align vectors to their element sizes, being careful for vXi1
+          // vectors.
+          StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
+        }
       }
+    } else if (XLen == 32 && ValVT.getSizeInBits() == 64) {
+      Reg = State.AllocateReg(ArgGPRPairs);
+    } else {
+      Reg = State.AllocateReg(ArgGPRs);
     }
   } else {
     Reg = State.AllocateReg(ArgGPRs);
@@ -604,7 +620,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
   assert(((ValVT.isFloatingPoint() && !ValVT.isVector()) ||
           LocVT == XLenVT ||
           (TLI.getSubtarget().hasVInstructions() &&
-           (ValVT.isVector() || ValVT.isRISCVVectorTuple()))) &&
+           (ValVT.isVector() || ValVT.isRISCVVectorTuple())) ||
+          (Subtarget.enablePExtSIMDCodeGen() && ValVT.isVector())) &&
          "Expected an XLenVT or vector types at this stage");
 
   if (Reg) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c60f740d37576..d084c1cfdc8b4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -293,9 +293,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
       addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
       addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
+      addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
+      addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
     } else {
       addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
       addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
+      addRegisterClass(MVT::v8i8, &RISCV::GPRPairRegClass);
+      addRegisterClass(MVT::v4i16, &RISCV::GPRPairRegClass);
+      addRegisterClass(MVT::v2i32, &RISCV::GPRPairRegClass);
     }
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 92a9c06fc534b..6ff47656d344e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1685,3 +1685,39 @@ let Predicates = [HasStdExtP, IsRV64] in {
 def : Pat<(v2i32 (build_vector (XLenVT GPR:$a), (XLenVT GPR:$b))),
           (PACK GPR:$a, GPR:$b)>;
 } // Predicates = [HasStdExtP, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// P-extension Intrinsic patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtP] in {
+  def : Pat<(XLenVecI8VT (int_riscv_padd (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2))), (PADD_B GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVecI8VT (int_riscv_psub (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2))), (PSUB_B GPR:$rs1, GPR:$rs2)>;
+
+  def : Pat<(XLenVecI16VT (int_riscv_padd (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2))), (PADD_H GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVecI16VT (int_riscv_psub (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2))), (PSUB_H GPR:$rs1, GPR:$rs2)>;
+}
+
+let Predicates = [HasStdExtP, IsRV64] in {
+  def : Pat<(v2i32 (int_riscv_padd (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))), (PADD_W GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(v2i32 (int_riscv_psub (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))), (PSUB_W GPR:$rs1, GPR:$rs2)>;
+
+  // Sub-XLEN vectors on RV64
+  def : Pat<(v4i8 (int_riscv_padd (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PADD_B GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(v4i8 (int_riscv_psub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSUB_B GPR:$rs1, GPR:$rs2)>;
+
+  def : Pat<(v2i16 (int_riscv_padd (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PADD_H GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(v2i16 (int_riscv_psub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSUB_H GPR:$rs1, GPR:$rs2)>;
+}
+
+let Predicates = [HasStdExtP, IsRV32] in {
+  // 64-bit vectors (v8i8, v4i16, v2i32) using register pairs
+  def : Pat<(v8i8 (int_riscv_padd (v8i8 GPRPairRV32:$rs1), (v8i8 GPRPairRV32:$rs2))), (PADD_DB GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+  def : Pat<(v8i8 (int_riscv_psub (v8i8 GPRPairRV32:$rs1), (v8i8 GPRPairRV32:$rs2))), (PSUB_DB GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+
+  def : Pat<(v4i16 (int_riscv_padd (v4i16 GPRPairRV32:$rs1), (v4i16 GPRPairRV32:$rs2))), (PADD_DH GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+  def : Pat<(v4i16 (int_riscv_psub (v4i16 GPRPairRV32:$rs1), (v4i16 GPRPairRV32:$rs2))), (PSUB_DH GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+
+  def : Pat<(v2i32 (int_riscv_padd (v2i32 GPRPairRV32:$rs1), (v2i32 GPRPairRV32:$rs2))), (PADD_DW GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+  def : Pat<(v2i32 (int_riscv_psub (v2i32 GPRPairRV32:$rs1), (v2i32 GPRPairRV32:$rs2))), (PSUB_DW GPRPairRV32:$rs1, GPRPairRV32:$rs2)>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e3657badfa9a4..98eaa1420f338 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -230,6 +230,13 @@ def XLenVecI16VT : ValueTypeByHwMode<[RV32, RV64],
                                      [v2i16, v4i16]>;
 def XLenVecI32VT : ValueTypeByHwMode<[RV64],
                                      [v2i32]>;
+
+def XLenPairVecI8VT : ValueTypeByHwMode<[RV32],
+                                        [v8i8]>;
+def XLenPairVecI16VT : ValueTypeByHwMode<[RV32],
+                                         [v4i16]>;
+def XLenPairVecI32VT : ValueTypeByHwMode<[RV32],
+                                         [v2i32]>;
 
 def XLenRI : RegInfoByHwMode<
       [RV32, RV64],
       [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -248,7 +255,8 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
 
 class GPRRegisterClass<dag regList>
     : RISCVRegisterClass<[XLenVT, XLenFVT,
                           // P extension packed vector types:
-                          XLenVecI8VT, XLenVecI16VT, XLenVecI32VT], 32, regList> {
+                          XLenVecI8VT, XLenVecI16VT, XLenVecI32VT,
+                          v4i8, v2i16], 32, regList> {
   let RegInfos = XLenRI;
 }
@@ -369,7 +377,9 @@ let RegAltNameIndices = [ABIRegAltName] in {
 }
 
 let RegInfos = XLenPairRI, CopyCost = 2 in {
-def GPRPair : RISCVRegisterClass<[XLenPairVT, XLenPairFVT], 64, (add
+def GPRPair : RISCVRegisterClass<[XLenPairVT, XLenPairFVT,
+                                  XLenPairVecI8VT, XLenPairVecI16VT,
+                                  XLenPairVecI32VT], 64, (add
     X10_X11, X12_X13, X14_X15, X16_X17,
     X6_X7, X28_X29, X30_X31,
diff --git a/llvm/test/CodeGen/RISCV/rvp-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvp-intrinsics.ll
new file mode 100644
index 0000000000000..71d75de1a71d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp-intrinsics.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -riscv-enable-p-ext-simd-codegen -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -riscv-enable-p-ext-simd-codegen -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <4 x i8> @test_padd_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_padd_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_padd_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_padd_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <8 x i8> @test_padd_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_padd_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    padd.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_padd_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    padd.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_padd_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_padd_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    padd.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_padd_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    padd.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_padd_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_padd_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    padd.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_padd_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    padd.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}
+
+define <4 x i8> @test_psub_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_psub_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    psub.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_psub_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_psub_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    psub.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <8 x i8> @test_psub_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_psub_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    psub.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_psub_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    psub.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_psub_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_psub_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    psub.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_psub_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    psub.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_psub_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_psub_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    psub.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_psub_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    psub.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}
+
+declare <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16>, <2 x i16>)
+declare <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8>, <8 x i8>)
+declare <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16>, <4 x i16>)
+declare <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32>, <2 x i32>)
+
+declare <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16>, <2 x i16>)
+declare <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8>, <8 x i8>)
+declare <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16>, <4 x i16>)
+declare <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32>, <2 x i32>)
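
For reviewers who want to try the new builtins from C, here is a minimal usage sketch (not part of the patch). It reuses the vector typedefs from the added clang test; the function names are illustrative. It assumes a compiler built with this change and the experimental-p target feature enabled, as in the test's RUN lines; the expected instruction selection noted in the comments mirrors the added llc test.

  #include <stdint.h>

  typedef int8_t  v4i8  __attribute__((vector_size(4)));
  typedef int16_t v2i16 __attribute__((vector_size(4)));

  /* Packed byte add: expected to select padd.b on both RV32 and RV64. */
  v4i8 add_bytes(v4i8 a, v4i8 b) {
    return __builtin_riscv_padd_v4i8(a, b);
  }

  /* Packed halfword subtract: expected to select psub.h. */
  v2i16 sub_halves(v2i16 a, v2i16 b) {
    return __builtin_riscv_psub_v2i16(a, b);
  }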
