llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: SiHuaN (sihuan) <details> <summary>Changes</summary> This patch adds `riscv_simd.h`, introducing initial support for RISC-V P extension intrinsics. The supported operations include: - Packed addition and subtraction (padd, psub) - Packed logical and arithmetic shifts (psll, psrl, psra) These intrinsics are implemented using standard C operators to generate canonical LLVM IR (e.g., `add <4 x i8>`, `shl <2 x i16>`). The implementation relies on the RISC-V backend to correctly lower this IR to specific P extension instructions. Specification: https://github.com/topperc/p-ext-intrinsics/blob/main/source/index.md --- Patch is 46.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181115.diff 4 Files Affected: - (modified) clang/lib/CodeGen/Targets/RISCV.cpp (+7) - (modified) clang/lib/Headers/CMakeLists.txt (+1) - (added) clang/lib/Headers/riscv_simd.h (+245) - (added) clang/test/CodeGen/RISCV/rvp-intrinsics.c (+769) ``````````diff diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index d1345891e9fb6..01496c4bdbb54 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -714,6 +714,13 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, // Generic vector without riscv_vls_cc should fall through and pass by // reference. 
return coerceVLSVector(Ty, ABIVLen); + if (getContext().getTargetInfo().hasFeature("experimental-p") && + VT->getVectorKind() == VectorKind::Generic && + VT->getElementType()->isIntegerType() && (Size == 32 || Size == 64)) { + uint64_t EltSize = getContext().getTypeSize(VT->getElementType()); + if (EltSize == 8 || EltSize == 16 || EltSize == 32) + return ABIArgInfo::getDirect(); + } } // Aggregates which are <= 2*XLen will be passed in registers if possible, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c92b370b88d2d..76574d7a937e8 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -137,6 +137,7 @@ set(riscv_files sifive_vector.h andes_vector.h riscv_mips.h + riscv_simd.h ) set(spirv_files diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h new file mode 100644 index 0000000000000..262f35b483cbd --- /dev/null +++ b/clang/lib/Headers/riscv_simd.h @@ -0,0 +1,245 @@ +/*===---- riscv_simd.h - RISC-V P intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RISCV_SIMD_H +#define __RISCV_SIMD_H + +#include <stdint.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Packed SIMD Types */ + +typedef int8_t int8x4_t __attribute__((vector_size(4))); +typedef uint8_t uint8x4_t __attribute__((vector_size(4))); +typedef int16_t int16x2_t __attribute__((vector_size(4))); +typedef uint16_t uint16x2_t __attribute__((vector_size(4))); + +typedef int8_t int8x8_t __attribute__((vector_size(8))); +typedef uint8_t uint8x8_t __attribute__((vector_size(8))); +typedef int16_t int16x4_t __attribute__((vector_size(8))); +typedef uint16_t uint16x4_t __attribute__((vector_size(8))); +typedef int32_t int32x2_t __attribute__((vector_size(8))); +typedef uint32_t uint32x2_t __attribute__((vector_size(8))); + +/* Packed Addition and Subtraction (32-bit) */ + +static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_i8x4(int8x4_t __rs1, int8x4_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_i16x2(int16x2_t __rs1, int16x2_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_i8x4(int8x4_t __rs1, int8x4_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) 
+__riscv_psub_i16x2(int16x2_t __rs1, int16x2_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) { + return __rs1 - __rs2; +} + +/* Packed Addition and Subtraction (64-bit) */ + +static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_i8x8(int8x8_t __rs1, int8x8_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_i16x4(int16x4_t __rs1, int16x4_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_i32x2(int32x2_t __rs1, int32x2_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_padd_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) { + return __rs1 + __rs2; +} + +static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_i8x8(int8x8_t __rs1, int8x8_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_i16x4(int16x4_t __rs1, int16x4_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__)) 
+__riscv_psub_i32x2(int32x2_t __rs1, int32x2_t __rs2) { + return __rs1 - __rs2; +} + +static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psub_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) { + return __rs1 - __rs2; +} + +/* Packed Shifts (32-bit) */ + +static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_u8x4(uint8x4_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_i8x4(int8x4_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_u16x2(uint16x2_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_i16x2(int16x2_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psrl_s_u8x4(uint8x4_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psrl_s_u16x2(uint16x2_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psra_s_i8x4(int8x4_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psra_s_i16x2(int16x2_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +/* Packed Shifts (64-bit) */ + +static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_u8x8(uint8x8_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_i8x8(int8x8_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static 
__inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_u16x4(uint16x4_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_i16x4(int16x4_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_u32x2(uint32x2_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psll_s_i32x2(int32x2_t __rs1, unsigned __shamt) { + return __rs1 << __shamt; +} + +static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psrl_s_u8x8(uint8x8_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psrl_s_u16x4(uint16x4_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psrl_s_u32x2(uint32x2_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psra_s_i8x8(int8x8_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psra_s_i16x4(int16x4_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__)) +__riscv_psra_s_i32x2(int32x2_t __rs1, unsigned __shamt) { + return __rs1 >> __shamt; +} + +#if defined(__cplusplus) +} +#endif + +#endif /* __RISCV_SIMD_H */ diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c new file mode 100644 index 0000000000000..87ad2aa034b8c --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -0,0 +1,769 @@ +// NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-p \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV32 +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-p \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV64 + +#include <riscv_simd.h> + +/* 32-bit Packed Addition and Subtraction */ + +// RV32-LABEL: define dso_local <4 x i8> @test_padd_i8x4( +// RV32-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[A]], [[B]] +// RV32-NEXT: ret <4 x i8> [[ADD_I]] +// +// RV64-LABEL: define dso_local <4 x i8> @test_padd_i8x4( +// RV64-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[A]], [[B]] +// RV64-NEXT: ret <4 x i8> [[ADD_I]] +// +int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) { + return __riscv_padd_i8x4(a, b); +} + +// RV32-LABEL: define dso_local <4 x i8> @test_padd_u8x4( +// RV32-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[A]], [[B]] +// RV32-NEXT: ret <4 x i8> [[ADD_I]] +// +// RV64-LABEL: define dso_local <4 x i8> @test_padd_u8x4( +// RV64-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[A]], [[B]] +// RV64-NEXT: ret <4 x i8> [[ADD_I]] +// +uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) { + return __riscv_padd_u8x4(a, b); +} + +// RV32-LABEL: define dso_local <2 x i16> @test_padd_i16x2( +// RV32-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: 
[[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[A]], [[B]] +// RV32-NEXT: ret <2 x i16> [[ADD_I]] +// +// RV64-LABEL: define dso_local <2 x i16> @test_padd_i16x2( +// RV64-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[A]], [[B]] +// RV64-NEXT: ret <2 x i16> [[ADD_I]] +// +int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_padd_i16x2(a, b); +} + +// RV32-LABEL: define dso_local <2 x i16> @test_padd_u16x2( +// RV32-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[A]], [[B]] +// RV32-NEXT: ret <2 x i16> [[ADD_I]] +// +// RV64-LABEL: define dso_local <2 x i16> @test_padd_u16x2( +// RV64-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[A]], [[B]] +// RV64-NEXT: ret <2 x i16> [[ADD_I]] +// +uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) { + return __riscv_padd_u16x2(a, b); +} + +// RV32-LABEL: define dso_local <4 x i8> @test_psub_i8x4( +// RV32-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[A]], [[B]] +// RV32-NEXT: ret <4 x i8> [[SUB_I]] +// +// RV64-LABEL: define dso_local <4 x i8> @test_psub_i8x4( +// RV64-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[A]], [[B]] +// RV64-NEXT: ret <4 x i8> [[SUB_I]] +// +int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) { + return __riscv_psub_i8x4(a, b); +} + +// RV32-LABEL: define dso_local <4 x i8> @test_psub_u8x4( +// RV32-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[A]], [[B]] 
+// RV32-NEXT: ret <4 x i8> [[SUB_I]] +// +// RV64-LABEL: define dso_local <4 x i8> @test_psub_u8x4( +// RV64-SAME: <4 x i8> noundef [[A:%.*]], <4 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[A]], [[B]] +// RV64-NEXT: ret <4 x i8> [[SUB_I]] +// +uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) { + return __riscv_psub_u8x4(a, b); +} + +// RV32-LABEL: define dso_local <2 x i16> @test_psub_i16x2( +// RV32-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[A]], [[B]] +// RV32-NEXT: ret <2 x i16> [[SUB_I]] +// +// RV64-LABEL: define dso_local <2 x i16> @test_psub_i16x2( +// RV64-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[A]], [[B]] +// RV64-NEXT: ret <2 x i16> [[SUB_I]] +// +int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_psub_i16x2(a, b); +} + +// RV32-LABEL: define dso_local <2 x i16> @test_psub_u16x2( +// RV32-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[A]], [[B]] +// RV32-NEXT: ret <2 x i16> [[SUB_I]] +// +// RV64-LABEL: define dso_local <2 x i16> @test_psub_u16x2( +// RV64-SAME: <2 x i16> noundef [[A:%.*]], <2 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[A]], [[B]] +// RV64-NEXT: ret <2 x i16> [[SUB_I]] +// +uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) { + return __riscv_psub_u16x2(a, b); +} + +/* 64-bit Packed Addition and Subtraction */ + +// RV32-LABEL: define dso_local <8 x i8> @test_padd_i8x8( +// RV32-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]] +// RV32-NEXT: 
ret <8 x i8> [[ADD_I]] +// +// RV64-LABEL: define dso_local <8 x i8> @test_padd_i8x8( +// RV64-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]] +// RV64-NEXT: ret <8 x i8> [[ADD_I]] +// +int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) { + return __riscv_padd_i8x8(a, b); +} + +// RV32-LABEL: define dso_local <8 x i8> @test_padd_u8x8( +// RV32-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]] +// RV32-NEXT: ret <8 x i8> [[ADD_I]] +// +// RV64-LABEL: define dso_local <8 x i8> @test_padd_u8x8( +// RV64-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[A]], [[B]] +// RV64-NEXT: ret <8 x i8> [[ADD_I]] +// +uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) { + return __riscv_padd_u8x8(a, b); +} + +// RV32-LABEL: define dso_local <4 x i16> @test_padd_i16x4( +// RV32-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]] +// RV32-NEXT: ret <4 x i16> [[ADD_I]] +// +// RV64-LABEL: define dso_local <4 x i16> @test_padd_i16x4( +// RV64-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]] +// RV64-NEXT: ret <4 x i16> [[ADD_I]] +// +int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_padd_i16x4(a, b); +} + +// RV32-LABEL: define dso_local <4 x i16> @test_padd_u16x4( +// RV32-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]] +// RV32-NEXT: ret <4 x i16> [[ADD_I]] +// +// RV64-LABEL: define dso_local <4 x i16> 
@test_padd_u16x4( +// RV64-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A]], [[B]] +// RV64-NEXT: ret <4 x i16> [[ADD_I]] +// +uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) { + return __riscv_padd_u16x4(a, b); +} + +// RV32-LABEL: define dso_local <2 x i32> @test_padd_i32x2( +// RV32-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]] +// RV32-NEXT: ret <2 x i32> [[ADD_I]] +// +// RV64-LABEL: define dso_local <2 x i32> @test_padd_i32x2( +// RV64-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]] +// RV64-NEXT: ret <2 x i32> [[ADD_I]] +// +int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_padd_i32x2(a, b); +} + +// RV32-LABEL: define dso_local <2 x i32> @test_padd_u32x2( +// RV32-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]] +// RV32-NEXT: ret <2 x i32> [[ADD_I]] +// +// RV64-LABEL: define dso_local <2 x i32> @test_padd_u32x2( +// RV64-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A]], [[B]] +// RV64-NEXT: ret <2 x i32> [[ADD_I]] +// +uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) { + return __riscv_padd_u32x2(a, b); +} + +// RV32-LABEL: define dso_local <8 x i8> @test_psub_i8x8( +// RV32-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[B]] +// RV32-NEXT: ret <8 x i8> [[SUB_I]] +// +// RV64-LABEL: define dso_local <8 x i8> @test_psub_i8x8( +// RV64-SAME: <8 x i8> noundef 
[[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[B]] +// RV64-NEXT: ret <8 x i8> [[SUB_I]] +// +int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) { + return __riscv_psub_i8x8(a, b); +} + +// RV32-LABEL: define dso_local <8 x i8> @test_psub_u8x8( +// RV32-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[A]], [[B]] +// RV32-NEXT: ret <8 x i8> [[SUB_I]] +// +// RV64-LABEL: define ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/181115 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
