Author: tyb0807 Date: 2022-01-31T20:49:34Z New Revision: 51e188d079f6ee9d8bc640351f2f772234d809dd
URL: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd DIFF: https://github.com/llvm/llvm-project/commit/51e188d079f6ee9d8bc640351f2f772234d809dd.diff LOG: [AArch64] Support for memset tagged intrinsic This introduces a new ACLE intrinsic for memset tagged (https://github.com/ARM-software/acle/blob/next-release/main/acle.md#memcpy-family-of-operations-intrinsics---mops). void *__builtin_arm_mops_memset_tag(void *, int, size_t) A corresponding LLVM intrinsic is introduced: i8* llvm.aarch64.mops.memset.tag(i8*, i8, i64) The types match llvm.memset but the return type is not void. This is part 1/4 of a series of patches split from https://reviews.llvm.org/D117405 to facilitate reviewing. Patch by Tomas Matheson Differential Revision: https://reviews.llvm.org/D117753 Added: clang/test/CodeGen/aarch64-mops.c Modified: clang/include/clang/Basic/BuiltinsAArch64.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/arm_acle.h llvm/include/llvm/IR/IntrinsicsAArch64.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 634bcaed20a6..0869b87e32fb 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -62,6 +62,9 @@ BUILTIN(__builtin_arm_ldg, "v*v*", "t") BUILTIN(__builtin_arm_stg, "vv*", "t") BUILTIN(__builtin_arm_subp, "Uiv*v*", "t") +// Memory Operations +BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "") + // Memory barrier BUILTIN(__builtin_arm_dmb, "vUi", "nc") BUILTIN(__builtin_arm_dsb, "vUi", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2b7862e618bd..d071c7a5b4a4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9777,6 +9777,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1}); } + 
// Memory Operations (MOPS) + if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { + Value *Dst = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *Size = EmitScalarExpr(E->getArg(2)); + Dst = Builder.CreatePointerCast(Dst, Int8PtrTy); + Val = Builder.CreateTrunc(Val, Int8Ty); + Size = Builder.CreateIntCast(Size, Int64Ty, false); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); + } + // Memory Tagging Extensions (MTE) Intrinsics Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 45fac248dadb..1cfc1403276d 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -730,6 +730,12 @@ __arm_st64bv0(void *__addr, data512_t __value) { #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) #endif +/* Memory Operations Intrinsics */ +#if __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING +#define __arm_mops_memset_tag(__tagged_address, __value, __size) \ + __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) +#endif + /* Transactional Memory Extension (TME) Intrinsics */ #if __ARM_FEATURE_TME diff --git a/clang/test/CodeGen/aarch64-mops.c b/clang/test/CodeGen/aarch64-mops.c new file mode 100644 index 000000000000..0b6a2c26c1e8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-mops.c @@ -0,0 +1,153 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s | FileCheck %s + +#define __ARM_FEATURE_MOPS 1 +#include <arm_acle.h> +#include <stddef.h> + +// CHECK-LABEL: @bzero_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 
+// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_0(void *dst) { + return __arm_mops_memset_tag(dst, 0, 0); +} + +// CHECK-LABEL: @bzero_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_1(void *dst) { + return __arm_mops_memset_tag(dst, 0, 1); +} + +// CHECK-LABEL: @bzero_10( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_10(void *dst) { + return __arm_mops_memset_tag(dst, 0, 10); +} + +// CHECK-LABEL: @bzero_10000( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000) +// CHECK-NEXT: ret i8* [[TMP1]] +// +void *bzero_10000(void *dst) { + return __arm_mops_memset_tag(dst, 0, 10000); +} + +// CHECK-LABEL: @bzero_n( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* 
[[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]]) +// CHECK-NEXT: ret i8* [[TMP2]] +// +void *bzero_n(void *dst, size_t size) { + return __arm_mops_memset_tag(dst, 0, size); +} + +// CHECK-LABEL: @memset_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_0(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 0); +} + +// CHECK-LABEL: @memset_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_1(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 1); +} + +// CHECK-LABEL: @memset_10( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], 
align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_10(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 10); +} + +// CHECK-LABEL: @memset_10000( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000) +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *memset_10000(void *dst, int value) { + return __arm_mops_memset_tag(dst, value, 10000); +} + +// CHECK-LABEL: @memset_n( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 +// CHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]]) +// CHECK-NEXT: ret 
i8* [[TMP4]] +// +void *memset_n(void *dst, int value, size_t size) { + return __arm_mops_memset_tag(dst, value, size); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index e610c28a5923..a65ddff07a29 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -897,6 +897,14 @@ def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; } +//===----------------------------------------------------------------------===// +// Memory Operations (MOPS) Intrinsics +let TargetPrefix = "aarch64" in { + // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64 + def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; +} + // Transactional Memory Extension (TME) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits