gchatelet updated this revision to Diff 433858.
gchatelet added a comment.
- remove unused variable and rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D126903/new/
https://reviews.llvm.org/D126903
Files:
clang/docs/LanguageExtensions.rst
clang/include/clang/Basic/Builtins.def
clang/lib/CodeGen/CGBuilder.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/builtins-memset-inline.c
clang/test/Sema/builtins-memset-inline.cpp
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/include/llvm/IR/IRBuilder.h
llvm/include/llvm/IR/IntrinsicInst.h
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/Analysis/Lint.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/IRBuilder.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
llvm/test/CodeGen/X86/memset-inline.ll
llvm/test/Other/lint.ll
llvm/test/Verifier/intrinsic-immarg.ll
llvm/test/Verifier/memset-inline.ll
Index: llvm/test/Verifier/memset-inline.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/memset-inline.ll
@@ -0,0 +1,9 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: alignment is not a power of two
+
+define void @foo(i8* %P, i8 %value) {
+ call void @llvm.memset.inline.p0i8.i32(i8* align 3 %P, i8 %value, i32 4, i1 false)
+ ret void
+}
+declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
Index: llvm/test/Verifier/intrinsic-immarg.ll
===================================================================
--- llvm/test/Verifier/intrinsic-immarg.ll
+++ llvm/test/Verifier/intrinsic-immarg.ll
@@ -62,6 +62,23 @@
ret void
}
+declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1)
+define void @memset_inline_is_volatile(i8* %dest, i8 %value, i1 %is.volatile) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i1 %is.volatile
+ ; CHECK-NEXT: call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile)
+ call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile)
+ ret void
+}
+
+define void @memset_inline_variable_size(i8* %dest, i8 %value, i32 %size) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i32 %size
+ ; CHECK-NEXT: call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true)
+ call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true)
+ ret void
+}
+
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) {
Index: llvm/test/Other/lint.ll
===================================================================
--- llvm/test/Other/lint.ll
+++ llvm/test/Other/lint.ll
@@ -6,6 +6,8 @@
declare void @llvm.stackrestore(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @has_sret(i8* sret(i8) %p)
declare void @has_noaliases(i32* noalias %p, i32* %q)
declare void @one_arg(i32)
@@ -87,6 +89,11 @@
; CHECK: Unusual: noalias argument aliases another argument
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
+; CHECK: Write to read-only memory
+call void @llvm.memset.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0)
+; CHECK: Write to read-only memory
+call void @llvm.memset.inline.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0)
+
; CHECK: Undefined behavior: Buffer overflow
%wider = bitcast i8* %buf to i16*
store i16 0, i16* %wider
Index: llvm/test/CodeGen/X86/memset-inline.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/memset-inline.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+define void @test1(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; CHECK-NEXT: imulq %rax, %rcx
+; CHECK-NEXT: movq %rcx, (%rdi)
+; CHECK-NEXT: retq
+ tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0)
+ ret void
+}
+
+define void @regular_memset_calls_external_function(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: regular_memset_calls_external_function:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl $129, %edx
+; CHECK-NEXT: jmp memset@PLT # TAILCALL
+ tail call void @llvm.memset.p0i8.i64(i8* %a, i8 %value, i64 129, i1 0)
+ ret void
+}
+
+define void @inlined_set_doesnt_call_external_function(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: inlined_set_doesnt_call_external_function:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; CHECK-NEXT: imulq %rax, %rcx
+; CHECK-NEXT: movq %rcx, 120(%rdi)
+; CHECK-NEXT: movq %rcx, 112(%rdi)
+; CHECK-NEXT: movq %rcx, 104(%rdi)
+; CHECK-NEXT: movq %rcx, 96(%rdi)
+; CHECK-NEXT: movq %rcx, 88(%rdi)
+; CHECK-NEXT: movq %rcx, 80(%rdi)
+; CHECK-NEXT: movq %rcx, 72(%rdi)
+; CHECK-NEXT: movq %rcx, 64(%rdi)
+; CHECK-NEXT: movq %rcx, 56(%rdi)
+; CHECK-NEXT: movq %rcx, 48(%rdi)
+; CHECK-NEXT: movq %rcx, 40(%rdi)
+; CHECK-NEXT: movq %rcx, 32(%rdi)
+; CHECK-NEXT: movq %rcx, 24(%rdi)
+; CHECK-NEXT: movq %rcx, 16(%rdi)
+; CHECK-NEXT: movq %rcx, 8(%rdi)
+; CHECK-NEXT: movq %rcx, (%rdi)
+; CHECK-NEXT: movb %cl, 128(%rdi)
+; CHECK-NEXT: retq
+ tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 129, i1 0)
+ ret void
+}
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -175,7 +175,8 @@
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
DAG.getConstant(Offset, dl, AddrVT)),
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
- isVolatile, false, DstPtrInfo.getWithOffset(Offset));
+ isVolatile, /* AlwaysInline */ false,
+ /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -4917,7 +4917,8 @@
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
- case Intrinsic::memset: {
+ case Intrinsic::memset:
+ case Intrinsic::memset_inline: {
const auto *MI = cast<MemIntrinsic>(&Call);
auto IsValidAlignment = [&](unsigned Alignment) -> bool {
return Alignment == 0 || isPowerOf2_32(Alignment);
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -164,6 +164,35 @@
return CI;
}
+CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign,
+ Value *Val, Value *Size,
+ bool IsVolatile, MDNode *TBAATag,
+ MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)};
+ Type *Tys[] = {Dst->getType(), Size->getType()};
+ Module *M = BB->getParent()->getParent();
+ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset_inline, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ if (DstAlign)
+ cast<MemSetInlineInst>(CI)->setDestAlignment(DstAlign->value());
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ if (ScopeTag)
+ CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+ if (NoAliasTag)
+ CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
+ return CI;
+}
+
CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize,
MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5909,10 +5909,30 @@
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+ SDValue MS = DAG.getMemset(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
+ isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ updateDAGForMaybeTailCall(MS);
+ return;
+ }
+ case Intrinsic::memset_inline: {
+ const auto &MSII = cast<MemSetInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Value = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MSII.getDestAlign().valueOrOne();
+ bool isVol = MSII.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
+ /* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
- updateDAGForMaybeTailCall(MS);
+ updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove: {
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6990,7 +6990,7 @@
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, Align Alignment, bool isVol,
- MachinePointerInfo DstPtrInfo,
+ bool AlwaysInline, MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Turn a memset of undef to nop.
// FIXME: We need to honor volatile even is Src is undef.
@@ -7010,8 +7010,10 @@
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+ unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
+
if (!TLI.findOptimalMemOpLowering(
- MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
@@ -7307,7 +7309,7 @@
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool isTailCall,
+ bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Check to see if we should lower the memset to stores first.
@@ -7320,7 +7322,7 @@
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Alignment,
- isVol, DstPtrInfo, AAInfo);
+ isVol, false, DstPtrInfo, AAInfo);
if (Result.getNode())
return Result;
@@ -7335,6 +7337,15 @@
return Result;
}
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment, isVol, true,
+ DstPtrInfo, AAInfo);
+ }
+
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
Index: llvm/lib/Analysis/Lint.cpp
===================================================================
--- llvm/lib/Analysis/Lint.cpp
+++ llvm/lib/Analysis/Lint.cpp
@@ -335,6 +335,12 @@
MSI->getDestAlign(), nullptr, MemRef::Write);
break;
}
+ case Intrinsic::memset_inline: {
+ MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
+ visitMemoryReference(I, MemoryLocation::getForDest(MSII),
+ MSII->getDestAlign(), nullptr, MemRef::Write);
+ break;
+ }
case Intrinsic::vastart:
Check(I.getParent()->getParent()->isVarArg(),
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -651,6 +651,17 @@
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ImmArg<ArgIndex<3>>]>;
+// Memset semantic that is guaranteed to be inlined.
+// In particular this means that the generated code is not allowed to call any
+// external function.
+// The third argument (specifying the size) must be a constant.
+def int_memset_inline
+ : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty],
+ [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree,
+ NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -978,6 +978,7 @@
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memcpy_inline:
return true;
default:
@@ -994,7 +995,28 @@
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memset;
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memset:
+ case Intrinsic::memset_inline:
+ return true;
+ default:
+ return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This class wraps the llvm.memset.inline intrinsic.
+class MemSetInlineInst : public MemSetInst {
+public:
+ ConstantInt *getLength() const {
+ return cast<ConstantInt>(MemSetInst::getLength());
+ }
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memset_inline;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -1079,6 +1101,7 @@
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
@@ -1100,6 +1123,7 @@
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memset_element_unordered_atomic:
return true;
default:
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -578,6 +578,12 @@
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
+ CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val,
+ Value *Size, bool IsVolatile = false,
+ MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr,
+ MDNode *NoAliasTag = nullptr);
+
/// Create and insert an element unordered-atomic memset of the region of
/// memory starting at the given pointer to the given value.
///
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1052,7 +1052,8 @@
const AAMDNodes &AAInfo = AAMDNodes());
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
- SDValue Size, Align Alignment, bool isVol, bool isTailCall,
+ SDValue Size, Align Alignment, bool isVol,
+ bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo = AAMDNodes());
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -13867,6 +13867,71 @@
If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
behavior is undefined.
+.. _int_memset_inline:
+
+'``llvm.memset.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset.inline`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+ declare void @llvm.memset.inline.p0i8.i32(i8* <dest>, i8 <val>,
+ i32 <len>,
+ i1 <isvolatile>)
+ declare void @llvm.memset.inline.p0i8.i64(i8* <dest>, i8 <val>,
+ i64 <len>,
+ i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.inline.*``' intrinsics fill a block of memory with a
+particular byte value and guarantee that no external functions are called.
+
+Note that, unlike the standard libc function, the ``llvm.memset.inline.*``
+intrinsics do not return a value, take an extra ``isvolatile``
+argument, and the destination pointer can be in a specified address space.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the byte value with which to fill it, the third argument is a constant
+integer argument specifying the number of bytes to fill, and the fourth
+is a boolean indicating a volatile access.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first argument.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memset.inline`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.inline.*``' intrinsics fill "len" bytes of memory starting
+at the destination location. If the argument is known to be
+aligned to some boundary, this can be specified as an attribute on
+the argument.
+
+``len`` must be a constant expression.
+If ``<len>`` is 0, it is no-op modulo the behavior of attributes attached to
+the arguments.
+If ``<len>`` is not a well-defined value, the behavior is undefined.
+If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
+behavior is undefined.
+
+The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
+'``llvm.memset.*``', but the generated code is guaranteed not to call any
+external functions.
+
'``llvm.sqrt.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Index: clang/test/Sema/builtins-memset-inline.cpp
===================================================================
--- /dev/null
+++ clang/test/Sema/builtins-memset-inline.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define NULL ((char *)0)
+
+#if __has_builtin(__builtin_memset_inline)
+#warning defined as expected
+// expected-warning@-1 {{defined as expected}}
+#endif
+
+void test_memset_inline_null_dst(void *ptr) {
+ __builtin_memset_inline(NULL, 1, 4); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}
+
+void test_memset_inline_null_buffer_is_ok_if_size_is_zero(void *ptr, char value) {
+ __builtin_memset_inline(NULL, value, /*size */ 0);
+}
+
+void test_memset_inline_non_constant_size(void *dst, char value, unsigned size) {
+ __builtin_memset_inline(dst, value, size); // expected-error {{argument to '__builtin_memset_inline' must be a constant integer}}
+}
+
+template <unsigned size>
+void test_memset_inline_template(void *dst, char value) {
+ // We do not try to evaluate size in non-instantiated templates.
+ __builtin_memset_inline(dst, value, size);
+}
+
+void test_memset_inline_implicit_conversion(void *ptr, char value) {
+ char a[5];
+ __builtin_memset_inline(a, value, 5);
+}
+
+void test_memset_inline_num_args(void *dst, char value) {
+ __builtin_memset_inline(); // expected-error {{too few arguments to function call}}
+ __builtin_memset_inline(dst, value, 4, NULL); // expected-error {{too many arguments to function call}}
+}
Index: clang/test/CodeGen/builtins-memset-inline.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-memset-inline.c
@@ -0,0 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_0(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_0(void *dst, char value) {
+ // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 0, i1 false)
+ __builtin_memset_inline(dst, value, 0);
+}
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_1(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_1(void *dst, char value) {
+ // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 1, i1 false)
+ __builtin_memset_inline(dst, value, 1);
+}
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_4(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_4(void *dst, char value) {
+ // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 4, i1 false)
+ __builtin_memset_inline(dst, value, 4);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2290,6 +2290,30 @@
}
break;
}
+ case Builtin::BI__builtin_memset_inline: {
+ if (checkArgCount(*this, TheCall, 3))
+ return ExprError();
+ auto ArgArrayConversionFailed = [&](unsigned Arg) {
+ ExprResult ArgExpr =
+ DefaultFunctionArrayLvalueConversion(TheCall->getArg(Arg));
+ if (ArgExpr.isInvalid())
+ return true;
+ TheCall->setArg(Arg, ArgExpr.get());
+ return false;
+ };
+
+ if (ArgArrayConversionFailed(0))
+ return true;
+ clang::Expr *SizeOp = TheCall->getArg(2);
+ // We warn about setting to `nullptr` pointers when `size` is
+ // greater than 0. When `size` is value dependent we cannot evaluate its
+ // value so we bail out.
+ if (SizeOp->isValueDependent())
+ break;
+ if (!SizeOp->EvaluateKnownConstInt(Context).isZero())
+ CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc());
+ break;
+ }
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
case Builtin::BI##ID: \
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -3508,6 +3508,17 @@
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
return RValue::get(Dest.getPointer());
}
+ case Builtin::BI__builtin_memset_inline: {
+ Address Dest = EmitPointerWithAlignment(E->getArg(0));
+ Value *ByteVal =
+ Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
+ uint64_t Size =
+ E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+ EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc(), FD, 0);
+ Builder.CreateMemSetInline(Dest, ByteVal, Size);
+ return RValue::get(nullptr);
+ }
case Builtin::BI__builtin___memset_chk: {
// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
Expr::EvalResult SizeResult, DstSizeResult;
Index: clang/lib/CodeGen/CGBuilder.h
===================================================================
--- clang/lib/CodeGen/CGBuilder.h
+++ clang/lib/CodeGen/CGBuilder.h
@@ -32,6 +32,7 @@
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
llvm::BasicBlock::iterator InsertPt) const override;
+
private:
CodeGenFunction *CGF = nullptr;
};
@@ -45,17 +46,18 @@
/// Storing a reference to the type cache here makes it a lot easier
/// to build natural-feeling, target-specific IR.
const CodeGenTypeCache &TypeCache;
+
public:
CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::LLVMContext &C)
- : CGBuilderBaseTy(C), TypeCache(TypeCache) {}
- CGBuilderTy(const CodeGenTypeCache &TypeCache,
- llvm::LLVMContext &C, const llvm::ConstantFolder &F,
+ : CGBuilderBaseTy(C), TypeCache(TypeCache) {}
+ CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::LLVMContext &C,
+ const llvm::ConstantFolder &F,
const CGBuilderInserterTy &Inserter)
- : CGBuilderBaseTy(C, F, Inserter), TypeCache(TypeCache) {}
+ : CGBuilderBaseTy(C, F, Inserter), TypeCache(TypeCache) {}
CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::Instruction *I)
- : CGBuilderBaseTy(I), TypeCache(TypeCache) {}
+ : CGBuilderBaseTy(I), TypeCache(TypeCache) {}
CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::BasicBlock *BB)
- : CGBuilderBaseTy(BB), TypeCache(TypeCache) {}
+ : CGBuilderBaseTy(BB), TypeCache(TypeCache) {}
llvm::ConstantInt *getSize(CharUnits N) {
return llvm::ConstantInt::get(TypeCache.SizeTy, N.getQuantity());
@@ -102,7 +104,8 @@
using CGBuilderBaseTy::CreateAlignedStore;
llvm::StoreInst *CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr,
- CharUnits Align, bool IsVolatile = false) {
+ CharUnits Align,
+ bool IsVolatile = false) {
return CreateAlignedStore(Val, Addr, Align.getAsAlign(), IsVolatile);
}
@@ -165,8 +168,8 @@
Address CreateElementBitCast(Address Addr, llvm::Type *Ty,
const llvm::Twine &Name = "") {
auto *PtrTy = Ty->getPointerTo(Addr.getAddressSpace());
- return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name),
- Ty, Addr.getAlignment());
+ return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name), Ty,
+ Addr.getAlignment());
}
using CGBuilderBaseTy::CreatePointerBitCastOrAddrSpaceCast;
@@ -193,10 +196,10 @@
const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
- return Address(CreateStructGEP(Addr.getElementType(),
- Addr.getPointer(), Index, Name),
- ElTy->getElementType(Index),
- Addr.getAlignment().alignmentAtOffset(Offset));
+ return Address(
+ CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name),
+ ElTy->getElementType(Index),
+ Addr.getAlignment().alignmentAtOffset(Offset));
}
/// Given
@@ -264,10 +267,10 @@
CharUnits EltSize =
CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType()));
- return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), Index,
- Name),
- Addr.getElementType(),
- Addr.getAlignment().alignmentOfArrayElement(EltSize));
+ return Address(
+ CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, Name),
+ Addr.getElementType(),
+ Addr.getAlignment().alignmentOfArrayElement(EltSize));
}
/// Given a pointer to i8, adjust it by a given constant offset.
@@ -341,9 +344,16 @@
Dest.getAlignment().getAsAlign(), IsVolatile);
}
+ using CGBuilderBaseTy::CreateMemSetInline;
+ llvm::CallInst *CreateMemSetInline(Address Dest, llvm::Value *Value,
+ uint64_t Size) {
+ return CreateMemSetInline(Dest.getPointer(),
+ Dest.getAlignment().getAsAlign(), Value,
+ getInt64(Size));
+ }
+
using CGBuilderBaseTy::CreatePreserveStructAccessIndex;
- Address CreatePreserveStructAccessIndex(Address Addr,
- unsigned Index,
+ Address CreatePreserveStructAccessIndex(Address Addr, unsigned Index,
unsigned FieldIndex,
llvm::MDNode *DbgInfo) {
llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
@@ -363,7 +373,7 @@
}
};
-} // end namespace CodeGen
-} // end namespace clang
+} // end namespace CodeGen
+} // end namespace clang
#endif
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -555,6 +555,7 @@
BUILTIN(__builtin_memmove, "v*v*vC*z", "nF")
BUILTIN(__builtin_mempcpy, "v*v*vC*z", "nF")
BUILTIN(__builtin_memset, "v*v*iz", "nF")
+BUILTIN(__builtin_memset_inline, "vv*iIz", "nt")
BUILTIN(__builtin_printf, "icC*.", "Fp:0:")
BUILTIN(__builtin_stpcpy, "c*c*cC*", "nF")
BUILTIN(__builtin_stpncpy, "c*c*cC*z", "nF")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -3218,6 +3218,26 @@
Note that this intrinsic cannot yet be called in a ``constexpr`` context.
+Guaranteed inlined memset
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+ void __builtin_memset_inline(void *dst, int value, size_t size);
+
+
+``__builtin_memset_inline`` has been designed as a building block for efficient
+``memset`` implementations. It is identical to ``__builtin_memset`` but also
+guarantees not to call any external functions. See LLVM IR `llvm.memset.inline
+<https://llvm.org/docs/LangRef.html#llvm-memset-inline-intrinsic>`_ intrinsic
+for more information.
+
+This is useful to implement a custom version of ``memset``, implement a
+``libc`` memset or work around the absence of a ``libc``.
+
+Note that the ``size`` argument must be a compile-time constant.
+
+Note that this intrinsic cannot yet be called in a ``constexpr`` context.
Atomic Min/Max builtins with memory ordering
--------------------------------------------
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits