Looks like I missed that the inferred types for the returned tuple elements are references; I will rebuild with ASan and
confirm my fix is correct, then commit again.

Thanks,
Scott

On 2018-08-03 13:48, Vlad Tsyrklevich wrote:
This change is causing ASan failures on the sanitizer bots:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio
[9]

I've reverted it in r338904.

On Fri, Aug 3, 2018 at 8:51 AM Scott Linder via cfe-commits
<cfe-commits@lists.llvm.org> wrote:

Author: scott.linder
Date: Fri Aug 3 08:50:52 2018
New Revision: 338899

URL: http://llvm.org/viewvc/llvm-project?rev=338899&view=rev [1]
Log:
[OpenCL] Always emit alloca in entry block for enqueue_kernel
builtin

Ensures the statically sized alloca is not converted to
DYNAMIC_STACKALLOC
later because it is not in the entry block.

Differential Revision: https://reviews.llvm.org/D50104 [2]

Added:
cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
[3]
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4]

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL:

http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff
[5]

==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 3 08:50:52 2018
@@ -3338,23 +3338,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(
// Create a temporary array to hold the sizes of local pointer
arguments
// for the block. \p First is the position of the first size
argument.
auto CreateArrayForSizeVar = [=](unsigned First) {
- auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
- auto *Arr = Builder.CreateAlloca(AT);
- llvm::Value *Ptr;
+ llvm::APInt ArraySize(32, NumArgs - First);
+ QualType SizeArrayTy = getContext().getConstantArrayType(
+ getContext().getSizeType(), ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
+ llvm::Value *TmpPtr = Tmp.getPointer();
+ llvm::Value *TmpSize = EmitLifetimeStart(
+
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
+ llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the
corresponding
// argument passed to the enqueued block.
auto *Zero = llvm::ConstantInt::get(IntTy, 0);
for (unsigned I = First; I < NumArgs; ++I) {
auto *Index = llvm::ConstantInt::get(IntTy, I - First);
- auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+ auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
if (I == First)
- Ptr = GEP;
+ ElemPtr = GEP;
auto *V =
Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)),
SizeTy);
Builder.CreateAlignedStore(
V, GEP,
CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
}
- return Ptr;
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};

// Could have events and/or varargs.
@@ -3366,24 +3372,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(
llvm::Value *Kernel =
Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
auto *Block = Builder.CreatePointerCast(Info.BlockArg,
GenericVoidPtrTy);
- auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) =
CreateArrayForSizeVar(4);

// Create a vector of the arguments, as well as a constant
value to
// express to the runtime the number of variadic arguments.
std::vector<llvm::Value *> Args = {
Queue, Flags, Range,
Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
- PtrToSizeArray};
+ ElemPtr};
std::vector<llvm::Type *> ArgTys = {
- QueueTy, IntTy, RangeTy,
- GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
- PtrToSizeArray->getType()};
+ QueueTy, IntTy, RangeTy,
GenericVoidPtrTy,
+ GenericVoidPtrTy, IntTy, ElemPtr->getType()};

llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+
RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value
*>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
// Any calls now have event arguments passed.
if (NumArgs >= 7) {
@@ -3430,15 +3439,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(
ArgTys.push_back(Int32Ty);
Name = "__enqueue_kernel_events_varargs";

- auto *PtrToSizeArray = CreateArrayForSizeVar(7);
- Args.push_back(PtrToSizeArray);
- ArgTys.push_back(PtrToSizeArray->getType());
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) =
CreateArrayForSizeVar(7);
+ Args.push_back(ElemPtr);
+ ArgTys.push_back(ElemPtr->getType());

llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+
RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value
*>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
LLVM_FALLTHROUGH;
}

Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
[4]
URL:

http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff
[6]

==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4]
(original)
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl [4] Fri
Aug 3 08:50:52 2018
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0
-emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s
--check-prefix=COMMON --check-prefix=B32
// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0
-emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s
--check-prefix=COMMON --check-prefix=B64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1
-emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s
--check-prefix=CHECK-LIFETIMES

#pragma OPENCL EXTENSION cl_khr_subgroups : enable

@@ -46,8 +47,31 @@ kernel void device_side_enqueue(global i
// COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*]
clk_event_t event_wait_list2[] = {clk_event};

- // Emits block literal on stack and block kernel [[INVLK1]].
// COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
+
+ // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
+ // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
+ // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
+ // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
+ // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
+ // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
+ // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
+ // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
+ // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
+ // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
+
+ // Emits block literal on stack and block kernel [[INVLK1]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
%opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*,
i32, i32 addrspace(1)* }>* %block to void ()*
@@ -73,7 +97,6 @@ kernel void device_side_enqueue(global i
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}*
addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}*
addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
-
enqueue_kernel(default_queue, flags, ndrange, 2,
&event_wait_list, &clk_event,
^(void) {
a[i] = b[i];
@@ -82,39 +105,46 @@ kernel void device_side_enqueue(global i
// Emits global block literal [[BLG1]] and block kernel
[[INVGK1]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
%opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 256, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 256, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES1]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES1]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES1]], i32 0, i32 0
+ // B32: store i32 256, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES1]], i32 0, i32 0
+ // B64: store i64 256, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
},
256);
+
char c;
// Emits global block literal [[BLG2]] and block kernel
[[INVGK2]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
%opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES2]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES2]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES2]], i32 0, i32 0
+ // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES2]], i32 0, i32 0
+ // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
@@ -127,18 +157,21 @@ kernel void device_side_enqueue(global i
// COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x
%opencl.clk_event_t*], [1 x %opencl.clk_event_t*]*
%event_wait_list2, i32 0, i32 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast
%opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}*
addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast
%opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}*
addrspace(4)*
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 256, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 256, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES3]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES3]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32
@__enqueue_kernel_events_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES3]], i32 0, i32 0
+ // B32: store i32 256, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES3]], i32 0, i32 0
+ // B64: store i64 256, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}
[[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange, 2,
event_wait_list2, &clk_event,
^(local void *p) {
return;
@@ -151,18 +184,21 @@ kernel void device_side_enqueue(global i
// COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x
%opencl.clk_event_t*], [1 x %opencl.clk_event_t*]*
%event_wait_list2, i32 0, i32 0
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast
%opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}*
addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast
%opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}*
addrspace(4)*
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES4]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES4]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32
@__enqueue_kernel_events_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES4]], i32 0, i32 0
+ // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES4]], i32 0, i32 0
+ // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}*
addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}*
addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange, 2,
event_wait_list2, &clk_event,
^(local void *p) {
return;
@@ -173,18 +209,21 @@ kernel void device_side_enqueue(global i
// Emits global block literal [[BLG5]] and block kernel
[[INVGK5]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
%opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES5]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES5]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES5]], i32 0, i32 0
+ // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES5]], i32 0, i32 0
+ // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
@@ -194,26 +233,29 @@ kernel void device_side_enqueue(global i
// Emits global block literal [[BLG6]] and block kernel
[[INVGK6]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*,
%opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // B32: %[[TMP:.*]] = alloca [3 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 1, i32* %[[TMP1]], align 4
- // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]*
%[[TMP]], i32 0, i32 1
- // B32: store i32 2, i32* %[[TMP2]], align 4
- // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]*
%[[TMP]], i32 0, i32 2
- // B32: store i32 4, i32* %[[TMP3]], align 4
- // B64: %[[TMP:.*]] = alloca [3 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 1, i64* %[[TMP1]], align 8
- // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]*
%[[TMP]], i32 0, i32 1
- // B64: store i64 2, i64* %[[TMP2]], align 8
- // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]*
%[[TMP]], i32 0, i32 2
- // B64: store i64 4, i64* %[[TMP3]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]*
%[[BLOCK_SIZES6]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
24, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x
i64]* %[[BLOCK_SIZES6]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64
24, i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]*
%[[BLOCK_SIZES6]], i32 0, i32 0
+ // B32: store i32 1, i32* %[[TMP]], align 4
+ // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x
i32]* %[[BLOCK_SIZES6]], i32 0, i32 1
+ // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4
+ // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x
i32]* %[[BLOCK_SIZES6]], i32 0, i32 2
+ // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]*
%[[BLOCK_SIZES6]], i32 0, i32 0
+ // B64: store i64 1, i64* %[[TMP]], align 8
+ // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x
i64]* %[[BLOCK_SIZES6]], i32 0, i32 1
+ // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8
+ // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x
i64]* %[[BLOCK_SIZES6]], i32 0, i32 2
+ // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 3,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p1, local void *p2, local void *p3)
{
return;
@@ -223,18 +265,21 @@ kernel void device_side_enqueue(global i
// Emits global block literal [[BLG7]] and block kernel
[[INVGK7]].
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*,
%opencl.queue_t** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // B32: %[[TMP:.*]] = alloca [1 x i32]
- // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[TMP]], i32 0, i32 0
- // B32: store i32 0, i32* %[[TMP1]], align 4
- // B64: %[[TMP:.*]] = alloca [1 x i64]
- // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[TMP]], i32 0, i32 0
- // B64: store i64 4294967296, i64* %[[TMP1]], align 8
+ // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]*
%[[BLOCK_SIZES7]] to i8*
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64
8, i8* nonnull [[LIFETIME_PTR]])
+ // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x
i64]* %[[BLOCK_SIZES7]], i64 0, i64 0
+ // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs(
+ // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8,
i8* nonnull [[LIFETIME_PTR]])
+ // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]*
%[[BLOCK_SIZES7]], i32 0, i32 0
+ // B32: store i32 0, i32* %[[TMP]], align 4
+ // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]*
%[[BLOCK_SIZES7]], i32 0, i32 0
+ // B64: store i64 4294967296, i64* %[[TMP]], align 8
// COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast
({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to
i8 addrspace(4)*), i32 1,
- // B32-SAME: i32* %[[TMP1]])
- // B64-SAME: i64* %[[TMP1]])
+ // B32-SAME: i32* %[[TMP]])
+ // B64-SAME: i64* %[[TMP]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;

Added:
cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl [3]
URL:

http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto
[7]

==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
[3] (added)
+++ cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
[3] Fri Aug 3 08:50:52 2018
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn
< %s | FileCheck %s --check-prefixes=COMMON,AMDGPU
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple
"spir-unknown-unknown" < %s | FileCheck %s
--check-prefixes=COMMON,SPIR32
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple
"spir64-unknown-unknown" < %s | FileCheck %s
--check-prefixes=COMMON,SPIR64
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited
-emit-llvm -o - -triple amdgcn < %s | FileCheck %s
--check-prefixes=CHECK-DEBUG
+
+// Check that the enqueue_kernel array temporary is in the entry
block to avoid
+// a dynamic alloca
+
+typedef struct {int a;} ndrange_t;
+
+kernel void test(int i) {
+// COMMON-LABEL: define {{.*}} void @test
+// COMMON-LABEL: entry:
+// AMDGPU: %block_sizes = alloca [1 x i64]
+// SPIR32: %block_sizes = alloca [1 x i32]
+// SPIR64: %block_sizes = alloca [1 x i64]
+// COMMON-LABEL: if.then:
+// COMMON-NOT: alloca
+// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg !34
+// COMMON-LABEL: if.end
+ queue_t default_queue;
+ unsigned flags = 0;
+ ndrange_t ndrange;
+ if (i)
+ enqueue_kernel(default_queue, flags, ndrange, ^(local void *a)
{ }, 32);
+}
+
+// Check that the temporary is scoped to the `if`
+
+// CHECK-DEBUG: !32 = distinct !DILexicalBlock(scope: !7, file: !1,
line: 24)
+// CHECK-DEBUG: !34 = !DILocation(line: 25, scope: !32)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits [8]


Links:
------
[1] http://llvm.org/viewvc/llvm-project?rev=338899&view=rev
[2] https://reviews.llvm.org/D50104
[3] http://enqueue-kernel-non-entry-block.cl
[4] http://cl20-device-side-enqueue.cl
[5]
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=338899&r1=338898&r2=338899&view=diff
[6]
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=338899&r1=338898&r2=338899&view=diff
[7]
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl?rev=338899&view=auto
[8] http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[9]
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21898/steps/check-clang%20asan/logs/stdio
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to