https://github.com/ddpagan updated https://github.com/llvm/llvm-project/pull/196431
>From ee5c6a801f3bac6f83a5a1171399776951b4cba4 Mon Sep 17 00:00:00 2001 From: Dave Pagan <[email protected]> Date: Wed, 8 Apr 2026 17:47:28 -0500 Subject: [PATCH 1/2] [clang][OpenMP 6.0][CodeGen] Codegen for declare_target 'local' clause Implement code generation for the OpenMP 6.0 declare_target 'local' clause, which creates device-only variables with per-device static storage. A 'local' variable exists in the device image with its static initializer and is always accessed directly by device code. This is the same as 'to'/'enter' without unified shared memory, except that no offload entry is registered. Using 'device_type(nohost)' with 'local' is not yet supported. Sema generates a warning and converts it to 'device_type(any)'. Testing: - Updated tests: clang/test/OpenMP/declare_target_messages.cpp clang/test/OpenMP/declare_target_ast_print.cpp - New tests: clang/test/OpenMP/declare_target_local_codegen.cpp clang/test/OpenMP/declare_target_local_usm_codegen.cpp offload/test/offloading/declare_target_local.cpp --- .../clang/Basic/DiagnosticSemaKinds.td | 9 +- clang/lib/CodeGen/CGExpr.cpp | 19 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 26 +- clang/lib/CodeGen/CodeGenModule.cpp | 11 +- clang/lib/Sema/SemaOpenMP.cpp | 15 +- .../test/OpenMP/declare_target_ast_print.cpp | 12 +- .../OpenMP/declare_target_local_codegen.cpp | 430 ++++++++++++++++++ .../declare_target_local_usm_codegen.cpp | 52 +++ clang/test/OpenMP/declare_target_messages.cpp | 9 +- .../test/offloading/declare_target_local.cpp | 40 ++ 10 files changed, 577 insertions(+), 46 deletions(-) create mode 100644 clang/test/OpenMP/declare_target_local_codegen.cpp create mode 100644 clang/test/OpenMP/declare_target_local_usm_codegen.cpp create mode 100644 offload/test/offloading/declare_target_local.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c69b2ce3648f8..2e4102d9e1741 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12174,6 +12174,10 @@ def err_omp_declare_target_var_in_both_clauses def err_omp_declare_target_local_host_only : Error<"'local' clause is incompatible with 'device_type(host)'; " "local variables exist only on the device">; +def warn_omp_declare_target_local_nohost + : Warning<"'device_type(nohost)' is not yet supported with 'local' clause; " + "treating as 'device_type(any)'">, + InGroup<OpenMPTarget>; def warn_omp_not_in_target_context : Warning< "declaration is not declared in any declare target region">, InGroup<OpenMPTarget>; @@ -12596,11 +12600,6 @@ def err_omp_declare_target_has_local_vars : Error< def warn_omp_declare_target_after_first_use : Warning< "declaration marked as declare target after first use, it may lead to incorrect results">, InGroup<OpenMPTarget>; -def warn_omp_declare_target_local_not_implemented - : Warning<"'local' clause on 'declare_target' directive is not yet fully " - "implemented; " - "variable will be treated as 'enter'">, - InGroup<OpenMPTarget>; def err_omp_declare_variant_incompat_attributes : Error< "'#pragma omp declare variant' is not compatible with any target-specific attributes">; def warn_omp_declare_variant_score_not_constant diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 9107553652688..5764b59e538ae 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3360,19 +3360,18 @@ static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType T) { std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - // Return an invalid address if variable is MT_To (or MT_Enter starting with - // OpenMP 5.2, or MT_Local in OpenMP 6.0) and unified memory is not enabled. - // For all other cases: MT_Link and MT_To (or MT_Enter/MT_Local) with unified - // memory, return a valid address. - if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && - !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) + // Always return an invalid address for MT_Local, and also for + // MT_To/MT_Enter when unified memory is not enabled. These use direct + // access (global exists in device image). Otherwise, return a valid + // address. + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Local || + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && + !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a99a257c14a2a..7cdc206aea0c4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1529,12 +1529,14 @@ convertCaptureClause(const VarDecl *VD) { return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; break; case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter: - case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local: return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; - break; case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link: return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; break; + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local: + // MT_Local variables don't need offload entry (device-local). + llvm_unreachable("MT_Local should not reach convertCaptureClause"); + break; default: return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; break; @@ -7983,8 +7985,7 @@ class MappableExprsHandler { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { RequiresReference = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -11326,8 +11327,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { cast<VarDecl>(GD.getDecl())); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); return true; @@ -11350,6 +11350,11 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, VD->hasExternalStorage()) return; + // MT_Local variables use direct access with no host-device mapping. + // No offload entry needed — the device global keeps its own initializer. + if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local) + return; + if (!Res) { if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Register non-target variables being emitted in device code (debug info @@ -11396,10 +11401,11 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && - !HasRequiresUnifiedSharedMemory) { + // MT_Local and MT_To/MT_Enter without USM are always emitted. + if (*Res == OMPDeclareTargetDeclAttr::MT_Local || + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && + !HasRequiresUnifiedSharedMemory)) { CGM.EmitGlobal(VD); } else { assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e88fed1296667..5b26332765fab 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4465,16 +4465,15 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); - if ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && - !UnifiedMemoryEnabled) { + if (*Res == OMPDeclareTargetDeclAttr::MT_Local || + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && + !UnifiedMemoryEnabled)) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter || - *Res == OMPDeclareTargetDeclAttr::MT_Local) && + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 53ded7a5e177e..d6f6bc919a31b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -24859,13 +24859,18 @@ void SemaOpenMP::ActOnOpenMPDeclareTargetName( if (!IndirectE) IsIndirect = true; } - // FIXME: 'local' clause is not yet implemented in CodeGen. For now, it is - // treated as 'enter'. For host compilation, 'local' is a no-op. + // FIXME: 'local' with 'device_type(nohost)' is not yet fully supported + // in codegen. Treat as 'device_type(any)' for now. The variable will + // exist on both host and device, but the host copy is unused. + auto DT = DTCI.DT; if (MT == OMPDeclareTargetDeclAttr::MT_Local && - getLangOpts().OpenMPIsTargetDevice) - Diag(Loc, diag::warn_omp_declare_target_local_not_implemented); + DT == OMPDeclareTargetDeclAttr::DT_NoHost) { + Diag(Loc, diag::warn_omp_declare_target_local_nohost); + DT = OMPDeclareTargetDeclAttr::DT_Any; + } + auto *A = OMPDeclareTargetDeclAttr::CreateImplicit( - getASTContext(), MT, DTCI.DT, IndirectE, IsIndirect, Level, + getASTContext(), MT, DT, IndirectE, IsIndirect, Level, SourceRange(Loc, Loc)); ND->addAttr(A); if (ASTMutationListener *ML = getASTContext().getASTMutationListener()) diff --git a/clang/test/OpenMP/declare_target_ast_print.cpp b/clang/test/OpenMP/declare_target_ast_print.cpp index 3ebe261cf79f0..7b63c15dd455e 100644 --- a/clang/test/OpenMP/declare_target_ast_print.cpp +++ b/clang/test/OpenMP/declare_target_ast_print.cpp @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -I %S/Inputs -ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 // RUN: %clang_cc1 -verify -fopenmp -I %S/Inputs -ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP51 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -I %S/Inputs -ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP52 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -I %S/Inputs -ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60 +// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -I %S/Inputs -ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -I %S/Inputs -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -I %S/Inputs -verify %s -ast-print | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 @@ -133,18 +133,18 @@ int l1; // OMP60: #pragma omp end declare target int l2; -#pragma omp declare target device_type(nohost) local(l2) -// OMP60: #pragma omp declare target device_type(nohost) local +#pragma omp declare target device_type(nohost) local(l2) // omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause; treating as 'device_type(any)'}} +// OMP60: #pragma omp declare target local // OMP60: int l2; // OMP60: #pragma omp end declare target int l3; int a = 0; -#pragma omp declare target local(l3) device_type(nohost) local(a) -// OMP60: #pragma omp declare target device_type(nohost) local +#pragma omp declare target local(l3) device_type(nohost) local(a) // omp60-warning 2 {{'device_type(nohost)' is not yet supported with 'local' clause; treating as 'device_type(any)'}} +// OMP60: #pragma omp declare target local // OMP60: int l3; // OMP60: #pragma omp end declare target -// OMP60: #pragma omp declare target device_type(nohost) local +// OMP60: #pragma omp declare target local // OMP60: int a = 0; // OMP60: #pragma omp end declare target diff --git a/clang/test/OpenMP/declare_target_local_codegen.cpp b/clang/test/OpenMP/declare_target_local_codegen.cpp new file mode 100644 index 0000000000000..b82e8b3bba9ff --- /dev/null +++ b/clang/test/OpenMP/declare_target_local_codegen.cpp @@ -0,0 +1,430 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefix=DEVICE +// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -emit-pch -o %t +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=DEVICE + +#ifndef HEADER +#define HEADER + +// --------------------------------------------------------------------------- +// Explicit local clause (default device_type is 'any') +// --------------------------------------------------------------------------- +int local_scalar; +#pragma omp declare target local(local_scalar) + +int local_array[64]; +#pragma omp declare target local(local_array) + +// --------------------------------------------------------------------------- +// local + device_type(nohost) +// --------------------------------------------------------------------------- +int local_nohost_var; +#pragma omp declare target local(local_nohost_var) device_type(nohost) // omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause; treating as 'device_type(any)'}} + +double local_nohost_arr[32]; +#pragma omp declare target local(local_nohost_arr) device_type(nohost) // omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause; treating as 'device_type(any)'}} + +// --------------------------------------------------------------------------- +// Template with local variable +// --------------------------------------------------------------------------- +template <typename T> +struct LocalStorage { + static T value; +}; + +template <typename T> +T LocalStorage<T>::value; + +#pragma omp declare target local(LocalStorage<int>::value) +#pragma omp declare target local(LocalStorage<double>::value) + +#pragma omp begin declare target +template <typename T> +T read_local_storage() { + return LocalStorage<T>::value; +} +#pragma omp end declare target + +// --------------------------------------------------------------------------- +// Non-template static data member with local +// --------------------------------------------------------------------------- +struct PlainStruct { + static int s_member; +}; +int PlainStruct::s_member; +#pragma omp declare target local(PlainStruct::s_member) + +// --------------------------------------------------------------------------- +// Initialized local variable +// --------------------------------------------------------------------------- +int local_init_var = 42; +#pragma omp declare target local(local_init_var) + +// --------------------------------------------------------------------------- +// Use local variables in a target region +// --------------------------------------------------------------------------- +int use_local_vars() { + int result = 0; + #pragma omp target map(from: result) + { + local_scalar = 42; + local_array[0] = 1; + LocalStorage<int>::value = 100; + result = local_scalar + local_array[0] + + read_local_storage<int>(); + } + return result; +} + +// --------------------------------------------------------------------------- +// Use nohost local variables in a target region +// --------------------------------------------------------------------------- +int use_nohost_local_vars() { + int result = 0; + #pragma omp target map(from: result) + { + local_nohost_var = 7; + result = local_nohost_var; + } + return result; +} + +// --------------------------------------------------------------------------- +// Use static data member, initialized var, and static local in target region +// --------------------------------------------------------------------------- +int use_new_local_vars() { + int result = 0; + #pragma omp target map(from: result) + { + PlainStruct::s_member = 55; + local_init_var = 77; + result = PlainStruct::s_member + local_init_var; + } + return result; +} + +#endif +// HOST-LABEL: define {{[^@]+}}@_Z14use_local_varsv +// HOST-SAME: () #[[ATTR0:[0-9]+]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4 +// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8 +// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8 +// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8 +// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8 +// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8 +// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8 +// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 +// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// HOST-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 +// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// HOST-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 +// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8 +// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8 +// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8 +// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8 +// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4 +// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 +// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4 +// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70.region_id, ptr [[KERNEL_ARGS]]) +// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// HOST: omp_offload.failed: +// HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70(ptr [[RESULT]], ptr null) #[[ATTR2:[0-9]+]] +// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]] +// HOST: omp_offload.cont: +// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4 +// HOST-NEXT: ret i32 [[TMP23]] +// +// +// HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70 +// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8 +// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]] +// HOST-NEXT: store i32 42, ptr @local_scalar, align 4 +// HOST-NEXT: store i32 1, ptr @local_array, align 4 +// HOST-NEXT: store i32 100, ptr @_ZN12LocalStorageIiE5valueE, align 4 +// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @local_scalar, align 4 +// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr @local_array, align 4 +// HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// HOST-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z18read_local_storageIiET_v() +// HOST-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]] +// HOST-NEXT: store i32 [[ADD1]], ptr [[TMP0]], align 4 +// HOST-NEXT: ret void +// +// +// HOST-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v +// HOST-SAME: () #[[ATTR0]] comdat { +// HOST-NEXT: entry: +// HOST-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN12LocalStorageIiE5valueE, align 4 +// HOST-NEXT: ret i32 [[TMP0]] +// +// +// HOST-LABEL: define {{[^@]+}}@_Z21use_nohost_local_varsv +// HOST-SAME: () #[[ATTR0]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4 +// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8 +// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8 +// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8 +// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8 +// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8 +// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8 +// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 +// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// HOST-NEXT: store ptr @.offload_sizes.1, ptr [[TMP12]], align 8 +// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// HOST-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP13]], align 8 +// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8 +// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8 +// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8 +// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8 +// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4 +// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 +// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4 +// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86.region_id, ptr [[KERNEL_ARGS]]) +// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// HOST: omp_offload.failed: +// HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86(ptr [[RESULT]], ptr null) #[[ATTR2]] +// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]] +// HOST: omp_offload.cont: +// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4 +// HOST-NEXT: ret i32 [[TMP23]] +// +// +// HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86 +// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8 +// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// HOST-NEXT: store i32 7, ptr @local_nohost_var, align 4 +// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @local_nohost_var, align 4 +// HOST-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// HOST-NEXT: ret void +// +// +// HOST-LABEL: define {{[^@]+}}@_Z18use_new_local_varsv +// HOST-SAME: () #[[ATTR0]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 +// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4 +// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8 +// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8 +// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8 +// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8 +// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8 +// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8 +// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 +// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// HOST-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8 +// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// HOST-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8 +// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8 +// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8 +// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8 +// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8 +// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4 +// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 +// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4 +// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99.region_id, ptr [[KERNEL_ARGS]]) +// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// HOST: omp_offload.failed: +// HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99(ptr [[RESULT]], ptr null) #[[ATTR2]] +// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]] +// HOST: omp_offload.cont: +// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4 +// HOST-NEXT: ret i32 [[TMP23]] +// +// +// HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99 +// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] { +// HOST-NEXT: entry: +// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8 +// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META8]], !align [[META9]] +// HOST-NEXT: store i32 55, ptr @_ZN11PlainStruct8s_memberE, align 4 +// HOST-NEXT: store i32 77, ptr @local_init_var, align 4 +// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @_ZN11PlainStruct8s_memberE, align 4 +// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr @local_init_var, align 4 +// HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// HOST-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// HOST-NEXT: ret void +// +// +// DEVICE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70 +// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// DEVICE-NEXT: entry: +// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr +// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70_kernel_environment to ptr), ptr [[DYN_PTR]]) +// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// DEVICE: user_code.entry: +// DEVICE-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(1) @local_scalar to ptr), align 4 +// DEVICE-NEXT: store i32 1, ptr addrspacecast (ptr addrspace(1) @local_array to ptr), align 4 +// DEVICE-NEXT: store i32 100, ptr addrspacecast (ptr addrspace(1) @_ZN12LocalStorageIiE5valueE to ptr), align 4 +// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @local_scalar to ptr), align 4 +// DEVICE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @local_array to ptr), align 4 +// DEVICE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]] +// DEVICE-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18read_local_storageIiET_v() #[[ATTR2:[0-9]+]] +// DEVICE-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]] +// DEVICE-NEXT: store i32 [[ADD1]], ptr [[TMP0]], align 4 +// DEVICE-NEXT: call void @__kmpc_target_deinit() +// DEVICE-NEXT: ret void +// DEVICE: worker.exit: +// DEVICE-NEXT: ret void +// +// +// DEVICE-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v +// DEVICE-SAME: () #[[ATTR1:[0-9]+]] comdat { +// DEVICE-NEXT: entry: +// DEVICE-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @_ZN12LocalStorageIiE5valueE to ptr), align 4 +// DEVICE-NEXT: ret i32 [[TMP0]] +// +// +// DEVICE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86 +// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { +// DEVICE-NEXT: entry: +// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr +// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86_kernel_environment to ptr), ptr [[DYN_PTR]]) +// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// DEVICE: user_code.entry: +// DEVICE-NEXT: store i32 7, ptr addrspacecast (ptr addrspace(1) @local_nohost_var to ptr), align 4 +// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @local_nohost_var to ptr), align 4 +// DEVICE-NEXT: store i32 [[TMP2]], ptr [[TMP0]], align 4 +// DEVICE-NEXT: call void @__kmpc_target_deinit() +// DEVICE-NEXT: ret void +// DEVICE: worker.exit: +// DEVICE-NEXT: ret void +// +// +// DEVICE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99 +// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { +// DEVICE-NEXT: entry: +// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr +// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr +// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 +// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]] +// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99_kernel_environment to ptr), ptr [[DYN_PTR]]) +// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// DEVICE: user_code.entry: +// DEVICE-NEXT: store i32 55, ptr addrspacecast (ptr addrspace(1) @_ZN11PlainStruct8s_memberE to ptr), align 4 +// DEVICE-NEXT: store i32 77, ptr addrspacecast (ptr addrspace(1) @local_init_var to ptr), align 4 +// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @_ZN11PlainStruct8s_memberE to ptr), align 4 +// DEVICE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @local_init_var to ptr), align 4 +// DEVICE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]] +// DEVICE-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// DEVICE-NEXT: call void @__kmpc_target_deinit() +// DEVICE-NEXT: ret void +// DEVICE: worker.exit: +// DEVICE-NEXT: ret void +// diff --git a/clang/test/OpenMP/declare_target_local_usm_codegen.cpp b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp new file mode 100644 index 0000000000000..d97d6f409d265 --- /dev/null +++ b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp @@ -0,0 +1,52 @@ +// Test that declare target local variables are NOT affected by +// unified_shared_memory. Local variables always use direct access +// (no offload entry, no _decl_tgt_ref_ptr) regardless of USM. For +// comparison, enter variables with USM use pointer-reference indirection +// when normally they would also be direct access. +// +// CHECK lines not auto-generated because they are specifically verifying +// absence of ref ptr and offload entry for local variable and, by contrast, +// presence of ref ptr and offload entry for enter variable. + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefix=DEVICE + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#pragma omp requires unified_shared_memory + +int local_var; +#pragma omp declare target local(local_var) + +int enter_var; +#pragma omp declare target enter(enter_var) + +// local_var: direct access, no ref ptr, no offload entry +// HOST-DAG: @local_var = global i32 0 +// HOST-NOT: @local_var_decl_tgt_ref_ptr + +// enter_var with USM: pointer-reference indirection +// HOST-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr @enter_var +// HOST-DAG: @.offloading.entry.enter_var_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @enter_var_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i64 0, ptr null }, section "llvm_offload_entries" + +// Device: local_var is a direct global, enter_var uses ref ptr +// DEVICE-DAG: @local_var = protected addrspace(1) global i32 0 +// DEVICE-NOT: @local_var_decl_tgt_ref_ptr +// DEVICE-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr null + +int use_vars() { + int result = 0; +#pragma omp target map(from: result) + { + local_var = 42; + enter_var = 10; + result = local_var + enter_var; + } + return result; +} + +#endif diff --git a/clang/test/OpenMP/declare_target_messages.cpp b/clang/test/OpenMP/declare_target_messages.cpp index 9875bd95141fd..6fe477755dbe7 100644 --- a/clang/test/OpenMP/declare_target_messages.cpp +++ b/clang/test/OpenMP/declare_target_messages.cpp @@ -152,10 +152,14 @@ void func() {} // expected-note@+1 {{'func_local' defined here}} void func_local() {} -// dev60-warning@+3 {{'local' clause on 'declare_target' directive is not yet fully implemented; variable will be treated as 'enter'}} // omp60-error@+2 {{unexpected 'allocate' clause, only 'enter', 'link', 'device_type', 'indirect' or 'local' clauses expected}} // expected-error@+1 {{function name is not allowed in 'local' clause}} #pragma omp declare target local(func_local) allocate(a) + +// omp60-error@+1 {{unexpected 'local' clause, only 'device_type', 'indirect' clauses expected}} +#pragma omp begin declare target local +int begin_local_var; +#pragma omp end declare target #endif // _OPENMP void bar(); @@ -335,7 +339,6 @@ int y_enter_local; #pragma omp declare target local(y_enter_local) int y_local_enter; -// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet fully implemented; variable will be treated as 'enter'}} #pragma omp declare target local(y_local_enter) // expected-error@+1 {{'y_local_enter' must not appear in both clauses 'local' and 'enter'}} #pragma omp declare target enter(y_local_enter) @@ -346,7 +349,6 @@ int y_link_local; #pragma omp declare target local(y_link_local) int y_local_link; -// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet fully implemented; variable will be treated as 'enter'}} #pragma omp declare target local(y_local_link) // expected-error@+1 {{'y_local_link' must not appear in both clauses 'local' and 'link'}} #pragma omp declare target link(y_local_link) @@ -466,7 +468,6 @@ int MultiDevTy; #pragma omp declare target to(MultiDevTy) device_type(nohost) int counter = 0; -// dev60-warning@+9 {{'local' clause on 'declare_target' directive is not yet fully implemented; variable will be treated as 'enter'}} // omp52-error@+8 {{unexpected 'local' clause, only 'enter', 'link', 'device_type' or 'indirect' clauses expected}} // omp52-error@+7 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp51-error@+6 {{unexpected 'local' clause, only 'to', 'link', 'device_type' or 'indirect' clauses expected}} diff --git a/offload/test/offloading/declare_target_local.cpp b/offload/test/offloading/declare_target_local.cpp new file mode 100644 index 0000000000000..9bff4ea38a407 --- /dev/null +++ b/offload/test/offloading/declare_target_local.cpp @@ -0,0 +1,40 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic -fopenmp-version=60 +// RUN: %libomptarget-run-generic | %fcheck-generic +// RUN: %libomptarget-compileoptxx-generic -fopenmp-version=60 +// RUN: %libomptarget-run-generic | %fcheck-generic +// clang-format on + +// Sanity test for OpenMP 6.0 declare target 'local' clause. +// Verify 'local' variable has device-local storage, that it has +// correct initial value, persists across target regions, and is +// independent from host copy. + +#include <cstdio> +int local_var = 42; +#pragma omp declare target local(local_var) + +int main() { + // Device should get the initializer value. + int init = -1; +#pragma omp target map(from : init) + init = local_var; + + // Device write should persist to a second region. + int persist = -1; +#pragma omp target + local_var = 100; +#pragma omp target map(from : persist) + persist = local_var; + + // Host copy should not be affected by device write. Should + // retain original value. + + // CHECK: PASS + if (init == 42 && persist == 100 && local_var == 42) + printf("PASS\n"); + else + printf("FAIL init=%d persist=%d host=%d\n", init, persist, local_var); + + return (init == 42 && persist == 100 && local_var == 42) ? 0 : 1; +} >From 47571f06e1c9b06428717386ba94dde4d289bf25 Mon Sep 17 00:00:00 2001 From: Dave Pagan <[email protected]> Date: Sat, 9 May 2026 12:07:00 -0500 Subject: [PATCH 2/2] Updated OpenMPSupport.rst and ReleaseNotes.rst. --- clang/docs/OpenMPSupport.rst | 3 ++- clang/docs/ReleaseNotes.rst | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 962fc717bc496..efe9726137625 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -595,7 +595,8 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Message and severity clauses | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/146093 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| Local clause on declare target | :part:`In Progress` | :none:`unclaimed` | | +| Local clause on declare target | :good:`done` | :none:`unclaimed` | clang Parse/Sema: https://github.com/llvm/llvm-project/pull/186281 | +| | | | clang Codegen : https://github.com/llvm/llvm-project/pull/196431 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | groupprivate directive | :part:`In Progress` | :part:`partial` | Flang: kparzysz, mjklemm | | | | | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index cb19b80b7e994..644d001aeedbe 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -793,6 +793,8 @@ OpenMP Support - Added support for ``transparent`` clause in task and taskloop directives. - Added support for ``use_device_ptr`` clause to accept an optional ``fallback`` modifier (``fb_nullify`` or ``fb_preserve``) with OpenMP >= 61. +- Added support for ``local`` clause with declare_target directive when + OpenMP >= 60. Improvements ^^^^^^^^^^^^ _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
