[clang] fc47c0e - [clang] Fix compilation warnings in OpenMP declare mapper codegen.
Author: George Rokos Date: 2020-07-16T11:04:12-07:00 New Revision: fc47c0e0a6a2681154efa9d31b8605fc91a62daa URL: https://github.com/llvm/llvm-project/commit/fc47c0e0a6a2681154efa9d31b8605fc91a62daa DIFF: https://github.com/llvm/llvm-project/commit/fc47c0e0a6a2681154efa9d31b8605fc91a62daa.diff LOG: [clang] Fix compilation warnings in OpenMP declare mapper codegen. This patch fixes the compilation warnings that L is not a reference. Thanks to Lingda Li for providing the patch. Differential Revision: https://reviews.llvm.org/D83959 Added: Modified: clang/lib/CodeGen/CGOpenMPRuntime.cpp Removed: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 4fecd89d2bc5..89f403f2c82f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8008,12 +8008,12 @@ class MappableExprsHandler { C->isImplicit(), std::get<2>(L)); } for (const auto *C : CurExecDir->getClausesOfKind()) - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L)); } for (const auto *C : CurExecDir->getClausesOfKind()) - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L)); } @@ -8029,7 +8029,7 @@ class MappableExprsHandler { for (const auto *C : CurExecDir->getClausesOfKind()) { - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components = std::get<1>(L); assert(!Components.empty() && ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] fca49fe - [clang-offload-wrapper] Lower priority of __tgt_register_lib in favor of __tgt_register_requires
Author: George Rokos Date: 2020-03-03T12:31:40-08:00 New Revision: fca49fe8e34f13632c42e68aad4b14e3e00bdcc8 URL: https://github.com/llvm/llvm-project/commit/fca49fe8e34f13632c42e68aad4b14e3e00bdcc8 DIFF: https://github.com/llvm/llvm-project/commit/fca49fe8e34f13632c42e68aad4b14e3e00bdcc8.diff LOG: [clang-offload-wrapper] Lower priority of __tgt_register_lib in favor of __tgt_register_requires Lower priority of __tgt_register_lib in order to make sure that __tgt_register_requires is called before loading a libomptarget plugin. We want to know beforehand which requirements the user has asked for so that upon loading the plugin libomptarget can report how many devices there are that can satisfy these requirements. Differential Revision: https://reviews.llvm.org/D75223 Added: Modified: clang/test/Driver/clang-offload-wrapper.c clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp Removed: diff --git a/clang/test/Driver/clang-offload-wrapper.c b/clang/test/Driver/clang-offload-wrapper.c index c8c17bd8a514..9a36559e34dd 100644 --- a/clang/test/Driver/clang-offload-wrapper.c +++ b/clang/test/Driver/clang-offload-wrapper.c @@ -39,8 +39,8 @@ // CHECK-IR: [[DESC:@.+]] = internal constant [[DESCTY]] { i32 1, [[IMAGETY]]* getelementptr inbounds ([1 x [[IMAGETY]]], [1 x [[IMAGETY]]]* [[IMAGES]], i64 0, i64 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] } -// CHECK-IR: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* [[REGFN:@.+]], i8* null }] -// CHECK-IR: @llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* [[UNREGFN:@.+]], i8* null }] +// CHECK-IR: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* [[REGFN:@.+]], i8* null }] +// CHECK-IR: @llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* [[UNREGFN:@.+]], i8* null }] // CHECK-IR: define 
internal void [[REGFN]]() // CHECK-IR: call void @__tgt_register_lib([[DESCTY]]* [[DESC]]) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index c3863422adf6..78d96539f47e 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -262,7 +262,12 @@ class BinaryWrapper { Builder.CreateRetVoid(); // Add this function to constructors. -appendToGlobalCtors(M, Func, 0); +// Set priority to 1 so that __tgt_register_lib is executed AFTER +// __tgt_register_requires (we want to know what requirements have been +// asked for before we load a libomptarget plugin so that by the time the +// plugin is loaded it can report how many devices there are which can +// satisfy these requirements). +appendToGlobalCtors(M, Func, /*Priority*/ 1); } void createUnregisterFunction(GlobalVariable *BinDesc) { @@ -283,7 +288,8 @@ class BinaryWrapper { Builder.CreateRetVoid(); // Add this function to global destructors. -appendToGlobalDtors(M, Func, 0); +// Match priority of __tgt_register_lib +appendToGlobalDtors(M, Func, /*Priority*/ 1); } public: ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r320082 - [OpenMP] NVPTX: Set default/minimum compute capability to sm_35
Author: grokos Date: Thu Dec 7 12:27:31 2017 New Revision: 320082 URL: http://llvm.org/viewvc/llvm-project?rev=320082&view=rev Log: [OpenMP] NVPTX: Set default/minimum compute capability to sm_35 The current implementation of the nvptx runtime (to be upstreamed shortly) uses the atomicMax operation on 64-bit integers. This is only supported in compute capabilities 3.5 and later. I've changed the clang default to sm_35. Differential Revision: https://reviews.llvm.org/D40977 Modified: cfe/trunk/CMakeLists.txt Modified: cfe/trunk/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/CMakeLists.txt?rev=320082&r1=320081&r2=320082&view=diff == --- cfe/trunk/CMakeLists.txt (original) +++ cfe/trunk/CMakeLists.txt Thu Dec 7 12:27:31 2017 @@ -241,14 +241,15 @@ set(CLANG_DEFAULT_OBJCOPY "objcopy" CACH set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING "Default OpenMP runtime used by -fopenmp.") -# OpenMP offloading requires at least sm_30 because we use shuffle instructions -# to generate efficient code for reductions. -set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING +# OpenMP offloading requires at least sm_35 because we use shuffle instructions +# to generate efficient code for reductions and the atomicMax instruction on +# 64-bit integers in the implementation of conditional lastprivate. 
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING "Default architecture for OpenMP offloading to Nvidia GPUs.") string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}") -if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30) - message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30") - set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING +if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35) + message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35") + set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE) endif() ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r317598 - Clang/libomptarget map interface flag renaming - NFC patch
Author: grokos Date: Tue Nov 7 10:27:04 2017 New Revision: 317598 URL: http://llvm.org/viewvc/llvm-project?rev=317598&view=rev Log: Clang/libomptarget map interface flag renaming - NFC patch This patch renames some of the flag names of the clang/libomptarget map interface. The old names are slightly misleading, whereas the new ones describe in a better way what each flag is about. Only the macros within the enumeration are renamed, there is no change in functionality therefore there are no updated regression tests. Differential Revision: https://reviews.llvm.org/D39745 Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=317598&r1=317597&r2=317598&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Nov 7 10:27:04 2017 @@ -5978,22 +5978,21 @@ public: /// \brief Delete the element from the device environment, ignoring the /// current reference count associated with the element. OMP_MAP_DELETE = 0x08, -/// \brief The element being mapped is a pointer, therefore the pointee -/// should be mapped as well. -OMP_MAP_IS_PTR = 0x10, -/// \brief This flags signals that an argument is the first one relating to -/// a map/private clause expression. For some cases a single -/// map/privatization results in multiple arguments passed to the runtime -/// library. -OMP_MAP_FIRST_REF = 0x20, +/// \brief The element being mapped is a pointer-pointee pair; both the +/// pointer and the pointee should be mapped. +OMP_MAP_PTR_AND_OBJ = 0x10, +/// \brief This flags signals that the base address of an entry should be +/// passed to the target kernel as an argument. +OMP_MAP_TARGET_PARAM = 0x20, /// \brief Signal that the runtime library has to return the device pointer -/// in the current position for the data being mapped. 
-OMP_MAP_RETURN_PTR = 0x40, +/// in the current position for the data being mapped. Used when we have the +/// use_device_ptr clause. +OMP_MAP_RETURN_PARAM = 0x40, /// \brief This flag signals that the reference being passed is a pointer to /// private data. -OMP_MAP_PRIVATE_PTR = 0x80, +OMP_MAP_PRIVATE = 0x80, /// \brief Pass the element to the device by value. -OMP_MAP_PRIVATE_VAL = 0x100, +OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, }; @@ -6084,7 +6083,7 @@ private: /// expression. unsigned getMapTypeBits(OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, - bool AddIsFirstFlag) const { + bool AddIsTargetParamFlag) const { unsigned Bits = 0u; switch (MapType) { case OMPC_MAP_alloc: @@ -6111,9 +6110,9 @@ private: break; } if (AddPtrFlag) - Bits |= OMP_MAP_IS_PTR; -if (AddIsFirstFlag) - Bits |= OMP_MAP_FIRST_REF; + Bits |= OMP_MAP_PTR_AND_OBJ; +if (AddIsTargetParamFlag) + Bits |= OMP_MAP_TARGET_PARAM; if (MapTypeModifier == OMPC_MAP_always) Bits |= OMP_MAP_ALWAYS; return Bits; @@ -6220,28 +6219,28 @@ private: // // map(s.p[:22], s.a s.b) // &s, &(s.p), sizeof(double*), noflags -// &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag +// &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag // // map(s.ps) // &s, &(s.ps), sizeof(S2*), noflags // // map(s.ps->s.i) // &s, &(s.ps), sizeof(S2*), noflags -// &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag +// &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag // // map(s.ps->ps) // &s, &(s.ps), sizeof(S2*), noflags -// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag +// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->ps) // &s, &(s.ps), sizeof(S2*), noflags -// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag -// &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag +// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag +// &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->s.f[:22]) // &s, &(s.ps), sizeof(S2*), 
noflags -// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag -// &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag +// &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag +// &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag // // map(ps) // &ps, &ps, sizeof(S2*), noflags @@ -6257,29 +6256,28 @@ private: // // map(ps->p[:22]) // ps, &(ps->p), sizeof(double*), noflags -// &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag +// &(ps->p), &(ps->p[0]),
r304056 - [OpenMP] Create COMDAT group for OpenMP offload registration code to avoid multiple copies
Author: grokos Date: Fri May 26 22:03:13 2017 New Revision: 304056 URL: http://llvm.org/viewvc/llvm-project?rev=304056&view=rev Log: [OpenMP] Create COMDAT group for OpenMP offload registration code to avoid multiple copies Thanks to Sergey Dmitriev for submitting the patch. Differential Revision: https://reviews.llvm.org/D33509 Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/test/OpenMP/target_codegen.cpp cfe/trunk/test/OpenMP/target_codegen_registration.cpp cfe/trunk/test/OpenMP/target_parallel_codegen.cpp cfe/trunk/test/OpenMP/target_parallel_codegen_registration.cpp cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp cfe/trunk/test/OpenMP/target_teams_codegen.cpp cfe/trunk/test/OpenMP/target_teams_codegen_registration.cpp cfe/trunk/test/OpenMP/target_teams_num_teams_codegen.cpp cfe/trunk/test/OpenMP/target_teams_thread_limit_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=304056&r1=304055&r2=304056&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Fri May 26 22:03:13 2017 @@ -2903,6 +2903,19 @@ CGOpenMPRuntime::createOffloadingBinaryD Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); + if (CGM.supportsCOMDAT()) { +// It is sufficient to call registration function only once, so create a +// COMDAT group for registration/unregistration functions and associated +// data. That would reduce startup time and code size. Registration +// function serves as a COMDAT group key. 
+auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); +RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); +RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); +RegFn->setComdat(ComdatKey); +UnRegFn->setComdat(ComdatKey); +DeviceImages->setComdat(ComdatKey); +Desc->setComdat(ComdatKey); + } return RegFn; } Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=304056&r1=304055&r2=304056&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri May 26 22:03:13 2017 @@ -400,8 +400,11 @@ void CodeGenModule::Release() { } if (OpenMPRuntime) if (llvm::Function *OpenMPRegistrationFunction = -OpenMPRuntime->emitRegistrationFunction()) - AddGlobalCtor(OpenMPRegistrationFunction, 0); +OpenMPRuntime->emitRegistrationFunction()) { + auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? +OpenMPRegistrationFunction : nullptr; + AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); +} if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) Modified: cfe/trunk/test/OpenMP/target_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_codegen.cpp?rev=304056&r1=304055&r2=304056&view=diff == --- cfe/trunk/test/OpenMP/target_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/target_codegen.cpp Fri May 26 22:03:13 2017 @@ -28,6 +28,8 @@ // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + // We have 8 target regions, but only 7 that actually will generate offloading // code, only 6 will have mapped arguments, and only 4 have all-constant map // sizes. 
@@ -64,11 +66,11 @@ // CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] // CHECK: [[DEVBEGIN:@.+]] = external constant i8 // CHECK: [[DEVEND:@.+]] = external constant i8 -// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }] -// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] } +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) // Check target registration is registered as a Ctor. -// CHECK: appending global [1 x { i32,