https://github.com/Jason-VanBeusekom updated https://github.com/llvm/llvm-project/pull/159856
>From e7aafa4162d216914902aab34f51db5232fc8c45 Mon Sep 17 00:00:00 2001 From: "[email protected]" <[email protected]> Date: Fri, 12 Sep 2025 14:07:54 -0500 Subject: [PATCH 1/6] [OpenMP][clang] Register Vtables on device for indirect calls Runtime / Registration support for indirect and virtual function calls in OpenMP target regions - Register Vtable's to OpenMP offload table - Modify PluginInterface to register Vtables to indirect call table This Patch does not have the logic for calling __llvm_omp_indirect_call_lookup, and lacks implementation logic --------- Co-authored-by: Chi-Chun Chen <[email protected]> Co-authored-by: Jeffery Sandoval <[email protected]> --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 128 ++++++++ clang/lib/CodeGen/CGOpenMPRuntime.h | 13 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 + clang/lib/CodeGen/CGVTables.cpp | 6 + clang/lib/CodeGen/CGVTables.h | 4 + clang/lib/CodeGen/CodeGenModule.h | 3 + clang/test/OpenMP/target_vtable_codegen.cpp | 280 ++++++++++++++++++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 5 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 19 +- offload/include/omptarget.h | 2 + offload/libomptarget/PluginManager.cpp | 7 +- offload/libomptarget/device.cpp | 37 ++- 12 files changed, 497 insertions(+), 11 deletions(-) create mode 100644 clang/test/OpenMP/target_vtable_codegen.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a503aaf613e30..028d14e897667 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1771,12 +1771,126 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); } + // Register the indirect Vtable: + // This is similar to OMPTargetGlobalVarEntryIndirect, except that the + // size field refers to the size of memory pointed to, not the size of + // the pointer symbol itself (which is implicitly the size of a pointer). OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, llvm::GlobalValue::WeakODRLinkage); } +void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable, + const VarDecl *VD) { + // TODO: add logic to avoid duplicate vtable registrations per + // translation unit; though for external linkage, this should no + // longer be an issue - or at least we can avoid the issue by + // checking for an existing offloading entry. But, perhaps the + // better approach is to defer emission of the vtables and offload + // entries until later (by tracking a list of items that need to be + // emitted). + + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + // Generate a new externally visible global to point to the + // internally visible vtable. Doing this allows us to keep the + // visibility and linkage of the associated vtable unchanged while + // allowing the runtime to access its value. The externally + // visible global var needs to be emitted with a unique mangled + // name that won't conflict with similarly named (internal) + // vtables in other translation units. + + // Register vtable with source location of dynamic object in map + // clause. + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(), + VTable->getName()); + + llvm::GlobalVariable *Addr = VTable; + size_t PointerSize = CGM.getDataLayout().getPointerSize(); + SmallString<128> AddrName; + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo); + AddrName.append("addr"); + + if (CGM.getLangOpts().OpenMPIsTargetDevice) { + Addr = new llvm::GlobalVariable( + CGM.getModule(), VTable->getType(), + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable, + AddrName, + /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); + Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); + } + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( + AddrName, VTable, + CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()), + llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable, + llvm::GlobalValue::WeakODRLinkage); +} + +// Register VTable by scanning through the map clause of OpenMP target region. +void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { + // Get CXXRecordDecl and VarDecl from Expr. + auto getVTableDecl = [](const Expr *E) { + QualType VDTy = E->getType(); + CXXRecordDecl *CXXRecord = nullptr; + if (const auto *RefType = VDTy->getAs<LValueReferenceType>()) + VDTy = RefType->getPointeeType(); + if (VDTy->isPointerType()) + CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl(); + else + CXXRecord = VDTy->getAsCXXRecordDecl(); + + const VarDecl *VD = nullptr; + if (auto *DRE = dyn_cast<DeclRefExpr>(E)) + VD = cast<VarDecl>(DRE->getDecl()); + return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD); + }; + + // Emit VTable and register the VTable to OpenMP offload entry recursively. + std::function<void(CodeGenModule &, CXXRecordDecl *, const VarDecl *)> + emitAndRegisterVTable = [&emitAndRegisterVTable](CodeGenModule &CGM, + CXXRecordDecl *CXXRecord, + const VarDecl *VD) { + // Register C++ VTable to OpenMP Offload Entry if it's a new + // CXXRecordDecl. + if (CXXRecord && CXXRecord->isDynamicClass() && + CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) == + CGM.getOpenMPRuntime().VTableDeclMap.end()) { + CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD); + CGM.EmitVTable(CXXRecord); + auto VTables = CGM.getVTables(); + auto *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); + if (VTablesAddr) { + CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD); + } + // Emit VTable for all the fields containing dynamic CXXRecord + for (const FieldDecl *Field : CXXRecord->fields()) { + if (CXXRecordDecl *RecordDecl = + Field->getType()->getAsCXXRecordDecl()) { + emitAndRegisterVTable(CGM, RecordDecl, VD); + } + } + // Emit VTable for all dynamic parent class + for (CXXBaseSpecifier &Base : CXXRecord->bases()) { + if (CXXRecordDecl *BaseDecl = + Base.getType()->getAsCXXRecordDecl()) { + emitAndRegisterVTable(CGM, BaseDecl, VD); + } + } + } + }; + + // Collect VTable from OpenMP map clause. + for (const auto *C : D.getClausesOfKind<OMPMapClause>()) { + for (const auto *E : C->varlist()) { + auto DeclPair = getVTableDecl(E); + emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second); + } + } +} + Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { @@ -6249,6 +6363,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); } } + registerVTable(D); } /// Checks if the expression is constant or does not have non-trivial function @@ -9955,6 +10070,19 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (!S) return; + // Register vtable from device for target data and target directives. + // Add this block here since scanForTargetRegionsFunctions ignores + // target data by checking if S is a executable directive (target). + if (isa<OMPExecutableDirective>(S) && + isOpenMPTargetDataManagementDirective( + cast<OMPExecutableDirective>(S)->getDirectiveKind())) { + auto &E = *cast<OMPExecutableDirective>(S); + // Don't need to check if it's device compile + // since scanForTargetRegionsFunctions currently only called + // in device compilation. + registerVTable(E); + } + // Codegen OMP target directives that offload compute to the device. bool RequiresDeviceCodegen = isa<OMPExecutableDirective>(S) && diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index eb04eceee236c..0f7937ae95c06 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -605,6 +605,9 @@ class CGOpenMPRuntime { LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray); + /// Keep track of VTable Declarations so we don't register duplicate VTable. + llvm::DenseMap<CXXRecordDecl*, const VarDecl*> VTableDeclMap; + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} @@ -1111,6 +1114,16 @@ class CGOpenMPRuntime { virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV); + /// Register VTable to OpenMP offload entry. + /// \param VTable VTable of the C++ class. + /// \param RD C++ class decl. + virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, + const VarDecl *VD); + /// Emit code for registering vtable by scanning through map clause + /// in OpenMP target region. + /// \param D OpenMP target directive. + virtual void registerVTable(const OMPExecutableDirective &D); + /// Creates artificial threadprivate variable with name \p Name and type \p /// VarType. /// \param VarType Type of the artificial threadprivate variable. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d72cd8fbfd608..582dd0f3ade65 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause( // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { + // Emit vtable only from host for target data directive. + if (!CGM.getLangOpts().OpenMPIsTargetDevice) { + CGM.getOpenMPRuntime().registerVTable(S); + } CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, /*SeparateBeginEndCalls=*/true); diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index e14e883a55ac5..de4a67db313ea 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy, /*DontDefer=*/true, /*IsThunk=*/true); } +llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) { + llvm::GlobalVariable *VTable = + CGM.getCXXABI().getAddrOfVTable(RD, CharUnits()); + return VTable; +} + static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk, llvm::Function *ThunkFn, bool ForVTable, GlobalDecl GD) { diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index 5c45e355fb145..37458eee02e34 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -122,6 +122,10 @@ class CodeGenVTables { llvm::GlobalVariable::LinkageTypes Linkage, const CXXRecordDecl *RD); + /// GetAddrOfVTable - Get the address of the VTable for the given record + /// decl. + llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD); + /// EmitThunks - Emit the associated thunks for the given global decl. void EmitThunks(GlobalDecl GD); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 3971b296b3f80..4ace1abcb5246 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache { // i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32) llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr; + // Store indirect CallExprs that are within an omp target region + llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls; + InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } diff --git a/clang/test/OpenMP/target_vtable_codegen.cpp b/clang/test/OpenMP/target_vtable_codegen.cpp new file mode 100644 index 0000000000000..276cef4eb8801 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen.cpp @@ -0,0 +1,280 @@ +///==========================================================================/// +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK1 +// +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK2 +// +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK3 +// +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK4 +// +// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++ +// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s --check-prefix=CK5 +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER +#ifdef CK1 + +// Make sure both host and device compilation emit vtable for Dervied +// CK1-DAG: $_ZN7DerivedD1Ev = comdat any +// CK1-DAG: $_ZN7DerivedD0Ev = comdat any +// CK1-DAG: $_ZN7Derived5BaseAEi = comdat any +// CK1-DAG: $_ZN7Derived8DerivedBEv = comdat any +// CK1-DAG: $_ZN7DerivedD2Ev = comdat any +// CK1-DAG: $_ZN4BaseD2Ev = comdat any +// CK1-DAG: $_ZTV7Derived = comdat any +class Base { +public: + virtual ~Base() = default; + virtual void BaseA(int a) { } +}; + +// CK1: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] } +class Derived : public Base { +public: + ~Derived() override = default; + void BaseA(int a) override { x = a; } + virtual void DerivedB() { } +private: + int x; +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; + // Should emit vtable for Derived since d is added to map clause +#pragma omp target data map (to: d, a) + { + #pragma omp target map(d) + { + c.BaseA(a); + } + } + return 0; +} + +#endif // CK1 + +#ifdef CK2 + +namespace { + +// Make sure both host and device compilation emit vtable for Dervied +// CK2-DAG: @_ZTVN12_GLOBAL__N_17DerivedE +// CK2-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev +// CK2-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev +// CK2-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi +// CK2-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv +class Base { +public: + virtual ~Base() = default; + virtual void BaseA(int a) { } +}; + +class Derived : public Base { +public: + ~Derived() override = default; + void BaseA(int a) override { x = a; } + virtual void DerivedB() { } +private: + int x; +}; + +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; +#pragma omp target data map (to: d, a) + { + #pragma omp target + { + c.BaseA(a); + } + } + return 0; +} + +#endif // CK2 + +#ifdef CK3 + +// CK3-DAG: @_ZTV6Base_1 +// CK3-DAG: @_ZTV7Derived +// CK3-DAG: @_ZTV6Base_2 +#pragma omp begin declare target + +class Base_1 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Base_2 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Derived : public Base_1, public Base_2 { +public: + virtual void foo() override { } + virtual void bar() override { } +}; + +#pragma omp end declare target + +int main() { + Base_1 base; + Derived derived; + + // Make sure we emit vtable for parent class (Base_1 and Base_2) +#pragma omp target data map(derived) + { + Base_1 *p1 = &derived; + +#pragma omp target + { + p1->foo(); + p1->bar(); + } + } + return 0; +} + +#endif // CK3 + +#ifdef CK4 + +// CK4-DAG: @_ZTV3Car +// CK4-DAG: @_ZTV6Engine +// CK4-DAG: @_ZTV6Wheels +// CK4-DAG: @_ZTV7Vehicle +// CK4-DAG: @_ZTV5Brand +class Engine { +public: + Engine(const char *type) : type(type) {} + virtual ~Engine() {} + + virtual void start() const { } + +protected: + const char *type; +}; + +class Wheels { +public: + Wheels(int count) : count(count) {} + virtual ~Wheels() {} + + virtual void roll() const { } + +protected: + int count; +}; + +class Vehicle { +public: + Vehicle(int speed) : speed(speed) {} + virtual ~Vehicle() {} + + virtual void move() const { } + +protected: + int speed; +}; + +class Brand { +public: + Brand(const char *brandName) : brandName(brandName) {} + virtual ~Brand() {} + + void showBrand() const { } + +protected: + const char *brandName; +}; + +class Car : public Vehicle, public Brand { +public: + Car(const char *brand, int speed, const char *engineType, int wheelCount) + : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {} + + void move() const override { } + + void drive() const { + showBrand(); + engine.start(); + wheels.roll(); + move(); + } + +private: + Engine engine; + Wheels wheels; +}; + +int main() { + Car myActualCar("Ford", 100, "Hybrid", 4); + + // Make sure we emit VTable for dynamic class as field +#pragma omp target map(myActualCar) + { + myActualCar.drive(); + } + return 0; +} + +#endif // CK4 + +#ifdef CK5 + +// CK5-DAG: @_ZTV7Derived +// CK5-DAG: @_ZTV4Base +template <typename T> +class Container { +private: +T value; +public: +Container() : value() {} +Container(T val) : value(val) {} + +T getValue() const { return value; } + +void setValue(T val) { value = val; } +}; + +class Base { +public: + virtual void foo() {} +}; +class Derived : public Base {}; + +class Test { +public: + Container<Derived> v; +}; + +int main() { + Test test; + Derived d; + test.v.setValue(d); + +// Make sure we emit VTable for type indirectly (template specialized type) +#pragma omp target map(test) + { + test.v.getValue().foo(); + } + return 0; +} + +#endif // CK5 +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index f43ef932e965a..cc0d4c89f9b9f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -390,6 +390,8 @@ class OffloadEntriesInfoManager { OMPTargetGlobalVarEntryIndirect = 0x8, /// Mark the entry as a register requires global. OMPTargetGlobalRegisterRequires = 0x10, + /// Mark the entry as a declare target indirect vtable. + OMPTargetGlobalVarEntryIndirectVTable = 0x20, }; /// Kind of device clause for declare target variables @@ -2666,7 +2668,8 @@ class OpenMPIRBuilder { enum EmitMetadataErrorKind { EMIT_MD_TARGET_REGION_ERROR, EMIT_MD_DECLARE_TARGET_ERROR, - EMIT_MD_GLOBAL_VAR_LINK_ERROR + EMIT_MD_GLOBAL_VAR_LINK_ERROR, + EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR }; /// Callback function type diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 220eee3cb8b08..a18a4bcb6d62e 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -10246,6 +10246,13 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( continue; } break; + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect: + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable: + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second); + continue; + } + break; default: break; } @@ -10255,12 +10262,17 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( // entry. Indirect variables are handled separately on the device. if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress())) if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) && - Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + (Flags != + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags != OffloadEntriesInfoManager:: + OMPTargetGlobalVarEntryIndirectVTable)) continue; // Indirect globals need to use a special name that doesn't match the name // of the associated host global. - if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags == + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(), Flags, CE->getLinkage(), CE->getVarName()); else @@ -10689,7 +10701,8 @@ void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo( } return; } - if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, VarName.str()); diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 8fd722bb15022..bdcda770f2d37 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags { OMP_DECLARE_TARGET_INDIRECT = 0x08, /// This is an entry corresponding to a requirement to be registered. OMP_REGISTER_REQUIRES = 0x10, + /// Mark the entry global as being an indirect vtable. + OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20, }; enum TargetAllocTy : int32_t { diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index b57a2f815cba6..0cdeeb2d55f17 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -434,7 +434,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) { llvm::offloading::EntryTy DeviceEntry = Entry; if (Entry.Size) { - if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, + if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) && + Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &DeviceEntry.Address) != OFFLOAD_SUCCESS) REPORT("Failed to load symbol %s\n", Entry.SymbolName); @@ -443,7 +444,9 @@ static int loadImagesOntoDevice(DeviceTy &Device) { // the device to point to the memory on the host. if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) || (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) { - if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address, + if (!(OMP_DECLARE_TARGET_INDIRECT_VTABLE | + OMP_DECLARE_TARGET_INDIRECT) && + Device.RTL->data_submit(DeviceId, DeviceEntry.Address, Entry.Address, Entry.Size) != OFFLOAD_SUCCESS) REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName); diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index 71423ae0c94d9..fa1920eb8e89b 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -112,13 +112,39 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable; for (const auto &Entry : Entries) { if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP || - Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT)) + Entry.Size == 0 || + !(Entry.Flags & + (OMP_DECLARE_TARGET_INDIRECT | OMP_DECLARE_TARGET_INDIRECT_VTABLE))) continue; - assert(Entry.Size == sizeof(void *) && "Global not a function pointer?"); - auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); - - void *Ptr; + size_t PtrSize = sizeof(void *); + if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) { + // This is a VTable entry, the current entry is the first index of the + // VTable and Entry.Size is the total size of the VTable. Unlike the + // indirect function case below, the Global is not of size Entry.Size and + // is instead of size PtrSize (sizeof(void*)). + void *Vtable; + void *res; + if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + + // HstPtr = Entry.Address; + if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + // Calculate and emplace entire Vtable from first Vtable byte + for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) { + auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); + HstPtr = (void *)((uintptr_t)Entry.Address + i * PtrSize); + DevPtr = (void *)((uintptr_t)res + i * PtrSize); + } + } else { + // Indirect function case: Entry.Size should equal PtrSize since we're + // dealing with a single function pointer (not a VTable) + assert(Entry.Size == PtrSize && "Global not a function pointer?"); + auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); + void *Ptr; if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr)) return error::createOffloadError(error::ErrorCode::INVALID_BINARY, "failed to load %s", Entry.SymbolName); @@ -127,6 +153,7 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo)) return error::createOffloadError(error::ErrorCode::INVALID_BINARY, "failed to load %s", Entry.SymbolName); + } } // If we do not have any indirect globals we exit early. >From 22f6af48e58bfda9380c43e1f10bb94915bb3950 Mon Sep 17 00:00:00 2001 From: jason-van-beusekom <[email protected]> Date: Tue, 30 Sep 2025 16:33:19 -0500 Subject: [PATCH 2/6] Review feedback --- offload/libomptarget/PluginManager.cpp | 4 +- offload/libomptarget/device.cpp | 34 ++++-- .../test/api/omp_indirect_call_table_manual.c | 107 ++++++++++++++++++ 3 files changed, 131 insertions(+), 14 deletions(-) create mode 100644 offload/test/api/omp_indirect_call_table_manual.c diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 0cdeeb2d55f17..6fc330b92f0f5 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -444,8 +444,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) { // the device to point to the memory on the host. if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) || (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) { - if (!(OMP_DECLARE_TARGET_INDIRECT_VTABLE | - OMP_DECLARE_TARGET_INDIRECT) && + if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) && + !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) && Device.RTL->data_submit(DeviceId, DeviceEntry.Address, Entry.Address, Entry.Size) != OFFLOAD_SUCCESS) diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index fa1920eb8e89b..d5436bde47ba5 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -113,8 +113,8 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, for (const auto &Entry : Entries) { if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP || Entry.Size == 0 || - !(Entry.Flags & - (OMP_DECLARE_TARGET_INDIRECT | OMP_DECLARE_TARGET_INDIRECT_VTABLE))) + (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) && + !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE))) continue; size_t PtrSize = sizeof(void *); @@ -133,11 +133,17 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo)) return error::createOffloadError(error::ErrorCode::INVALID_BINARY, "failed to load %s", Entry.SymbolName); + if (Device.synchronize(AsyncInfo)) + return error::createOffloadError( + error::ErrorCode::INVALID_BINARY, + "failed to synchronize after retrieving %s", Entry.SymbolName); // Calculate and emplace entire Vtable from first Vtable byte for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) { auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); - HstPtr = (void *)((uintptr_t)Entry.Address + i * PtrSize); - DevPtr = (void *)((uintptr_t)res + i * PtrSize); + HstPtr = reinterpret_cast<void *>( + reinterpret_cast<uintptr_t>(Entry.Address) + i * PtrSize); + DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(res) + + i * PtrSize); } } else { // Indirect function case: Entry.Size should equal PtrSize since we're @@ -145,15 +151,19 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, assert(Entry.Size == PtrSize && "Global not a function pointer?"); auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); void *Ptr; - if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr)) - return error::createOffloadError(error::ErrorCode::INVALID_BINARY, - "failed to load %s", Entry.SymbolName); - - HstPtr = Entry.Address; - if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo)) - return error::createOffloadError(error::ErrorCode::INVALID_BINARY, - "failed to load %s", Entry.SymbolName); + if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + + HstPtr = Entry.Address; + if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); } + if (Device.synchronize(AsyncInfo)) + return error::createOffloadError( + error::ErrorCode::INVALID_BINARY, + "failed to synchronize after retrieving %s", Entry.SymbolName); } // If we do not have any indirect globals we exit early. diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c new file mode 100644 index 0000000000000..9c6fd4ca84ea3 --- /dev/null +++ b/offload/test/api/omp_indirect_call_table_manual.c @@ -0,0 +1,107 @@ +// RUN: %libomptarget-compile-run-and-check-generic +#include <assert.h> +#include <omp.h> +#include <stdio.h> + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL + +typedef struct { + uint64_t Reserved; + uint16_t Version; + uint16_t Kind; // OpenMP==1 + uint32_t Flags; + void *Address; + char *SymbolName; + uint64_t Size; + uint64_t Data; + void *AuxAddr; +} __tgt_offload_entry; + +enum OpenMPOffloadingDeclareTargetFlags { + /// Mark the entry global as having a 'link' attribute. + OMP_DECLARE_TARGET_LINK = 0x01, + /// Mark the entry global as being an indirectly callable function. + OMP_DECLARE_TARGET_INDIRECT = 0x08, + /// This is an entry corresponding to a requirement to be registered. + OMP_REGISTER_REQUIRES = 0x10, + /// Mark the entry global as being an indirect vtable. + OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20, +}; + +#pragma omp begin declare variant match(device = {kind(gpu)}) +// Provided by the runtime. +void *__llvm_omp_indirect_call_lookup(void *host_ptr); +#pragma omp declare target to(__llvm_omp_indirect_call_lookup) \ + device_type(nohost) +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +// We assume unified addressing on the CPU target. +void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; } +#pragma omp end declare variant + +#pragma omp begin declare target +void foo(int *i) { *i += 1; } +void bar(int *i) { *i += 10; } +void baz(int *i) { *i += 100; } +#pragma omp end declare target + +typedef void (*fptr_t)(int *i); + +// Dispatch Table - declare separately on host and device to avoid +// registering with the library; this also allows us to use separate +// names, which is convenient for debugging. This dispatchTable is +// intended to mimic what Clang emits for C++ vtables. +fptr_t dispatchTable[] = {foo, bar, baz}; +#pragma omp begin declare target device_type(nohost) +fptr_t GPUdispatchTable[] = {foo, bar, baz}; +fptr_t *GPUdispatchTablePtr = GPUdispatchTable; +#pragma omp end declare target + +// Define "manual" OpenMP offload entries, where we emit Clang +// offloading entry structure definitions in the appropriate ELF +// section. This allows us to emulate the offloading entries that Clang would +// normally emit for us + +__attribute__((weak, section("llvm_offload_entries"), aligned(8))) +const __tgt_offload_entry __offloading_entry[] = {{ + 0ULL, // Reserved + 1, // Version + 1, // Kind + OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags + &dispatchTable, // Address + "GPUdispatchTablePtr", // SymbolName + (size_t)(sizeof(dispatchTable)), // Size + 0ULL, // Data + NULL // AuxAddr +}}; + +// Mimic how Clang emits vtable pointers for C++ classes +typedef struct { + fptr_t *dispatchPtr; +} myClass; + +// --------------------------------------------------------------------------- +int main() { + myClass obj_foo = {dispatchTable + 0}; + myClass obj_bar = {dispatchTable + 1}; + myClass obj_baz = {dispatchTable + 2}; + int aaa = 0; + +#pragma omp target map(aaa) map (to: obj_foo, obj_bar, obj_baz) + { + // Lookup + fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr); + fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr); + fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr); + foo_ptr[0](&aaa); + bar_ptr[0](&aaa); + baz_ptr[0](&aaa); + } + + assert(aaa == 111); + // CHECK: PASS + printf("PASS\n"); + return 0; +} >From 3cd3157cd806df78a071bf294fd9cc653dc60298 Mon Sep 17 00:00:00 2001 From: jason-van-beusekom <[email protected]> Date: Mon, 20 Oct 2025 19:28:37 -0500 Subject: [PATCH 3/6] Updated based on feedback --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 67 +++++++++++++-------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 9 +++- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 +- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 028d14e897667..c2f74d06df78f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1829,30 +1829,9 @@ void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable, llvm::GlobalValue::WeakODRLinkage); } -// Register VTable by scanning through the map clause of OpenMP target region. -void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { - // Get CXXRecordDecl and VarDecl from Expr. - auto getVTableDecl = [](const Expr *E) { - QualType VDTy = E->getType(); - CXXRecordDecl *CXXRecord = nullptr; - if (const auto *RefType = VDTy->getAs<LValueReferenceType>()) - VDTy = RefType->getPointeeType(); - if (VDTy->isPointerType()) - CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl(); - else - CXXRecord = VDTy->getAsCXXRecordDecl(); - - const VarDecl *VD = nullptr; - if (auto *DRE = dyn_cast<DeclRefExpr>(E)) - VD = cast<VarDecl>(DRE->getDecl()); - return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD); - }; - - // Emit VTable and register the VTable to OpenMP offload entry recursively. - std::function<void(CodeGenModule &, CXXRecordDecl *, const VarDecl *)> - emitAndRegisterVTable = [&emitAndRegisterVTable](CodeGenModule &CGM, - CXXRecordDecl *CXXRecord, - const VarDecl *VD) { +void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM, + CXXRecordDecl *CXXRecord, + const VarDecl *VD) { // Register C++ VTable to OpenMP Offload Entry if it's a new // CXXRecordDecl. if (CXXRecord && CXXRecord->isDynamicClass() && @@ -1860,32 +1839,50 @@ void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { CGM.getOpenMPRuntime().VTableDeclMap.end()) { CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD); CGM.EmitVTable(CXXRecord); - auto VTables = CGM.getVTables(); - auto *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); - if (VTablesAddr) { + CodeGenVTables VTables = CGM.getVTables(); + llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); + if (VTablesAddr) CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD); - } // Emit VTable for all the fields containing dynamic CXXRecord for (const FieldDecl *Field : CXXRecord->fields()) { if (CXXRecordDecl *RecordDecl = - Field->getType()->getAsCXXRecordDecl()) { + Field->getType()->getAsCXXRecordDecl()) emitAndRegisterVTable(CGM, RecordDecl, VD); - } + } // Emit VTable for all dynamic parent class for (CXXBaseSpecifier &Base : CXXRecord->bases()) { if (CXXRecordDecl *BaseDecl = - Base.getType()->getAsCXXRecordDecl()) { + Base.getType()->getAsCXXRecordDecl()) emitAndRegisterVTable(CGM, BaseDecl, VD); - } + } } }; + +void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { + // Register VTable by scanning through the map clause of OpenMP target region. + // Get CXXRecordDecl and VarDecl from Expr. + auto GetVTableDecl = [](const Expr *E) { + QualType VDTy = E->getType(); + CXXRecordDecl *CXXRecord = nullptr; + if (const auto *RefType = VDTy->getAs<LValueReferenceType>()) + VDTy = RefType->getPointeeType(); + if (VDTy->isPointerType()) + CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl(); + else + CXXRecord = VDTy->getAsCXXRecordDecl(); + + const VarDecl *VD = nullptr; + if (auto *DRE = dyn_cast<DeclRefExpr>(E)) + VD = cast<VarDecl>(DRE->getDecl()); + return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD); + }; // Collect VTable from OpenMP map clause. for (const auto *C : D.getClausesOfKind<OMPMapClause>()) { for (const auto *E : C->varlist()) { - auto DeclPair = getVTableDecl(E); + auto DeclPair = GetVTableDecl(E); emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second); } } @@ -10075,8 +10072,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // target data by checking if S is a executable directive (target). if (isa<OMPExecutableDirective>(S) && isOpenMPTargetDataManagementDirective( - cast<OMPExecutableDirective>(S)->getDirectiveKind())) { - auto &E = *cast<OMPExecutableDirective>(S); + dyn_cast<OMPExecutableDirective>(S)->getDirectiveKind())) { + auto &E = *dyn_cast<OMPExecutableDirective>(S); // Don't need to check if it's device compile // since scanForTargetRegionsFunctions currently only called // in device compilation. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 0f7937ae95c06..7f8a81d4090e2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -606,7 +606,7 @@ class CGOpenMPRuntime { Address DependenciesArray); /// Keep track of VTable Declarations so we don't register duplicate VTable. - llvm::DenseMap<CXXRecordDecl*, const VarDecl*> VTableDeclMap; + llvm::SmallDenseMap<CXXRecordDecl *, const VarDecl *> VTableDeclMap; public: explicit CGOpenMPRuntime(CodeGenModule &CGM); @@ -1124,6 +1124,13 @@ class CGOpenMPRuntime { /// \param D OpenMP target directive. virtual void registerVTable(const OMPExecutableDirective &D); + /// Emit and register VTable for the C++ class in OpenMP offload entry. + /// \param CXXRecord C++ class decl. + /// \param VD Variable decl which holds VTable. + virtual void emitAndRegisterVTable(CodeGenModule &CGM, + CXXRecordDecl *CXXRecord, + const VarDecl *VD); + /// Creates artificial threadprivate variable with name \p Name and type \p /// VarType. /// \param VarType Type of the artificial threadprivate variable. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 582dd0f3ade65..0b88f1dc5f0ea 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7618,9 +7618,9 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause( void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { // Emit vtable only from host for target data directive. - if (!CGM.getLangOpts().OpenMPIsTargetDevice) { + if (!CGM.getLangOpts().OpenMPIsTargetDevice) CGM.getOpenMPRuntime().registerVTable(S); - } + CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, /*SeparateBeginEndCalls=*/true); >From d86188bfccd09f67bee877e93b72427ca07856a4 Mon Sep 17 00:00:00 2001 From: jason-van-beusekom <[email protected]> Date: Tue, 21 Oct 2025 16:57:59 -0500 Subject: [PATCH 4/6] split codegen tests based on feedback --- clang/test/OpenMP/target_vtable_codegen.cpp | 280 ------------------ .../target_vtable_codegen_container.cpp | 42 +++ .../OpenMP/target_vtable_codegen_explicit.cpp | 48 +++ ...rget_vtable_codegen_implicit_namespace.cpp | 43 +++ ...arget_vtable_codegen_mult_inherritence.cpp | 46 +++ .../OpenMP/target_vtable_codegen_nested.cpp | 82 +++++ 6 files changed, 261 insertions(+), 280 deletions(-) delete mode 100644 clang/test/OpenMP/target_vtable_codegen.cpp create mode 100644 clang/test/OpenMP/target_vtable_codegen_container.cpp create mode 100644 clang/test/OpenMP/target_vtable_codegen_explicit.cpp create mode 100644 clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp create mode 100644 clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp create mode 100644 clang/test/OpenMP/target_vtable_codegen_nested.cpp diff --git a/clang/test/OpenMP/target_vtable_codegen.cpp b/clang/test/OpenMP/target_vtable_codegen.cpp deleted file mode 100644 index 276cef4eb8801..0000000000000 --- a/clang/test/OpenMP/target_vtable_codegen.cpp +++ /dev/null @@ -1,280 +0,0 @@ -///==========================================================================/// -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK1 -// -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK2 -// -// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -// RUN: %clang_cc1 -DCK3 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK3 -// -// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -// RUN: %clang_cc1 -DCK4 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s --check-prefix=CK4 -// -// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++ -// RUN: %clang_cc1 -DCK5 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s --check-prefix=CK5 -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER -#ifdef CK1 - -// Make sure both host and device compilation emit vtable for Dervied -// CK1-DAG: $_ZN7DerivedD1Ev = comdat any -// CK1-DAG: $_ZN7DerivedD0Ev = comdat any -// CK1-DAG: $_ZN7Derived5BaseAEi = comdat any -// CK1-DAG: $_ZN7Derived8DerivedBEv = comdat any -// CK1-DAG: $_ZN7DerivedD2Ev = comdat any -// CK1-DAG: $_ZN4BaseD2Ev = comdat any -// CK1-DAG: $_ZTV7Derived = comdat any -class Base { -public: - virtual ~Base() = default; - virtual void BaseA(int a) { } -}; - -// CK1: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] } -class Derived : public Base { -public: - ~Derived() override = default; - void BaseA(int a) override { x = a; } - virtual void DerivedB() { } -private: - int x; -}; - -int main() { - - Derived d; - Base& c = d; - int a = 50; - // Should emit vtable for Derived since d is added to map clause -#pragma omp target data map (to: d, a) - { - #pragma omp target map(d) - { - c.BaseA(a); - } - } - return 0; -} - -#endif // CK1 - -#ifdef CK2 - -namespace { - -// Make sure both host and device compilation emit vtable for Dervied -// CK2-DAG: @_ZTVN12_GLOBAL__N_17DerivedE -// CK2-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev -// CK2-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev -// CK2-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi -// CK2-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv -class Base { -public: - virtual ~Base() = default; - virtual void BaseA(int a) { } -}; - -class Derived : public Base { -public: - ~Derived() override = default; - void BaseA(int a) override { x = a; } - virtual void DerivedB() { } -private: - int x; -}; - -}; - -int main() { - - Derived d; - Base& c = d; - int a = 50; -#pragma omp target data map (to: d, a) - { - #pragma omp target - { - c.BaseA(a); - } - } - return 0; -} - -#endif // CK2 - -#ifdef CK3 - -// CK3-DAG: @_ZTV6Base_1 -// CK3-DAG: @_ZTV7Derived -// CK3-DAG: @_ZTV6Base_2 -#pragma omp begin declare target - -class Base_1 { -public: - virtual void foo() { } - virtual void bar() { } -}; - -class Base_2 { -public: - virtual void foo() { } - virtual void bar() { } -}; - -class Derived : public Base_1, public Base_2 { -public: - virtual void foo() override { } - virtual void bar() override { } -}; - -#pragma omp end declare target - -int main() { - Base_1 base; - Derived derived; - - // Make sure we emit vtable for parent class (Base_1 and Base_2) -#pragma omp target data map(derived) - { - Base_1 *p1 = &derived; - -#pragma omp target - { - p1->foo(); - p1->bar(); - } - } - return 0; -} - -#endif // CK3 - -#ifdef CK4 - -// CK4-DAG: @_ZTV3Car -// CK4-DAG: @_ZTV6Engine -// CK4-DAG: @_ZTV6Wheels -// CK4-DAG: @_ZTV7Vehicle -// CK4-DAG: @_ZTV5Brand -class Engine { -public: - Engine(const char *type) : type(type) {} - virtual ~Engine() {} - - virtual void start() const { } - -protected: - const char *type; -}; - -class Wheels { -public: - Wheels(int count) : count(count) {} - virtual ~Wheels() {} - - virtual void roll() const { } - -protected: - int count; -}; - -class Vehicle { -public: - Vehicle(int speed) : speed(speed) {} - virtual ~Vehicle() {} - - virtual void move() const { } - -protected: - int speed; -}; - -class Brand { -public: - Brand(const char *brandName) : brandName(brandName) {} - virtual ~Brand() {} - - void showBrand() const { } - -protected: - const char *brandName; -}; - -class Car : public Vehicle, public Brand { -public: - Car(const char *brand, int speed, const char *engineType, int wheelCount) - : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {} - - void move() const override { } - - void drive() const { - showBrand(); - engine.start(); - wheels.roll(); - move(); - } - -private: - Engine engine; - Wheels wheels; -}; - -int main() { - Car myActualCar("Ford", 100, "Hybrid", 4); - - // Make sure we emit VTable for dynamic class as field -#pragma omp target map(myActualCar) - { - myActualCar.drive(); - } - return 0; -} - -#endif // CK4 - -#ifdef CK5 - -// CK5-DAG: @_ZTV7Derived -// CK5-DAG: @_ZTV4Base -template <typename T> -class Container { -private: -T value; -public: -Container() : value() {} -Container(T val) : value(val) {} - -T getValue() const { return value; } - -void setValue(T val) { value = val; } -}; - -class Base { -public: - virtual void foo() {} -}; -class Derived : public Base {}; - -class Test { -public: - Container<Derived> v; -}; - -int main() { - Test test; - Derived d; - test.v.setValue(d); - -// Make sure we emit VTable for type indirectly (template specialized type) -#pragma omp target map(test) - { - test.v.getValue().foo(); - } - return 0; -} - -#endif // CK5 -#endif diff --git a/clang/test/OpenMP/target_vtable_codegen_container.cpp b/clang/test/OpenMP/target_vtable_codegen_container.cpp new file mode 100644 index 0000000000000..9fd4c6b736163 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_container.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV7Derived +// CHECK-DAG: @_ZTV4Base +template <typename T> +class Container { +private: +T value; +public: +Container() : value() {} +Container(T val) : value(val) {} + +T getValue() const { return value; } + +void setValue(T val) { value = val; } +}; + +class Base { +public: + virtual void foo() {} +}; +class Derived : public Base {}; + +class Test { +public: + Container<Derived> v; +}; + +int main() { + Test test; + Derived d; + test.v.setValue(d); + +// Make sure we emit VTable for type indirectly (template specialized type) +#pragma omp target map(test) + { + test.v.getValue().foo(); + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_explicit.cpp b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp new file mode 100644 index 0000000000000..001ed8fdd9cd7 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// Make sure both host and device compilation emit vtable for Dervied +// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any +// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any +// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any +// CHECK-DAG: $_ZN4BaseD2Ev = comdat any +// CHECK-DAG: $_ZTV7Derived = comdat any +class Base { +public: + + virtual ~Base() = default; + + virtual void BaseA(int a) { } +}; + +// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] } +class Derived : public Base { +public: + + ~Derived() override = default; + + void BaseA(int a) override { x = a; } + + virtual void DerivedB() { } +private: + int x; +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; + // Should emit vtable for Derived since d is added to map clause +#pragma omp target data map (to: d, a) + { + #pragma omp target map(d) + { + c.BaseA(a); + } + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp new file mode 100644 index 0000000000000..f9a7cc10474d4 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +namespace { + +// Make sure both host and device compilation emit vtable for Dervied +// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE +// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev +// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev +// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi +// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv +class Base { +public: + virtual ~Base() = default; + virtual void BaseA(int a) { } +}; + +class Derived : public Base { +public: + ~Derived() override = default; + void BaseA(int a) override { x = a; } + virtual void DerivedB() { } +private: + int x; +}; + +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; +#pragma omp target data map (to: d, a) + { + #pragma omp target + { + c.BaseA(a); + } + } + return 0; +} \ No newline at end of file diff --git a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp new file mode 100644 index 0000000000000..bd0fd8fd92167 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV6Base_1 +// CHECK-DAG: @_ZTV7Derived +// CHECK-DAG: @_ZTV6Base_2 +#pragma omp begin declare target + +class Base_1 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Base_2 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Derived : public Base_1, public Base_2 { +public: + virtual void foo() override { } + virtual void bar() override { } +}; + +#pragma omp end declare target + +int main() { + Base_1 base; + Derived derived; + + // Make sure we emit vtable for parent class (Base_1 and Base_2) +#pragma omp target data map(derived) + { + Base_1 *p1 = &derived; + +#pragma omp target + { + p1->foo(); + p1->bar(); + } + } + return 0; +} \ No newline at end of file diff --git a/clang/test/OpenMP/target_vtable_codegen_nested.cpp b/clang/test/OpenMP/target_vtable_codegen_nested.cpp new file mode 100644 index 0000000000000..1ece83d60ac58 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_nested.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV3Car +// CHECK-DAG: @_ZTV6Engine +// CHECK-DAG: @_ZTV6Wheels +// CHECK-DAG: @_ZTV7Vehicle +// CHECK-DAG: @_ZTV5Brand +class Engine { +public: + Engine(const char *type) : type(type) {} + virtual ~Engine() {} + + virtual void start() const { } + +protected: + const char *type; +}; + +class Wheels { +public: + Wheels(int count) : count(count) {} + virtual ~Wheels() {} + + virtual void roll() const { } + +protected: + int count; +}; + +class Vehicle { +public: + Vehicle(int speed) : speed(speed) {} + virtual ~Vehicle() {} + + virtual void move() const { } + +protected: + int speed; +}; + +class Brand { +public: + Brand(const char *brandName) : brandName(brandName) {} + virtual ~Brand() {} + + void showBrand() const { } + +protected: + const char *brandName; +}; + +class Car : public Vehicle, public Brand { +public: + Car(const char *brand, int speed, const char *engineType, int wheelCount) + : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {} + + void move() const override { } + + void drive() const { + showBrand(); + engine.start(); + wheels.roll(); + move(); + } + +private: + Engine engine; + Wheels wheels; +}; + +int main() { + Car myActualCar("Ford", 100, "Hybrid", 4); + + // Make sure we emit VTable for dynamic class as field +#pragma omp target map(myActualCar) + { + myActualCar.drive(); + } + return 0; +} >From 0dc410c37456d2895e3af3363227f691dce93259 Mon Sep 17 00:00:00 2001 From: jason-van-beusekom <[email protected]> Date: Wed, 22 Oct 2025 15:05:14 -0500 Subject: [PATCH 5/6] format fix --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 73 +++++++++---------- clang/lib/CodeGen/CodeGenModule.h | 2 +- ...rget_vtable_codegen_implicit_namespace.cpp | 2 +- ...arget_vtable_codegen_mult_inherritence.cpp | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 +- offload/include/omptarget.h | 2 +- .../test/api/omp_indirect_call_table_manual.c | 2 +- 7 files changed, 43 insertions(+), 47 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c2f74d06df78f..16cd752e462e4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1830,36 +1830,31 @@ void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable, } void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM, - CXXRecordDecl *CXXRecord, - const VarDecl *VD) { - // Register C++ VTable to OpenMP Offload Entry if it's a new - // CXXRecordDecl. - if (CXXRecord && CXXRecord->isDynamicClass() && - CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) == - CGM.getOpenMPRuntime().VTableDeclMap.end()) { - CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD); - CGM.EmitVTable(CXXRecord); - CodeGenVTables VTables = CGM.getVTables(); - llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); - if (VTablesAddr) - CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD); - // Emit VTable for all the fields containing dynamic CXXRecord - for (const FieldDecl *Field : CXXRecord->fields()) { - if (CXXRecordDecl *RecordDecl = - Field->getType()->getAsCXXRecordDecl()) - emitAndRegisterVTable(CGM, RecordDecl, VD); - - } - // Emit VTable for all dynamic parent class - for (CXXBaseSpecifier &Base : CXXRecord->bases()) { - if (CXXRecordDecl *BaseDecl = - Base.getType()->getAsCXXRecordDecl()) - emitAndRegisterVTable(CGM, BaseDecl, VD); - - } - } - }; - + CXXRecordDecl *CXXRecord, + const VarDecl *VD) { + // Register C++ VTable to OpenMP Offload Entry if it's a new + // CXXRecordDecl. + if (CXXRecord && CXXRecord->isDynamicClass() && + CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) == + CGM.getOpenMPRuntime().VTableDeclMap.end()) { + CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD); + CGM.EmitVTable(CXXRecord); + CodeGenVTables VTables = CGM.getVTables(); + llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); + if (VTablesAddr) + CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD); + // Emit VTable for all the fields containing dynamic CXXRecord + for (const FieldDecl *Field : CXXRecord->fields()) { + if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl()) + emitAndRegisterVTable(CGM, RecordDecl, VD); + } + // Emit VTable for all dynamic parent class + for (CXXBaseSpecifier &Base : CXXRecord->bases()) { + if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl()) + emitAndRegisterVTable(CGM, BaseDecl, VD); + } + } +}; void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { // Register VTable by scanning through the map clause of OpenMP target region. @@ -10070,15 +10065,15 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // Register vtable from device for target data and target directives. // Add this block here since scanForTargetRegionsFunctions ignores // target data by checking if S is a executable directive (target). - if (isa<OMPExecutableDirective>(S) && - isOpenMPTargetDataManagementDirective( - dyn_cast<OMPExecutableDirective>(S)->getDirectiveKind())) { - auto &E = *dyn_cast<OMPExecutableDirective>(S); - // Don't need to check if it's device compile - // since scanForTargetRegionsFunctions currently only called - // in device compilation. - registerVTable(E); - } + if (isa<OMPExecutableDirective>(S) && + isOpenMPTargetDataManagementDirective( + dyn_cast<OMPExecutableDirective>(S)->getDirectiveKind())) { + auto &E = *dyn_cast<OMPExecutableDirective>(S); + // Don't need to check if it's device compile + // since scanForTargetRegionsFunctions currently only called + // in device compilation. + registerVTable(E); + } // Codegen OMP target directives that offload compute to the device. bool RequiresDeviceCodegen = diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 4ace1abcb5246..49dcba4b7618b 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -754,7 +754,7 @@ class CodeGenModule : public CodeGenTypeCache { // i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32) llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr; - // Store indirect CallExprs that are within an omp target region + // Store indirect CallExprs that are within an omp target region llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls; InstrProfStats &getPGOStats() { return PGOStats; } diff --git a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp index f9a7cc10474d4..364c55cd07985 100644 --- a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp +++ b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp @@ -40,4 +40,4 @@ int main() { } } return 0; -} \ No newline at end of file +} diff --git a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp index bd0fd8fd92167..3069a4994a479 100644 --- a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp +++ b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp @@ -43,4 +43,4 @@ int main() { } } return 0; -} \ No newline at end of file +} diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a18a4bcb6d62e..236cfab3f031c 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -10249,8 +10249,8 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect: case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable: if (!CE->getAddress()) { - ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second); - continue; + ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second); + continue; } break; default: @@ -10702,7 +10702,8 @@ void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo( return; } if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || - Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) + Flags == + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, VarName.str()); diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index bdcda770f2d37..3317441f04eba 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -94,7 +94,7 @@ enum OpenMPOffloadingDeclareTargetFlags { OMP_DECLARE_TARGET_INDIRECT = 0x08, /// This is an entry corresponding to a requirement to be registered. OMP_REGISTER_REQUIRES = 0x10, - /// Mark the entry global as being an indirect vtable. + /// Mark the entry global as being an indirect vtable. OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20, }; diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c index 9c6fd4ca84ea3..e958d47d69dad 100644 --- a/offload/test/api/omp_indirect_call_table_manual.c +++ b/offload/test/api/omp_indirect_call_table_manual.c @@ -89,7 +89,7 @@ int main() { myClass obj_baz = {dispatchTable + 2}; int aaa = 0; -#pragma omp target map(aaa) map (to: obj_foo, obj_bar, obj_baz) +#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz) { // Lookup fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr); >From 21b8a8168426ad4b84543f0489beacddd52b51ce Mon Sep 17 00:00:00 2001 From: jason-van-beusekom <[email protected]> Date: Tue, 4 Nov 2025 17:04:15 -0600 Subject: [PATCH 6/6] Update memberexpr case --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 7 +++ ...ble_memberexpr_indirect_vtable_codegen.cpp | 56 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 16cd752e462e4..292fcc336d077 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1872,6 +1872,13 @@ void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { const VarDecl *VD = nullptr; if (auto *DRE = dyn_cast<DeclRefExpr>(E)) VD = cast<VarDecl>(DRE->getDecl()); +<<<<<<< Updated upstream +======= + else if (auto *MRE = dyn_cast<MemberExpr>(E)) + if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) + if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl())) + VD = BaseVD; +>>>>>>> Stashed changes return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD); }; // Collect VTable from OpenMP map clause. diff --git a/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp b/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp new file mode 100644 index 0000000000000..0535ba1dec741 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + + +// CHECK-DAG: $_ZN4Base5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any +// CHECK-DAG: $_ZN4BaseD1Ev = comdat any +// CHECK-DAG: $_ZN4BaseD0Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any +// CHECK-DAG: $_ZN4BaseD2Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any +// CHECK-DAG: $_ZTV4Base = comdat any +// CHECK-DAG: $_ZTV7Derived = comdat any +class Base { +public: + + virtual ~Base() = default; + + virtual void BaseA(int a) { } +}; + +class Derived : public Base { +public: + + ~Derived() override = default; + + void BaseA(int a) override { x = a; } + + virtual void DerivedB() { } +private: + int x; +}; + +struct VirtualContainer { + Base baseObj; + Derived derivedObj; + Base *basePtr; +}; + +int main() { + VirtualContainer container; + container.basePtr = &container.derivedObj; + int a = 50; +#pragma omp target map(container.baseObj, container.derivedObj, \ + container.basePtr[ : 1]) + { + container.baseObj.BaseA(a); + container.derivedObj.BaseA(a); + container.derivedObj.DerivedB(); + container.basePtr->BaseA(a); + } + return 0; +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
