https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/195452
>From d9c8c58b0be2cb8cedd203369ca5248dbee4e0fa Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Mon, 16 Mar 2026 12:07:44 -0400 Subject: [PATCH] [CIR][OpenMP] Initial implementation of target region support This patch adds support for target regions with some basic support for map clauses. It also changes the clause handling to make use of the OMP dialect ClauseOps to simplify op constrution. Assisted-by: Cursor / claude-4.6-opus-high --- clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 - clang/lib/CIR/CodeGen/CIRGenOpenMPClause.cpp | 204 ++++++++++++------ clang/lib/CIR/CodeGen/CIRGenOpenMPClause.h | 76 +++++++ clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 3 +- clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp | 114 +++++++++- .../CIR/CodeGenOpenMP/not-yet-implemented.c | 4 +- .../CodeGenOpenMP/target-map-llvm-device.c | 111 ++++++++++ .../CIR/CodeGenOpenMP/target-map-llvm-host.c | 122 +++++++++++ clang/test/CIR/CodeGenOpenMP/target-map.c | 105 +++++++++ 9 files changed, 657 insertions(+), 86 deletions(-) create mode 100644 clang/lib/CIR/CodeGen/CIRGenOpenMPClause.h create mode 100644 clang/test/CIR/CodeGenOpenMP/target-map-llvm-device.c create mode 100644 clang/test/CIR/CodeGenOpenMP/target-map-llvm-host.c create mode 100644 clang/test/CIR/CodeGenOpenMP/target-map.c diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index b2e6d93f7c2e5..9b4d2e3e354e7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -2439,10 +2439,6 @@ class CIRGenFunction : public CIRGenTypeCache { void emitOMPDeclareMapper(const OMPDeclareMapperDecl &d); void emitOMPRequiresDecl(const OMPRequiresDecl &d); -private: - template <typename Op> - void emitOpenMPClauses(Op &op, ArrayRef<const OMPClause *> clauses); - //===--------------------------------------------------------------------===// // OpenACC Emission //===--------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.cpp index a0f0ea9299c8d..3ed42375176c0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.cpp @@ -6,90 +6,152 @@ // //===----------------------------------------------------------------------===// // -// Emit OpenMP clause nodes as CIR code. +// OpenMP clause processor implementation. See CIRGenOpenMPClause.h. // //===----------------------------------------------------------------------===// +#include "CIRGenOpenMPClause.h" #include "CIRGenFunction.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "clang/Basic/OpenMPKinds.h" using namespace clang; using namespace clang::CIRGen; -namespace { -template <typename OpTy> -class OpenMPClauseCIREmitter final - : public ConstOMPClauseVisitor<OpenMPClauseCIREmitter<OpTy>> { - OpTy &operation; - CIRGen::CIRGenFunction &cgf; - CIRGen::CIRGenBuilderTy &builder; - -public: - OpenMPClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf, - CIRGen::CIRGenBuilderTy &builder) - : operation(operation), cgf(cgf), builder(builder) {} - - void VisitOMPClause(const OMPClause *clause) { - cgf.cgm.errorNYI(clause->getBeginLoc(), "OpenMPClause ", - llvm::omp::getOpenMPClauseName(clause->getClauseKind())); +static mlir::omp::ClauseMapFlags +mapClauseKindToFlags(OpenMPMapClauseKind kind) { + switch (kind) { + case OMPC_MAP_to: + return mlir::omp::ClauseMapFlags::to; + case OMPC_MAP_from: + return mlir::omp::ClauseMapFlags::from; + case OMPC_MAP_tofrom: + return mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from; + case OMPC_MAP_alloc: + case OMPC_MAP_release: + return mlir::omp::ClauseMapFlags::storage; + case OMPC_MAP_delete: + return mlir::omp::ClauseMapFlags::del; + default: + return mlir::omp::ClauseMapFlags::none; } +} - void VisitOMPProcBindClause(const OMPProcBindClause *clause) { - if constexpr (std::is_same_v<OpTy, mlir::omp::ParallelOp>) { - mlir::omp::ClauseProcBindKind kind; - switch (clause->getProcBindKind()) { - case llvm::omp::ProcBindKind::OMP_PROC_BIND_master: - kind = mlir::omp::ClauseProcBindKind::Master; - break; - case llvm::omp::ProcBindKind::OMP_PROC_BIND_close: - kind = mlir::omp::ClauseProcBindKind::Close; - break; - case llvm::omp::ProcBindKind::OMP_PROC_BIND_spread: - kind = mlir::omp::ClauseProcBindKind::Spread; - break; - case llvm::omp::ProcBindKind::OMP_PROC_BIND_primary: - kind = mlir::omp::ClauseProcBindKind::Primary; - break; - case llvm::omp::ProcBindKind::OMP_PROC_BIND_default: - // 'default' in the classic-codegen does no runtime call/doesn't - // really do anything. So this is a no-op, and thus shouldn't change - // the IR. - return; - case llvm::omp::ProcBindKind::OMP_PROC_BIND_unknown: - llvm_unreachable("unknown proc-bind kind"); - } - operation.setProcBindKind(kind); - } else { - cgf.cgm.errorNYI( - clause->getBeginLoc(), - "OMPProcBindClause unimplemented on this directive kind"); - } - } +static mlir::Value emitMapInfoForVar(CIRGenFunction &cgf, + mlir::OpBuilder &builder, + mlir::Location loc, const VarDecl *vd, + mlir::omp::ClauseMapFlags mapFlags) { + Address addr = cgf.getAddrOfLocalVar(vd); + mlir::Value varPtr = addr.getPointer(); + auto varPtrType = mlir::cast<cir::PointerType>(varPtr.getType()); + mlir::Type elementType = varPtrType.getPointee(); - void emitClauses(ArrayRef<const OMPClause *> clauses) { - for (const auto *c : clauses) - this->Visit(c); + // Cast to generic pointer if needed. + if (varPtrType.getAddrSpace()) { + auto genericPtrType = + cir::PointerType::get(builder.getContext(), elementType); + varPtr = cir::CastOp::create(builder, loc, genericPtrType, + cir::CastKind::address_space, varPtr); + varPtrType = genericPtrType; } -}; -template <typename OpTy> -auto makeClauseEmitter(OpTy &op, CIRGen::CIRGenFunction &cgf, - CIRGen::CIRGenBuilderTy &builder) { - return OpenMPClauseCIREmitter<OpTy>(op, cgf, builder); + + return mlir::omp::MapInfoOp::create( + builder, loc, + /*omp_ptr=*/varPtrType, + /*var_ptr=*/varPtr, + /*var_type=*/elementType, + /*map_type=*/mapFlags, + /*map_capture_type=*/mlir::omp::VariableCaptureKind::ByRef, + /*var_ptr_ptr=*/mlir::Value{}, + /*members=*/mlir::ValueRange{}, + /*members_index=*/mlir::ArrayAttr{}, + /*bounds=*/mlir::ValueRange{}, + /*mapper_id=*/mlir::FlatSymbolRefAttr{}, + /*name=*/builder.getStringAttr(vd->getName()), + /*partial_map=*/false); } -} // namespace - -template <typename Op> -void CIRGenFunction::emitOpenMPClauses(Op &op, - ArrayRef<const OMPClause *> clauses) { - mlir::OpBuilder::InsertionGuard guardCase(builder); - builder.setInsertionPoint(op); - makeClauseEmitter(op, *this, builder).emitClauses(clauses); + +bool OpenMPClauseProcessor::processProcBind( + mlir::omp::ProcBindClauseOps &result) const { + for (const OMPClause *clause : clauses) { + const auto *pbc = dyn_cast<OMPProcBindClause>(clause); + if (!pbc) + continue; + + switch (pbc->getProcBindKind()) { + case llvm::omp::ProcBindKind::OMP_PROC_BIND_master: + result.procBindKind = mlir::omp::ClauseProcBindKindAttr::get( + builder.getContext(), mlir::omp::ClauseProcBindKind::Master); + break; + case llvm::omp::ProcBindKind::OMP_PROC_BIND_close: + result.procBindKind = mlir::omp::ClauseProcBindKindAttr::get( + builder.getContext(), mlir::omp::ClauseProcBindKind::Close); + break; + case llvm::omp::ProcBindKind::OMP_PROC_BIND_spread: + result.procBindKind = mlir::omp::ClauseProcBindKindAttr::get( + builder.getContext(), mlir::omp::ClauseProcBindKind::Spread); + break; + case llvm::omp::ProcBindKind::OMP_PROC_BIND_primary: + result.procBindKind = mlir::omp::ClauseProcBindKindAttr::get( + builder.getContext(), mlir::omp::ClauseProcBindKind::Primary); + break; + case llvm::omp::ProcBindKind::OMP_PROC_BIND_default: + break; + case llvm::omp::ProcBindKind::OMP_PROC_BIND_unknown: + llvm_unreachable("unknown proc-bind kind"); + } + return true; + } + return false; } -// We're defining the template for this in a .cpp file, so we have to explicitly -// specialize the templates. -#define EXPL_SPEC(N) \ - template void CIRGenFunction::emitOpenMPClauses<N>( \ - N &, ArrayRef<const OMPClause *>); -EXPL_SPEC(mlir::omp::ParallelOp) -#undef EXPL_SPEC +bool OpenMPClauseProcessor::processMap( + mlir::omp::MapClauseOps &result, + llvm::SmallVectorImpl<const VarDecl *> *mapSyms) const { + bool found = false; + for (const OMPClause *clause : clauses) { + const auto *mc = dyn_cast<OMPMapClause>(clause); + if (!mc) + continue; + + found = true; + + for (OpenMPMapModifierKind mod : mc->getMapTypeModifiers()) { + if (mod != OMPC_MAP_MODIFIER_unknown) + cgm.errorNYI(mc->getBeginLoc(), + std::string("OpenMP map modifier '") + + getOpenMPSimpleClauseTypeName( + llvm::omp::Clause::OMPC_map, mod) + + "'"); + } + + if (mc->isImplicit()) { + cgm.errorNYI(mc->getBeginLoc(), "OpenMP implicit map clause"); + continue; + } + + mlir::omp::ClauseMapFlags mapFlags = mapClauseKindToFlags(mc->getMapType()); + + for (const Expr *varExpr : mc->varlist()) { + const auto *refExpr = dyn_cast<DeclRefExpr>(varExpr->IgnoreImplicit()); + if (!refExpr) { + cgm.errorNYI(varExpr->getExprLoc(), + "OpenMP map clause with non-DeclRefExpr variable"); + continue; + } + + const auto *vd = dyn_cast<VarDecl>(refExpr->getDecl()); + if (!vd) { + cgm.errorNYI(varExpr->getExprLoc(), + "OpenMP map clause with non-VarDecl variable"); + continue; + } + + result.mapVars.push_back( + emitMapInfoForVar(cgf, builder, loc, vd, mapFlags)); + if (mapSyms) + mapSyms->push_back(vd); + } + } + return found; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.h new file mode 100644 index 0000000000000..79f56c5bf12e9 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenMPClause.h @@ -0,0 +1,76 @@ +//===--- CIRGenOpenMPClause.h - OpenMP clause processor ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPCLAUSE_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPCLAUSE_H + +#include "CIRGenBuilder.h" +#include "CIRGenModule.h" +#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h" +#include "clang/AST/OpenMPClause.h" +#include "clang/AST/StmtOpenMP.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" + +namespace clang::CIRGen { + +class CIRGenFunction; + +/// Processes OpenMP clauses for a directive, writing results into the +/// auto-generated ClauseOps from the OMP dialect. +class OpenMPClauseProcessor { + CIRGenFunction &cgf; + CIRGenModule &cgm; + CIRGenBuilderTy &builder; + mlir::Location loc; + llvm::ArrayRef<const OMPClause *> clauses; + +public: + OpenMPClauseProcessor(CIRGenFunction &cgf, CIRGenModule &cgm, + CIRGenBuilderTy &builder, mlir::Location loc, + llvm::ArrayRef<const OMPClause *> clauses) + : cgf(cgf), cgm(cgm), builder(builder), loc(loc), clauses(clauses) {} + + bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + + /// Process map clauses. The optional \p mapSyms parameter collects the + /// VarDecls corresponding to each map operand. + bool + processMap(mlir::omp::MapClauseOps &result, + llvm::SmallVectorImpl<const VarDecl *> *mapSyms = nullptr) const; + + /// Emit an errorNYI for each clause of the given types if present. + template <typename... ClauseTypes> + void processTODO(llvm::omp::Directive directive) const; + +private: + template <typename ClauseType> + void processTODOClause(llvm::omp::Directive directive) const; +}; + +template <typename ClauseType> +void OpenMPClauseProcessor::processTODOClause( + llvm::omp::Directive directive) const { + for (const OMPClause *c : clauses) { + if (isa<ClauseType>(c)) { + std::string msg = + ("OpenMP " + llvm::omp::getOpenMPDirectiveName(directive) + " " + + llvm::omp::getOpenMPClauseName(c->getClauseKind()) + " clause") + .str(); + cgm.errorNYI(c->getBeginLoc(), msg); + } + } +} + +template <typename... ClauseTypes> +void OpenMPClauseProcessor::processTODO(llvm::omp::Directive directive) const { + (processTODOClause<ClauseTypes>(directive), ...); +} + +} // namespace clang::CIRGen + +#endif // LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPCLAUSE_H diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index a64a2a080bade..c3892a0279390 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -420,7 +420,6 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::CaseStmtClass: case Stmt::SEHLeaveStmtClass: case Stmt::SYCLKernelCallStmtClass: - case Stmt::CapturedStmtClass: case Stmt::ObjCAtTryStmtClass: case Stmt::ObjCAtThrowStmtClass: case Stmt::ObjCAtSynchronizedStmtClass: @@ -433,6 +432,8 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, cgm.errorNYI(s->getSourceRange(), std::string("emitStmt: ") + s->getStmtClassName()); return mlir::failure(); + case Stmt::CapturedStmtClass: + llvm_unreachable("CapturedStmt should be handled by the parent directive"); } llvm_unreachable("Unexpected statement class"); diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp index eb4934644b519..3c3938a3159e8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp @@ -12,7 +12,9 @@ #include "CIRGenBuilder.h" #include "CIRGenFunction.h" +#include "CIRGenOpenMPClause.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace clang; @@ -31,14 +33,19 @@ CIRGenFunction::emitOMPErrorDirective(const OMPErrorDirective &s) { mlir::LogicalResult CIRGenFunction::emitOMPParallelDirective(const OMPParallelDirective &s) { mlir::LogicalResult res = mlir::success(); - llvm::SmallVector<mlir::Type> retTy; - llvm::SmallVector<mlir::Value> operands; mlir::Location begin = getLoc(s.getBeginLoc()); mlir::Location end = getLoc(s.getEndLoc()); - auto parallelOp = - mlir::omp::ParallelOp::create(builder, begin, retTy, operands); - emitOpenMPClauses(parallelOp, s.clauses()); + mlir::omp::ParallelOperands clauseOps; + OpenMPClauseProcessor cp(*this, getCIRGenModule(), builder, begin, + s.clauses()); + cp.processProcBind(clauseOps); + cp.processTODO<OMPAllocateClause, OMPCopyinClause, OMPDefaultClause, + OMPFirstprivateClause, OMPIfClause, OMPNumThreadsClause, + OMPPrivateClause, OMPReductionClause, OMPSharedClause>( + llvm::omp::Directive::OMPD_parallel); + + auto parallelOp = mlir::omp::ParallelOp::create(builder, begin, clauseOps); { mlir::Block &block = parallelOp.getRegion().emplaceBlock(); @@ -207,10 +214,103 @@ CIRGenFunction::emitOMPAtomicDirective(const OMPAtomicDirective &s) { getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPAtomicDirective"); return mlir::failure(); } + +/// Check for unsupported implicit captures in a target region. +static void +emitOMPTargetImplicitCaptures(CIRGenFunction &cgf, const OMPTargetDirective &s, + llvm::ArrayRef<const VarDecl *> mapSyms) { + const CapturedStmt *cs = s.getCapturedStmt(llvm::omp::OMPD_target); + for (const auto &capture : cs->captures()) { + if (capture.capturesThis()) { + cgf.getCIRGenModule().errorNYI(s.getBeginLoc(), + "OpenMP target capture of 'this' pointer"); + continue; + } + if (capture.capturesVariableByCopy()) { + cgf.getCIRGenModule().errorNYI(s.getBeginLoc(), + "OpenMP target capture by copy"); + continue; + } + if (capture.capturesVariableArrayType()) { + cgf.getCIRGenModule().errorNYI( + s.getBeginLoc(), + "OpenMP target capture of variable-length array type"); + continue; + } + if (capture.capturesVariable()) { + const VarDecl *vd = capture.getCapturedVar(); + if (llvm::is_contained(mapSyms, vd)) + continue; + + cgf.getCIRGenModule().errorNYI(s.getBeginLoc(), + "OpenMP target implicit by-ref capture"); + } + } +} + +/// Emit the body of an omp.target region, remapping mapped variables to the +/// block arguments of the target op's region. +static mlir::LogicalResult +emitOMPTargetBody(CIRGenFunction &cgf, const OMPTargetDirective &s, + mlir::omp::TargetOp targetOp, + llvm::ArrayRef<mlir::Value> mapVars, + llvm::ArrayRef<const VarDecl *> mappedVarDecls, + mlir::Location begin, mlir::Location end) { + mlir::Block &block = targetOp.getRegion().emplaceBlock(); + + for (mlir::Value mapVar : mapVars) + block.addArgument(mapVar.getType(), begin); + + mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder()); + cgf.getBuilder().setInsertionPointToEnd(&block); + + CIRGenFunction::LexicalScope ls{cgf, begin, + cgf.getBuilder().getInsertionBlock()}; + + llvm::SmallVector<std::pair<const VarDecl *, Address>> savedAddrs; + for (auto [idx, vd] : llvm::enumerate(mappedVarDecls)) { + Address origAddr = cgf.getAddrOfLocalVar(vd); + savedAddrs.push_back({vd, origAddr}); + mlir::Value blockArg = block.getArgument(idx); + cgf.replaceAddrOfLocalVar(vd, Address(blockArg, origAddr.getAlignment())); + } + + const CapturedStmt *cs = s.getCapturedStmt(llvm::omp::OMPD_target); + mlir::LogicalResult res = + cgf.emitStmt(cs->getCapturedStmt(), /*useCurrentScope=*/true); + + mlir::omp::TerminatorOp::create(cgf.getBuilder(), end); + + for (auto &[vd, addr] : savedAddrs) + cgf.replaceAddrOfLocalVar(vd, addr); + + return res; +} + mlir::LogicalResult CIRGenFunction::emitOMPTargetDirective(const OMPTargetDirective &s) { - getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPTargetDirective"); - return mlir::failure(); + mlir::Location begin = getLoc(s.getBeginLoc()); + mlir::Location end = getLoc(s.getEndLoc()); + + mlir::omp::TargetOperands clauseOps; + llvm::SmallVector<const VarDecl *> mapSyms; + + OpenMPClauseProcessor cp(*this, getCIRGenModule(), builder, begin, + s.clauses()); + cp.processMap(clauseOps, &mapSyms); + cp.processTODO<OMPAllocateClause, OMPDefaultClause, OMPDefaultmapClause, + OMPDependClause, OMPDeviceClause, OMPFirstprivateClause, + OMPHasDeviceAddrClause, OMPIfClause, OMPInReductionClause, + OMPIsDevicePtrClause, OMPNowaitClause, OMPPrivateClause, + OMPThreadLimitClause, OMPUsesAllocatorsClause, OMPXBareClause>( + llvm::omp::Directive::OMPD_target); + + emitOMPTargetImplicitCaptures(*this, s, mapSyms); + + auto targetOp = mlir::omp::TargetOp::create(builder, begin, clauseOps); + + return emitOMPTargetBody(*this, s, targetOp, clauseOps.mapVars, mapSyms, + begin, end); } mlir::LogicalResult CIRGenFunction::emitOMPTeamsDirective(const OMPTeamsDirective &s) { diff --git a/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c b/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c index 6d59f45d6e5e4..3502f0f291b22 100644 --- a/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c +++ b/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c @@ -10,9 +10,7 @@ void do_things() { {} int i; - // TODO(OMP): We might consider overloading operator<< for OMPClauseKind in - // the future if we want to improve this. - // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenMPClause : if}} + // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenMP parallel if clause}} #pragma omp parallel if(i) {} } diff --git a/clang/test/CIR/CodeGenOpenMP/target-map-llvm-device.c b/clang/test/CIR/CodeGenOpenMP/target-map-llvm-device.c new file mode 100644 index 0000000000000..25f80f20122a3 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenMP/target-map-llvm-device.c @@ -0,0 +1,111 @@ +// Two-step host-BC -> device pipeline that mirrors the offloading driver. +// +// Step 1: Host compilation to bitcode (provides offload entry info to device pass). +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: -fclangir -emit-llvm-bc %s -o %t-cir-host.bc +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: -emit-llvm-bc %s -o %t-ogcg-host.bc +// +// Step 2: Device compilation using host BC. +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device \ +// RUN: -fopenmp-host-ir-file-path %t-cir-host.bc \ +// RUN: -fclangir -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device \ +// RUN: -fopenmp-host-ir-file-path %t-ogcg-host.bc \ +// RUN: -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=OGCG + +void use(int); + +void target_map_to(int x) { +#pragma omp target map(to : x) + { + use(x); + } +} + +void target_map_from(int x) { +#pragma omp target map(from : x) + { + x = 42; + } +} + +void target_map_tofrom(int x) { +#pragma omp target map(tofrom : x) + { + x = x + 1; + } +} + +void target_map_multiple(int a, int b) { +#pragma omp target map(to : a) map(from : b) + { + b = a; + } +} + +// LLVM-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_to_l +// LLVM: call i32 @__kmpc_target_init( +// LLVM: user_code.entry: +// LLVM: %[[V:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: call void @use(i32 {{.*}} %[[V]]) +// LLVM: call void @__kmpc_target_deinit() +// LLVM: ret void + +// LLVM-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_from_l +// LLVM: call i32 @__kmpc_target_init( +// LLVM: user_code.entry: +// LLVM: store i32 42, ptr %{{.*}}, align 4 +// LLVM: call void @__kmpc_target_deinit() +// LLVM: ret void + +// LLVM-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_tofrom_l +// LLVM: call i32 @__kmpc_target_init( +// LLVM: user_code.entry: +// LLVM: %[[LD:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: %[[ADD:.*]] = add nsw i32 %[[LD]], 1 +// LLVM: store i32 %[[ADD]], ptr %{{.*}}, align 4 +// LLVM: call void @__kmpc_target_deinit() +// LLVM: ret void + +// LLVM-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_multiple_l +// LLVM: call i32 @__kmpc_target_init( +// LLVM: user_code.entry: +// LLVM: %[[A:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: store i32 %[[A]], ptr %{{.*}}, align 4 +// LLVM: call void @__kmpc_target_deinit() +// LLVM: ret void + +// OGCG-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_to_l +// OGCG: call i32 @__kmpc_target_init( +// OGCG: user_code.entry: +// OGCG: %[[V:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: call void @use(i32 {{.*}} %[[V]]) +// OGCG: call void @__kmpc_target_deinit() +// OGCG: ret void + +// OGCG-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_from_l +// OGCG: call i32 @__kmpc_target_init( +// OGCG: user_code.entry: +// OGCG: store i32 42, ptr %{{.*}}, align 4 +// OGCG: call void @__kmpc_target_deinit() +// OGCG: ret void + +// OGCG-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_tofrom_l +// OGCG: call i32 @__kmpc_target_init( +// OGCG: user_code.entry: +// OGCG: %[[LD:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: %[[ADD:.*]] = add nsw i32 %[[LD]], 1 +// OGCG: store i32 %[[ADD]], ptr %{{.*}}, align 4 +// OGCG: call void @__kmpc_target_deinit() +// OGCG: ret void + +// OGCG-LABEL: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_map_multiple_l +// OGCG: call i32 @__kmpc_target_init( +// OGCG: user_code.entry: +// OGCG: %[[A:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: store i32 %[[A]], ptr %{{.*}}, align 4 +// OGCG: call void @__kmpc_target_deinit() +// OGCG: ret void diff --git a/clang/test/CIR/CodeGenOpenMP/target-map-llvm-host.c b/clang/test/CIR/CodeGenOpenMP/target-map-llvm-host.c new file mode 100644 index 0000000000000..2b1c314cc8c0b --- /dev/null +++ b/clang/test/CIR/CodeGenOpenMP/target-map-llvm-host.c @@ -0,0 +1,122 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp \ +// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp \ +// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefix=OGCG + +void use(int); + +void target_map_to(int x) { +#pragma omp target map(to : x) + { + use(x); + } +} + +void target_map_from(int x) { +#pragma omp target map(from : x) + { + x = 42; + } +} + +void target_map_tofrom(int x) { +#pragma omp target map(tofrom : x) + { + x = x + 1; + } +} + +void target_map_multiple(int a, int b) { +#pragma omp target map(to : a) map(from : b) + { + b = a; + } +} + +// Host wrappers + +// LLVM-LABEL: define {{.*}} void @target_map_to(i32 +// LLVM: call i32 @__tgt_target_kernel( +// LLVM: omp_offload.failed: +// LLVM: call void @__omp_offloading_{{.*}}_target_map_to_l + +// LLVM-LABEL: define {{.*}} void @target_map_from(i32 +// LLVM: call i32 @__tgt_target_kernel( +// LLVM: omp_offload.failed: +// LLVM: call void @__omp_offloading_{{.*}}_target_map_from_l + +// LLVM-LABEL: define {{.*}} void @target_map_tofrom(i32 +// LLVM: call i32 @__tgt_target_kernel( +// LLVM: omp_offload.failed: +// LLVM: call void @__omp_offloading_{{.*}}_target_map_tofrom_l + +// LLVM-LABEL: define {{.*}} void @target_map_multiple(i32 +// LLVM: call i32 @__tgt_target_kernel( +// LLVM: omp_offload.failed: +// LLVM: call void @__omp_offloading_{{.*}}_target_map_multiple_l + +// Outlined target functions + +// LLVM-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_to_l +// LLVM: %[[V:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: call void @use(i32 {{.*}} %[[V]]) +// LLVM: ret void + +// LLVM-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_from_l +// LLVM: store i32 42, ptr %{{.*}}, align 4 +// LLVM: ret void + +// LLVM-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_tofrom_l +// LLVM: %[[LD:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: %[[ADD:.*]] = add nsw i32 %[[LD]], 1 +// LLVM: store i32 %[[ADD]], ptr %{{.*}}, align 4 +// LLVM: ret void + +// LLVM-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_multiple_l +// LLVM: %[[A:.*]] = load i32, ptr %{{.*}}, align 4 +// LLVM: store i32 %[[A]], ptr %{{.*}}, align 4 +// LLVM: ret void + +// OGCG interleaves host wrapper and outlined function per target region. + +// OGCG-LABEL: define {{.*}} void @target_map_to(i32 +// OGCG: call i32 @__tgt_target_kernel( +// OGCG: omp_offload.failed: +// OGCG: call void @__omp_offloading_{{.*}}_target_map_to_l + +// OGCG-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_to_l +// OGCG: %[[V:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: call void @use(i32 {{.*}} %[[V]]) +// OGCG: ret void + +// OGCG-LABEL: define {{.*}} void @target_map_from(i32 +// OGCG: call i32 @__tgt_target_kernel( +// OGCG: omp_offload.failed: +// OGCG: call void @__omp_offloading_{{.*}}_target_map_from_l + +// OGCG-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_from_l +// OGCG: store i32 42, ptr %{{.*}}, align 4 +// OGCG: ret void + +// OGCG-LABEL: define {{.*}} void @target_map_tofrom(i32 +// OGCG: call i32 @__tgt_target_kernel( +// OGCG: omp_offload.failed: +// OGCG: call void @__omp_offloading_{{.*}}_target_map_tofrom_l + +// OGCG-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_tofrom_l +// OGCG: %[[LD:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: %[[ADD:.*]] = add nsw i32 %[[LD]], 1 +// OGCG: store i32 %[[ADD]], ptr %{{.*}}, align 4 +// OGCG: ret void + +// OGCG-LABEL: define {{.*}} void @target_map_multiple(i32 +// OGCG: call i32 @__tgt_target_kernel( +// OGCG: omp_offload.failed: +// OGCG: call void @__omp_offloading_{{.*}}_target_map_multiple_l + +// OGCG-LABEL: define internal void @__omp_offloading_{{.*}}_target_map_multiple_l +// OGCG: %[[A:.*]] = load i32, ptr %{{.*}}, align 4 +// OGCG: store i32 %[[A]], ptr %{{.*}}, align 4 +// OGCG: ret void diff --git a/clang/test/CIR/CodeGenOpenMP/target-map.c b/clang/test/CIR/CodeGenOpenMP/target-map.c new file mode 100644 index 0000000000000..22d0ee811b91a --- /dev/null +++ b/clang/test/CIR/CodeGenOpenMP/target-map.c @@ -0,0 +1,105 @@ +// Host compilation (x86 host, AMDGPU offload target): no address space on allocas. +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -emit-cir -fclangir %s -o - \ +// RUN: | FileCheck %s --check-prefix=CIR-HOST + +// Device compilation (AMDGPU): allocas in private address space, addrspacecast for map info. +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-target-device \ +// RUN: -emit-cir -fclangir %s -o - \ +// RUN: | FileCheck %s --check-prefix=CIR-DEVICE + +void use(int); + +void target_map_to(int x) { + // CIR-HOST: cir.func{{.*}}@target_map_to + // CIR-HOST: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] + // CIR-HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i) map_clauses(to) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-HOST-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-HOST-NEXT: %[[LOAD:.*]] = cir.load align(4) %[[ARG]] + // CIR-HOST-NEXT: cir.call @use(%[[LOAD]]) + // CIR-HOST-NEXT: omp.terminator + // CIR-HOST-NEXT: } + + // CIR-DEVICE: cir.func{{.*}}@target_map_to + // CIR-DEVICE: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)>, ["x", init] + // CIR-DEVICE: %[[CAST:.*]] = cir.cast address_space %[[X_ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> + // CIR-DEVICE: %[[MAP:.*]] = omp.map.info var_ptr(%[[CAST]] : !cir.ptr<!s32i>, !s32i) map_clauses(to) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-DEVICE-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-DEVICE: omp.terminator + // CIR-DEVICE-NEXT: } +#pragma omp target map(to : x) + { + use(x); + } +} + +void target_map_from(int x) { + // CIR-HOST: cir.func{{.*}}@target_map_from + // CIR-HOST: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] + // CIR-HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i) map_clauses(from) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-HOST-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-HOST-NEXT: %[[C42:.*]] = cir.const #cir.int<42> : !s32i + // CIR-HOST-NEXT: cir.store align(4) %[[C42]], %[[ARG]] + // CIR-HOST-NEXT: omp.terminator + // CIR-HOST-NEXT: } + + // CIR-DEVICE: cir.func{{.*}}@target_map_from + // CIR-DEVICE: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)>, ["x", init] + // CIR-DEVICE: %[[CAST:.*]] = cir.cast address_space %[[X_ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> + // CIR-DEVICE: %[[MAP:.*]] = omp.map.info var_ptr(%[[CAST]] : !cir.ptr<!s32i>, !s32i) map_clauses(from) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-DEVICE-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-DEVICE: omp.terminator + // CIR-DEVICE-NEXT: } +#pragma omp target map(from : x) + { + x = 42; + } +} + +void target_map_tofrom(int x) { + // CIR-HOST: cir.func{{.*}}@target_map_tofrom + // CIR-HOST: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] + // CIR-HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i) map_clauses(tofrom) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-HOST-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-HOST: omp.terminator + // CIR-HOST-NEXT: } + + // CIR-DEVICE: cir.func{{.*}}@target_map_tofrom + // CIR-DEVICE: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)>, ["x", init] + // CIR-DEVICE: %[[CAST:.*]] = cir.cast address_space %[[X_ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> + // CIR-DEVICE: %[[MAP:.*]] = omp.map.info var_ptr(%[[CAST]] : !cir.ptr<!s32i>, !s32i) map_clauses(tofrom) capture(ByRef) -> !cir.ptr<!s32i> {name = "x"} + // CIR-DEVICE-NEXT: omp.target map_entries(%[[MAP]] -> %[[ARG:.*]] : !cir.ptr<!s32i>) { + // CIR-DEVICE: omp.terminator + // CIR-DEVICE-NEXT: } +#pragma omp target map(tofrom : x) + { + x = x + 1; + } +} + +void target_map_multiple(int a, int b) { + // CIR-HOST: cir.func{{.*}}@target_map_multiple + // CIR-HOST-DAG: %[[A_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] + // CIR-HOST-DAG: %[[B_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] + // CIR-HOST: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[A_ALLOCA]] : !cir.ptr<!s32i>, !s32i) map_clauses(to) capture(ByRef) -> !cir.ptr<!s32i> {name = "a"} + // CIR-HOST-NEXT: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[B_ALLOCA]] : !cir.ptr<!s32i>, !s32i) map_clauses(from) capture(ByRef) -> !cir.ptr<!s32i> {name = "b"} + // CIR-HOST-NEXT: omp.target map_entries(%[[MAP_A]] -> %[[ARG_A:.*]], %[[MAP_B]] -> %[[ARG_B:.*]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) { + // CIR-HOST: omp.terminator + // CIR-HOST-NEXT: } + + // CIR-DEVICE: cir.func{{.*}}@target_map_multiple + // CIR-DEVICE-DAG: %[[A_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)>, ["a", init] + // CIR-DEVICE-DAG: %[[B_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, target_address_space(5)>, ["b", init] + // CIR-DEVICE: %[[CAST_A:.*]] = cir.cast address_space %[[A_ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> + // CIR-DEVICE: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[CAST_A]] : !cir.ptr<!s32i>, !s32i) map_clauses(to) capture(ByRef) -> !cir.ptr<!s32i> {name = "a"} + // CIR-DEVICE: %[[CAST_B:.*]] = cir.cast address_space %[[B_ALLOCA]] : !cir.ptr<!s32i, target_address_space(5)> -> !cir.ptr<!s32i> + // CIR-DEVICE: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[CAST_B]] : !cir.ptr<!s32i>, !s32i) map_clauses(from) capture(ByRef) -> !cir.ptr<!s32i> {name = "b"} + // CIR-DEVICE: omp.target map_entries(%[[MAP_A]] -> %[[ARG_A:.*]], %[[MAP_B]] -> %[[ARG_B:.*]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) { + // CIR-DEVICE: omp.terminator + // CIR-DEVICE-NEXT: } +#pragma omp target map(to : a) map(from : b) + { + b = a; + } +} + +// TODO: Test implicit mapping. Currently NYI _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
