jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, ABataev. Herald added subscribers: dexonsmith, dang, guansong, yaxunl. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1. Herald added a project: clang.
Currently when we generate OpenMP offloading code we always make fallback code for the CPU. This is necessary for implementing features like conditional offloading and ensuring that unhandled pragmas don't result in missing symbols. However, this is problematic for a few cases. For offloading tests we can silently fail to the host without realizing that offloading failed. Additionally, this makes it impossible to provide interoperabiility to other offloading schemes like HIP or CUDA because those methods do not provide any such host fallback guaruntee. this patch adds the `-fopenmp-offload-mandatory` flag to prevent generating the fallback symbol on the CPU and instead replaces the function with a dummy global and the failed branch with 'unreachable'. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D120353 Files: clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/target_offload_mandatory_codegen.cpp
Index: clang/test/OpenMP/target_offload_mandatory_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/target_offload_mandatory_codegen.cpp @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY +// expected-no-diagnostics + +int x; +#pragma omp declare target(x) + +void foo(int) {} +#pragma omp declare target device_type(nohost) to(foo) + +void host() { +#pragma omp target + { foo(x); } +} +// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv +// MANDATORY-SAME: () #[[ATTR1:[0-9]+]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) +// MANDATORY-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +// MANDATORY-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// MANDATORY: omp_offload.failed: +// MANDATORY-NEXT: unreachable +// MANDATORY: omp_offload.cont: +// MANDATORY-NEXT: ret void +// +// +// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: call void @__tgt_register_requires(i64 1) +// MANDATORY-NEXT: ret void +// Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -2517,7 +2517,7 @@ << HostDevTy; return; } - if (!LangOpts.OpenMPIsDevice && DevTy && + if (!LangOpts.OpenMPIsDevice && !LangOpts.OpenMPOffloadMandatory && DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { // Diagnose nohost function called during host codegen. StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -5997,6 +5997,8 @@ CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) + CmdArgs.push_back("-fopenmp-offload-mandatory"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6538,6 +6538,8 @@ // mangled name of the function that encloses the target region and BB is the // line number of the target region. + const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || + !CGM.getLangOpts().OpenMPOffloadMandatory; unsigned DeviceID; unsigned FileID; unsigned Line; @@ -6556,7 +6558,8 @@ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + if (BuildOutlinedFn) + OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); // If this target outline function is not an offload entry, we don't need to // register it. @@ -6588,9 +6591,20 @@ llvm::Constant::getNullValue(CGM.Int8Ty), Name); } + // If we do not allow host fallback we still need a named address to use. + llvm::Constant *TargetRegionEntryAddr = OutlinedFn; + if (!BuildOutlinedFn) { + assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && + "Named kernel already exists?"); + TargetRegionEntryAddr = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); + } + // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, + DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); // Add NumTeams and ThreadLimit attributes to the outlined GPU function @@ -6607,7 +6621,8 @@ std::to_string(DefaultValThreads)); } - CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + if (BuildOutlinedFn) + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); } /// Checks if the expression is constant or does not have non-trivial function @@ -10324,7 +10339,10 @@ if (!CGF.HaveInsertPoint()) return; - assert(OutlinedFn && "Invalid outlined function!"); + const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && + CGM.getLangOpts().OpenMPOffloadMandatory; + + assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); @@ -10339,18 +10357,28 @@ CodeGenFunction::OMPTargetDataInfo InputInfo; llvm::Value *MapTypesArray = nullptr; llvm::Value *MapNamesArray = nullptr; - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, - &CapturedVars, - SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - if (Device.getInt() == OMPC_DEVICE_ancestor) { - // Reverse offloading is not supported, so just execute on the host. + // Generate code for the host fallback function. + auto &&FallbackGen = [this, OutlinedFn, OutlinedFnID, &D, &CapturedVars, + RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF) { + if (OffloadingMandatory) { + CGF.Builder.CreateUnreachable(); + } else { if (RequiresOuterTask) { CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + } + }; + // Fill up the pointer arrays and transfer execution to the device. + auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, + &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, + &CapturedVars, SizeEmitter, + FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { + if (Device.getInt() == OMPC_DEVICE_ancestor) { + // Reverse offloading is not supported, so just execute on the host. + FallbackGen(CGF); return; } @@ -10494,25 +10522,17 @@ CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + FallbackGen(CGF); + CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, - RequiresOuterTask](CodeGenFunction &CGF, - PrePostActionTy &) { - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask, + FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { + FallbackGen(CGF); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2477,6 +2477,10 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; +def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>, + Flags<[CC1Option, NoArgumentUnused]>, + HelpText<"Do not create a host fallback if offloading to the device fails.">, + MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue, PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">, Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -247,6 +247,7 @@ LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.") LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.") LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.") +LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits