llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir-openmp Author: Robert Imschweiler (ro-i) <details> <summary>Changes</summary> Together with the fix for https://github.com/llvm/llvm-project/pull/195102#issuecomment-4756584289 --- Patch is 1.41 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/205071.diff 173 Files Affected: - (modified) clang/include/clang/Basic/LangOptions.def (-1) - (modified) clang/include/clang/Options/Options.td (+5-1) - (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+5-7) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+11-2) - (modified) clang/lib/Frontend/CompilerInvocation.cpp (-7) - (modified) clang/test/Driver/openmp-offload-gpu.c (+2-2) - (modified) clang/test/OpenMP/bug60602.cpp (+2-2) - (modified) clang/test/OpenMP/declare_target_local_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/distribute_codegen.cpp (+10-10) - (modified) clang/test/OpenMP/distribute_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp (+8-8) - (modified) clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp (+24-24) - (modified) clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp (+32-32) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp (+24-24) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/distribute_private_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/distribute_simd_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/distribute_simd_private_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/distribute_simd_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/map_struct_ordering.cpp (+1-1) - (modified) clang/test/OpenMP/nvptx_lambda_capturing.cpp (+5-5) - (modified) clang/test/OpenMP/reduction_implicit_map.cpp (+6-6) - (modified) clang/test/OpenMP/spirv_target_teams_reduction_addrspace.c (+1-1) - (modified) clang/test/OpenMP/target_codegen_global_capture.cpp (+6-6) - (modified) clang/test/OpenMP/target_default_codegen.cpp (+16-16) - (modified) clang/test/OpenMP/target_defaultmap_codegen_03.cpp (+8-8) - (modified) clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/target_firstprivate_codegen.cpp (+24-24) - (modified) clang/test/OpenMP/target_has_device_addr_codegen.cpp (+15-15) - (modified) clang/test/OpenMP/target_has_device_addr_codegen_01.cpp (+2-2) - (modified) clang/test/OpenMP/target_is_device_ptr_codegen.cpp (+44-44) - (modified) clang/test/OpenMP/target_map_array_of_structs_with_nested_mapper_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_map_array_section_of_structs_with_nested_mapper_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_map_codegen_03.cpp (+2-2) - (modified) clang/test/OpenMP/target_map_codegen_hold.cpp (+12-12) - (modified) clang/test/OpenMP/target_map_deref_array_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/target_map_member_expr_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/target_offload_mandatory_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+14-14) - (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp (+12-12) - (modified) clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp (+2-2) - (modified) clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_parallel_if_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/target_parallel_num_threads_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp (+8-8) - (modified) clang/test/OpenMP/target_task_affinity_codegen.cpp (+2-2) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+26-26) - (modified) clang/test/OpenMP/target_teams_distribute_codegen.cpp (+14-14) - (modified) clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp (+60-60) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp (+24-24) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp (+60-60) - (modified) clang/test/OpenMP/target_teams_distribute_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp (+40-40) - (modified) clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp (+28-28) - (modified) clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_generic_loop_codegen.cpp (+1-27) - (modified) clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp (+5-5) - (modified) clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp (+1-1) - (modified) clang/test/OpenMP/target_teams_map_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/target_teams_num_teams_codegen.cpp (+12-12) - (renamed) clang/test/OpenMP/target_teams_reduction_codegen.cpp (+89-1400) - (modified) clang/test/OpenMP/target_teams_thread_limit_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_codegen.cpp (+20-20) - (modified) clang/test/OpenMP/teams_distribute_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_distribute_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp (+8-8) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp (+60-60) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp (+32-32) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp (+3-3) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp (+60-60) - (modified) clang/test/OpenMP/teams_distribute_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_simd_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp (+18-18) - (modified) clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_firstprivate_codegen.cpp (+12-12) - (modified) clang/test/OpenMP/teams_generic_loop_codegen-1.cpp (+12-12) - (modified) clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp (+6-6) - (modified) clang/test/OpenMP/teams_generic_loop_private_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp (+4-4) - (modified) clang/test/OpenMP/teams_private_codegen.cpp (+10-10) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPConstants.h (+1-1) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+31-11) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+6-8) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+153-74) - (modified) llvm/lib/Transforms/IPO/OpenMPOpt.cpp (+1-1) - (modified) llvm/test/Transforms/OpenMP/add_attributes.ll (+4-4) - (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (-4) - (modified) mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir (+4-12) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+3-2) - (modified) mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir (+10-15) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+2-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+2-1) - (modified) mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir (+2-2) - (modified) offload/include/Shared/Environment.h (+1-3) - (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+7-5) - (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+16-10) - (modified) openmp/device/include/Interface.h (+6-7) - (modified) openmp/device/src/Reduction.cpp (+154-168) ``````````diff diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 319fd18cddb36..d68784b7efbcd 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -231,7 +231,6 @@ LANGOPT(OpenMPCUDAMode , 1, 0, NotCompatible, "Generate code for OpenMP pragm LANGOPT(OpenMPIRBuilder , 1, 0, NotCompatible, "Use the experimental OpenMP-IR-Builder codegen path.") LANGOPT(OpenMPCUDANumSMs , 32, 0, NotCompatible, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, NotCompatible, "Number of blocks per SM for CUDA devices.") -LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, NotCompatible, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") LANGOPT(OpenMPTargetDebug , 32, 0, NotCompatible, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, NotCompatible, "Use at most 32 bits to represent the collapsed loop nest counter.") LANGOPT(OpenMPThreadSubscription , 1, 0, NotCompatible, "Assume work-shared loops do not have more iterations than participating threads.") diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index e4a9d95ece0ab..377897a15f746 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4120,7 +4120,11 @@ def fopenmp_cuda_number_of_sm_EQ : Joined<["-"], "fopenmp-cuda-number-of-sm=">, def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>, - Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; + Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>, + HelpText<"Deprecated and ignored. The teams reduction buffer is sized " + "automatically at kernel launch to match the actual number of " + "teams; this flag is accepted for backwards compatibility only " + "and emits a deprecation warning when used.">; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index cb0e7297f1a89..19e8c73884dfc 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -788,8 +788,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF, ? 0 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue(); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, ReductionDataSize, - C.getLangOpts().OpenMPCUDAReductionBufNum); + OMPBuilder.createTargetDeinit(Bld, ReductionDataSize); TeamsReductions.clear(); } @@ -1698,8 +1697,6 @@ void CGOpenMPRuntimeGPU::emitReduction( bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - ASTContext &C = CGM.getContext(); - if (Options.SimpleReduction) { assert(!TeamsReduction && !ParallelReduction && "Invalid reduction selection in emitReduction."); @@ -1790,12 +1787,13 @@ void CGOpenMPRuntimeGPU::emitReduction( Idx++; } + bool IsSPMD = getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD; llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPBuilder.createReductionsGPU( OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, /*IsByRef=*/{}, false, - TeamsReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, - CGF.getTarget().getGridValue(), - C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc)); + TeamsReduction, IsSPMD, + llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, + CGF.getTarget().getGridValue(), RTLoc)); CGF.Builder.restoreIP(AfterIP); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 906abd1474b6c..7763b33567d87 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6952,8 +6952,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-openmp-extensions"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ); - Args.AddAllArgs(CmdArgs, - options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ); + // '-fopenmp-cuda-teams-reduction-recs-num=' is deprecated and has no + // effect: the teams reduction buffer is sized at kernel launch by the + // offload plugin to match the actual number of teams. Honoring a + // smaller user-supplied value would silently truncate the buffer for + // larger launches. + if (Arg *A = Args.getLastArg( + options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ)) + D.Diag(diag::warn_drv_deprecated_custom) + << A->getAsString(Args) + << "the value is ignored; the teams reduction buffer is sized " + "automatically at kernel launch"; if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse, options::OPT_fno_openmp_optimistic_collapse, /*Default=*/false)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d2847739e3143..ca2d02c7dbd97 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3910,10 +3910,6 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Consumer, OPT_fopenmp_cuda_blocks_per_sm_EQ, Twine(Opts.OpenMPCUDABlocksPerSM)); - if (Opts.OpenMPCUDAReductionBufNum != 1024) - GenerateArg(Consumer, OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, - Twine(Opts.OpenMPCUDAReductionBufNum)); - if (!Opts.OMPTargetTriples.empty()) { std::string Targets; llvm::raw_string_ostream OS(Targets); @@ -4359,9 +4355,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.OpenMPCUDABlocksPerSM = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ, Opts.OpenMPCUDABlocksPerSM, Diags); - Opts.OpenMPCUDAReductionBufNum = getLastArgIntValue( - Args, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, - Opts.OpenMPCUDAReductionBufNum, Diags); } // Set the value of the debugging flag used in the new offloading device RTL. diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index b5644e6ae445f..4ad0aede8a50e 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -193,8 +193,8 @@ // RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s -// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048" +// CUDA_RED_RECS: warning: argument '-fopenmp-cuda-teams-reduction-recs-num=2048' is deprecated, the value is ignored; the teams reduction buffer is sized automatically at kernel launch +// CUDA_RED_RECS-NOT: "-fopenmp-cuda-teams-reduction-recs-num=2048" // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ // RUN: --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ diff --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp index e9174d7be3a12..8235a5a7d83d1 100644 --- a/clang/test/OpenMP/bug60602.cpp +++ b/clang/test/OpenMP/bug60602.cpp @@ -119,7 +119,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 6, ptr [[TMP38]], align 4 // CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -223,7 +223,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP89]], 1 // CHECK-NEXT: [[TMP90:%.*]] = zext i32 [[ADD]] to i64 // CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP91]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP91]], align 4 // CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK-NEXT: store i32 6, ptr [[TMP92]], align 4 // CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 diff --git a/clang/test/OpenMP/declare_target_local_codegen.cpp b/clang/test/OpenMP/declare_target_local_codegen.cpp index b82e8b3bba9ff..fe3a46aed5916 100644 --- a/clang/test/OpenMP/declare_target_local_codegen.cpp +++ b/clang/test/OpenMP/declare_target_local_codegen.cpp @@ -130,7 +130,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -217,7 +217,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -291,7 +291,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index 62b7ad8b979a2..afd18e91911dd 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -169,7 +169,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -368,7 +368,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -567,7 +567,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -774,7 +774,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[ADD4]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -949,7 +949,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1130,7 +1130,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1325,7 +1325,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1520,7 +1520,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1723,7 +1723,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[ADD4]] to i64 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1898,7 +1898,7 @@ int fint(void) { return ftemplate<int>(); } // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[T... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/205071 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
