Author: Sebastian Pop
Date: 2025-09-15T18:48:32+05:30
New Revision: 895cda70a95529fd22aac05eee7c34f7624996af

URL: 
https://github.com/llvm/llvm-project/commit/895cda70a95529fd22aac05eee7c34f7624996af
DIFF: 
https://github.com/llvm/llvm-project/commit/895cda70a95529fd22aac05eee7c34f7624996af.diff

LOG: Introduce -fexperimental-loop-fuse to clang and flang (#142686)

This patch adds the flag -fexperimental-loop-fuse to the clang and flang
drivers. This is primarily useful for experiments as we envision to
enable the pass one day.

The options are based on the same principles and reason on which we have
`floop-interchange`.

---------

Co-authored-by: Madhur Amilkanthwar <madh...@nvidia.com>

Added: 
    flang/test/Driver/loop-fuse.f90

Modified: 
    clang/include/clang/Basic/CodeGenOptions.def
    clang/include/clang/Driver/Options.td
    clang/lib/CodeGen/BackendUtil.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/Flang.cpp
    clang/lib/Frontend/CompilerInvocation.cpp
    clang/test/Driver/clang_f_opts.c
    flang/docs/ReleaseNotes.md
    flang/include/flang/Frontend/CodeGenOptions.def
    flang/lib/Frontend/CompilerInvocation.cpp
    flang/lib/Frontend/FrontendActions.cpp
    llvm/include/llvm/Passes/PassBuilder.h
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/tools/opt/NewPMDriver.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index fda0da99b60c0..872f73ebf3810 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -322,6 +322,7 @@ CODEGENOPT(TimeTrace         , 1, 0, Benign) ///< Set when 
-ftime-trace is enabl
 VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time 
granularity (in microseconds),
                                                         ///< traced by time 
profiler
 CODEGENOPT(InterchangeLoops  , 1, 0, Benign) ///< Run loop-interchange.
+CODEGENOPT(FuseLoops         , 1, 0, Benign) ///< Run loop-fusion.
 CODEGENOPT(UnrollLoops       , 1, 0, Benign) ///< Control whether loops are 
unrolled.
 CODEGENOPT(RerollLoops       , 1, 0, Benign) ///< Control whether loops are 
rerolled.
 CODEGENOPT(NoUseJumpTables   , 1, 0, Benign) ///< Set when -fno-jump-tables is 
enabled.

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index a7c514e809aa9..47d328f862e07 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4304,6 +4304,10 @@ def floop_interchange : Flag<["-"], 
"floop-interchange">, Group<f_Group>,
   HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, 
CC1Option, FlangOption, FC1Option]>;
 def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
   HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, 
CC1Option, FlangOption, FC1Option]>;
+defm experimental_loop_fusion
+    : OptInCC1FFlag<"experimental-loop-fusion", "Enable", "Disable",
+                    "Enable the loop fusion pass",
+                    [ClangOption, FlangOption, FC1Option]>;
 def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
   HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, 
FlangOption, FC1Option]>;
 def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,

diff  --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index 3f095c03397fd..8c99af2bdff83 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -896,6 +896,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   PipelineTuningOptions PTO;
   PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
   PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
+  PTO.LoopFusion = CodeGenOpts.FuseLoops;
   // For historical reasons, loop interleaving is set to mirror setting for 
loop
   // unrolling.
   PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
@@ -1331,6 +1332,7 @@ runThinLTOBackend(CompilerInstance &CI, 
ModuleSummaryIndex *CombinedIndex,
   Conf.SampleProfile = std::move(SampleProfile);
   Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
   Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
+  Conf.PTO.LoopFusion = CGOpts.FuseLoops;
   // For historical reasons, loop interleaving is set to mirror setting for 
loop
   // unrolling.
   Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 946b1e39af3b9..63efb0f02baa8 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6854,6 +6854,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
                   options::OPT_fno_unroll_loops);
   Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
                   options::OPT_fno_loop_interchange);
+  Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
+                    options::OPT_fno_experimental_loop_fusion);
 
   Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
 

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 1535f4cebf436..d3f4af164f672 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -151,6 +151,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
       !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
     CmdArgs.push_back("-fstack-arrays");
 
+  Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
+                    options::OPT_fno_experimental_loop_fusion);
+
   handleInterchangeLoopsArgs(Args, CmdArgs);
   handleVectorizeLoopsArgs(Args, CmdArgs);
   handleVectorizeSLPArgs(Args, CmdArgs);

diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index 761310813f787..422375240bab6 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1680,6 +1680,9 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const 
CodeGenOptions &Opts,
   else
     GenerateArg(Consumer, OPT_fno_loop_interchange);
 
+  if (Opts.FuseLoops)
+    GenerateArg(Consumer, OPT_fexperimental_loop_fusion);
+
   if (!Opts.BinutilsVersion.empty())
     GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);
 
@@ -2001,6 +2004,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions 
&Opts, ArgList &Args,
                    (Opts.OptimizationLevel > 1));
   Opts.InterchangeLoops =
       Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
+  Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion,
+                                OPT_fno_experimental_loop_fusion, false);
   Opts.BinutilsVersion =
       std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
 

diff  --git a/clang/test/Driver/clang_f_opts.c 
b/clang/test/Driver/clang_f_opts.c
index ee7ded265769b..eb3994ddabcd3 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -52,6 +52,15 @@
 // CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
 // CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"
 
+// RUN: %clang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck 
-check-prefix=CHECK-FUSE-LOOPS %s
+// CHECK-FUSE-LOOPS: "-fexperimental-loop-fusion"
+//
+// RUN: %clang -c -fexperimental-loop-fusion -mllvm -print-pipeline-passes -O3 
%s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-ON %s
+// RUN: %clang -c -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck 
--check-prefixes=LOOP-FUSION-OFF %s
+
+// LOOP-FUSION-ON: loop-fusion
+// LOOP-FUSION-OFF-NOT: loop-fusion
+
 // RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck 
-check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
 // CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
 

diff  --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md
index c9623ea08c4e6..6a285f829053b 100644
--- a/flang/docs/ReleaseNotes.md
+++ b/flang/docs/ReleaseNotes.md
@@ -35,6 +35,8 @@ page](https://llvm.org/releases/).
 
 ## New Compiler Flags
 
+* -fexperimental-loop-fusion is now recognized by flang.
+
 ## Windows Support
 
 ## Fortran Language Changes in Flang

diff  --git a/flang/include/flang/Frontend/CodeGenOptions.def 
b/flang/include/flang/Frontend/CodeGenOptions.def
index cdeea93c9aecb..edab48a70d29d 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -43,6 +43,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the 
stack-arrays pass)
 CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
 CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
 CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
+CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fusion.
 CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
 CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 6295a58b1bdad..4f42fbd66eac0 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -276,6 +276,9 @@ static void 
parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
   if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
     opts.InterchangeLoops = 1;
 
+  if (args.getLastArg(clang::driver::options::OPT_fexperimental_loop_fusion))
+    opts.FuseLoops = 1;
+
   if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
     opts.VectorizeLoop = 1;
 

diff  --git a/flang/lib/Frontend/FrontendActions.cpp 
b/flang/lib/Frontend/FrontendActions.cpp
index 3bef6b1c31825..23cc1e63e773d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -958,6 +958,7 @@ void 
CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
     si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   pto.LoopUnrolling = opts.UnrollLoops;
   pto.LoopInterchange = opts.InterchangeLoops;
+  pto.LoopFusion = opts.FuseLoops;
   pto.LoopInterleaving = opts.UnrollLoops;
   pto.LoopVectorization = opts.VectorizeLoop;
   pto.SLPVectorization = opts.VectorizeSLP;

diff  --git a/flang/test/Driver/loop-fuse.f90 b/flang/test/Driver/loop-fuse.f90
new file mode 100644
index 0000000000000..ddfd9065e0fd4
--- /dev/null
+++ b/flang/test/Driver/loop-fuse.f90
@@ -0,0 +1,17 @@
+! RUN: %flang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck 
-check-prefix=CHECK-LOOP-FUSE %s
+! RUN: %flang -### -S -fno-experimental-loop-fusion %s 2>&1 | FileCheck 
-check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! CHECK-LOOP-FUSE: "-fexperimental-loop-fusion"
+! CHECK-NO-LOOP-FUSE-NOT: "-fexperimental-loop-fusion"
+! RUN: %flang_fc1 -emit-llvm -O2 -fexperimental-loop-fusion -mllvm 
-print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck 
-check-prefix=CHECK-LOOP-FUSE-PASS %s
+! RUN: %flang_fc1 -emit-llvm -O2 -fno-experimental-loop-fusion -mllvm 
-print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck 
-check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
+! CHECK-LOOP-FUSE-PASS: loop-fusion
+! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion
+
+program test
+end program

diff  --git a/llvm/include/llvm/Passes/PassBuilder.h 
b/llvm/include/llvm/Passes/PassBuilder.h
index 9cdb7ca7dbc9b..2742ec1b71b7e 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -65,6 +65,9 @@ class PipelineTuningOptions {
   /// false.
   bool LoopInterchange;
 
+  /// Tuning option to enable/disable loop fusion. Its default value is false.
+  bool LoopFusion;
+
   /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
   /// is that of the flag: `-forget-scev-loop-unroll`.
   bool ForgetAllSCEVInLoopUnroll;

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 98821bb1408a7..79642e650ac83 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -104,6 +104,7 @@
 #include "llvm/Transforms/Scalar/LoopDeletion.h"
 #include "llvm/Transforms/Scalar/LoopDistribute.h"
 #include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
@@ -1551,6 +1552,11 @@ 
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   OptimizePM.addPass(createFunctionToLoopPassAdaptor(
       std::move(LPM), /*UseMemorySSA=*/false, 
/*UseBlockFrequencyInfo=*/false));
 
+  // FIXME: This may not be the right place in the pipeline.
+  // We need to have the data to support the right place.
+  if (PTO.LoopFusion)
+    OptimizePM.addPass(LoopFusePass());
+
   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
   // into separate loop that would otherwise inhibit vectorization.  This is
   // currently only performed for loops marked with the metadata
@@ -2355,4 +2361,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
 bool PassBuilder::isInstrumentedPGOUse() const {
   return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
          !UseCtxProfile.empty();
-}
\ No newline at end of file
+}

diff  --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index b9b8929a0f703..0c991b71a6b26 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -60,6 +60,9 @@ cl::opt<bool> VerifyEachDebugInfoPreserve(
     cl::desc("Start each pass with collecting and end it with checking of "
              "debug info preservation."));
 
+static cl::opt<bool> EnableLoopFusion("enable-loopfusion", cl::init(false),
+                                      cl::Hidden,
+                                      cl::desc("Enable the LoopFuse Pass"));
 cl::opt<std::string>
     VerifyDIPreserveExport("verify-di-preserve-export",
                    cl::desc("Export debug info preservation failures into "
@@ -446,6 +449,7 @@ bool llvm::runPassPipeline(
   // option has been enabled.
   PTO.LoopUnrolling = !DisableLoopUnrolling;
   PTO.UnifiedLTO = UnifiedLTO;
+  PTO.LoopFusion = EnableLoopFusion;
   PassBuilder PB(TM, PTO, P, &PIC);
   registerEPCallbacks(PB);
 


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to