Author: Kajetan Puchalski
Date: 2025-08-14T14:20:15+01:00
New Revision: d3d96e20573771c9b0f54a07c1557c448b1d9ae1

URL: 
https://github.com/llvm/llvm-project/commit/d3d96e20573771c9b0f54a07c1557c448b1d9ae1
DIFF: 
https://github.com/llvm/llvm-project/commit/d3d96e20573771c9b0f54a07c1557c448b1d9ae1.diff

LOG: [flang][OpenMP] Add -f[no]-openmp-simd (#150269)

Both clang and gfortran support the -fopenmp-simd flag, which enables
OpenMP support only for simd constructs, while disabling the rest of
OpenMP.

Implement the appropriate parse tree rewriting to remove non-SIMD OpenMP
constructs at the parsing stage.

Add a new SimdOnly flang OpenMP IR pass which rewrites generated OpenMP
FIR to handle untangling composite simd constructs, and clean up OpenMP
operations left over after the parse tree rewriting stage.
With this approach, the two parts of the logic required to make the flag
work can be self-contained within the parse tree rewriter and the MLIR
pass, respectively. It does not need to be implemented within the core
lowering logic itself.

The flag is expected to have no effect if -fopenmp is passed explicitly,
and is only expected to remove OpenMP constructs, not things like OpenMP
library function calls. This matches the behaviour of other compilers.

---------

Signed-off-by: Kajetan Puchalski <kajetan.puchal...@arm.com>

Added: 
    flang/lib/Optimizer/OpenMP/SimdOnly.cpp
    flang/test/Driver/fopenmp-simd.f90
    flang/test/Semantics/OpenMP/simd-only.f90
    flang/test/Transforms/OpenMP/simd-only.mlir

Modified: 
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Flang.cpp
    flang/include/flang/Optimizer/OpenMP/Passes.td
    flang/include/flang/Optimizer/Passes/Pipelines.h
    flang/include/flang/Semantics/openmp-directive-sets.h
    flang/include/flang/Support/LangOptions.def
    flang/include/flang/Tools/CrossToolHelpers.h
    flang/lib/Frontend/CompilerInvocation.cpp
    flang/lib/Frontend/FrontendActions.cpp
    flang/lib/Lower/OpenMP/ClauseProcessor.h
    flang/lib/Lower/OpenMP/OpenMP.cpp
    flang/lib/Optimizer/OpenMP/CMakeLists.txt
    flang/lib/Optimizer/Passes/Pipelines.cpp
    flang/lib/Semantics/rewrite-parse-tree.cpp
    flang/tools/bbc/bbc.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index ee4df6727e7f0..7712a49cef154 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3731,14 +3731,20 @@ def fopenmp_relocatable_target : Flag<["-"], 
"fopenmp-relocatable-target">,
 def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">,
   Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
   Visibility<[ClangOption, CC1Option]>;
-def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>,
-  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
-  HelpText<"Emit OpenMP code only for SIMD-based constructs.">;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">,
+                   Group<f_Group>,
+                   Flags<[NoArgumentUnused]>,
+                   Visibility<[ClangOption, CC1Option, FlangOption, 
FC1Option]>,
+                   HelpText<"Emit OpenMP code only for SIMD-based 
constructs.">;
 def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, 
Group<f_Group>,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
   HelpText<"Use the experimental OpenMP-IR-Builder codegen path.">;
-def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>,
-  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>;
+def fno_openmp_simd
+    : Flag<["-"], "fno-openmp-simd">,
+      Group<f_Group>,
+      Flags<[NoArgumentUnused]>,
+      Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
+      HelpText<"Do not emit code for any OpenMP constructs.">;
 def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group<f_Group>,
   Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group<f_Group>,

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 7ab41e9b85a04..547e3156f519a 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -937,6 +937,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction 
&JA,
 
       if (Args.hasArg(options::OPT_fopenmp_force_usm))
         CmdArgs.push_back("-fopenmp-force-usm");
+      Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
+                      options::OPT_fno_openmp_simd);
 
       // FIXME: Clang supports a whole bunch more flags here.
       break;
@@ -952,6 +954,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction 
&JA,
           << A->getSpelling() << A->getValue();
       break;
     }
+  } else {
+    Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
+                    options::OPT_fno_openmp_simd);
   }
 
   // Pass the path to compiler resource files.

diff  --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 704faf0ccd856..e06289cfa8229 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -112,4 +112,9 @@ def GenericLoopConversionPass
   ];
 }
 
+def SimdOnlyPass : Pass<"omp-simd-only", "mlir::ModuleOp"> {
+  let summary = "Filters out non-simd OpenMP constructs";
+  let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES

diff  --git a/flang/include/flang/Optimizer/Passes/Pipelines.h 
b/flang/include/flang/Optimizer/Passes/Pipelines.h
index a3f59ee8dd013..fd8c43cc88a19 100644
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -119,13 +119,16 @@ void 
registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config);
 void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
                                            MLIRToLLVMPassPipelineConfig &pc);
 
+/// Select which mode to enable OpenMP support in.
+enum class EnableOpenMP { None, Simd, Full };
+
 /// Create a pass pipeline for lowering from HLFIR to FIR
 ///
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 void createHLFIRToFIRPassPipeline(
-    mlir::PassManager &pm, bool enableOpenMP,
+    mlir::PassManager &pm, EnableOpenMP enableOpenMP,
     llvm::OptimizationLevel optLevel = defaultOptLevel);
 
 struct OpenMPFIRPassPipelineOpts {

diff  --git a/flang/include/flang/Semantics/openmp-directive-sets.h 
b/flang/include/flang/Semantics/openmp-directive-sets.h
index dd610c9702c28..cc66cc833e8b7 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -401,6 +401,22 @@ static const OmpDirectiveSet nestedWorkshareErrSet{
         Directive::OMPD_taskloop,
     } | workShareSet,
 };
+
+//===----------------------------------------------------------------------===//
+// Misc directive sets
+//===----------------------------------------------------------------------===//
+
+// Simple standalone directives that can be erased by -fopenmp-simd.
+static const OmpDirectiveSet simpleStandaloneNonSimdOnlySet{
+    Directive::OMPD_taskyield,
+    Directive::OMPD_barrier,
+    Directive::OMPD_ordered,
+    Directive::OMPD_target_enter_data,
+    Directive::OMPD_target_exit_data,
+    Directive::OMPD_target_update,
+    Directive::OMPD_taskwait,
+};
+
 } // namespace llvm::omp
 
 #endif // FORTRAN_SEMANTICS_OPENMP_DIRECTIVE_SETS_H_

diff  --git a/flang/include/flang/Support/LangOptions.def 
b/flang/include/flang/Support/LangOptions.def
index d5bf7a2ecc036..ba72d7b4b7212 100644
--- a/flang/include/flang/Support/LangOptions.def
+++ b/flang/include/flang/Support/LangOptions.def
@@ -58,6 +58,8 @@ LANGOPT(OpenMPTeamSubscription, 1, 0)
 LANGOPT(OpenMPNoThreadState, 1, 0)
 /// Assume that no thread in a parallel region will encounter a parallel region
 LANGOPT(OpenMPNoNestedParallelism, 1, 0)
+/// Use SIMD only OpenMP support.
+LANGOPT(OpenMPSimd, 1, false)
 
 LANGOPT(VScaleMin, 32, 0)  ///< Minimum vscale range value
 LANGOPT(VScaleMax, 32, 0)  ///< Maximum vscale range value

diff  --git a/flang/include/flang/Tools/CrossToolHelpers.h 
b/flang/include/flang/Tools/CrossToolHelpers.h
index df1da27058552..51958fa36c3ad 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -134,6 +134,7 @@ struct MLIRToLLVMPassPipelineConfig : public 
FlangEPCallBacks {
                                       ///< functions.
   bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments.
   bool EnableOpenMP = false; ///< Enable OpenMP lowering.
+  bool EnableOpenMPSimd = false; ///< Enable OpenMP simd-only mode.
   std::string InstrumentFunctionEntry =
       ""; ///< Name of the instrument-function that is called on each
           ///< function-entry

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 111c5aa48726f..708fb7f0b82ee 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1162,8 +1162,15 @@ static bool parseOpenMPArgs(CompilerInvocation &res, 
llvm::opt::ArgList &args,
                             clang::DiagnosticsEngine &diags) {
   llvm::opt::Arg *arg = args.getLastArg(clang::driver::options::OPT_fopenmp,
                                         
clang::driver::options::OPT_fno_openmp);
-  if (!arg || arg->getOption().matches(clang::driver::options::OPT_fno_openmp))
-    return true;
+  if (!arg ||
+      arg->getOption().matches(clang::driver::options::OPT_fno_openmp)) {
+    bool isSimdSpecified = args.hasFlag(
+        clang::driver::options::OPT_fopenmp_simd,
+        clang::driver::options::OPT_fno_openmp_simd, /*Default=*/false);
+    if (!isSimdSpecified)
+      return true;
+    res.getLangOpts().OpenMPSimd = 1;
+  }
 
   unsigned numErrorsBefore = diags.getNumErrors();
   llvm::Triple t(res.getTargetOpts().triple);

diff  --git a/flang/lib/Frontend/FrontendActions.cpp 
b/flang/lib/Frontend/FrontendActions.cpp
index 5c66ecf3043cd..3bef6b1c31825 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -298,6 +298,7 @@ bool CodeGenAction::beginSourceFileAction() {
   bool isOpenMPEnabled =
       ci.getInvocation().getFrontendOpts().features.IsEnabled(
           Fortran::common::LanguageFeature::OpenMP);
+  bool isOpenMPSimd = ci.getInvocation().getLangOpts().OpenMPSimd;
 
   fir::OpenMPFIRPassPipelineOpts opts;
 
@@ -329,12 +330,13 @@ bool CodeGenAction::beginSourceFileAction() {
     if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
             mlirModule->getOperation()))
       opts.isTargetDevice = offloadMod.getIsTargetDevice();
+  }
 
-    // WARNING: This pipeline must be run immediately after the lowering to
-    // ensure that the FIR is correct with respect to OpenMP operations/
-    // attributes.
+  // WARNING: This pipeline must be run immediately after the lowering to
+  // ensure that the FIR is correct with respect to OpenMP operations/
+  // attributes.
+  if (isOpenMPEnabled || isOpenMPSimd)
     fir::createOpenMPFIRPassPipeline(pm, opts);
-  }
 
   pm.enableVerifier(/*verifyPasses=*/true);
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
@@ -617,12 +619,14 @@ void CodeGenAction::lowerHLFIRToFIR() {
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
   pm.enableVerifier(/*verifyPasses=*/true);
 
+  fir::EnableOpenMP enableOpenMP = fir::EnableOpenMP::None;
+  if (ci.getInvocation().getFrontendOpts().features.IsEnabled(
+          Fortran::common::LanguageFeature::OpenMP))
+    enableOpenMP = fir::EnableOpenMP::Full;
+  if (ci.getInvocation().getLangOpts().OpenMPSimd)
+    enableOpenMP = fir::EnableOpenMP::Simd;
   // Create the pass pipeline
-  fir::createHLFIRToFIRPassPipeline(
-      pm,
-      ci.getInvocation().getFrontendOpts().features.IsEnabled(
-          Fortran::common::LanguageFeature::OpenMP),
-      level);
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, level);
   (void)mlir::applyPassManagerCLOptions(pm);
 
   mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
@@ -748,6 +752,9 @@ void CodeGenAction::generateLLVMIR() {
           Fortran::common::LanguageFeature::OpenMP))
     config.EnableOpenMP = true;
 
+  if (ci.getInvocation().getLangOpts().OpenMPSimd)
+    config.EnableOpenMPSimd = true;
+
   if (ci.getInvocation().getLoweringOpts().getIntegerWrapAround())
     config.NSWOnLoopVarInc = false;
 

diff  --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h 
b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 7f894afc1ab37..c46bdb348a3ef 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -208,11 +208,15 @@ void ClauseProcessor::processTODO(mlir::Location 
currentLocation,
     if (!x)
       return;
     unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(currentLocation,
-         "Unhandled clause " + llvm::omp::getOpenMPClauseName(id).upper() +
-             " in " +
-             llvm::omp::getOpenMPDirectiveName(directive, version).upper() +
-             " construct");
+    bool isSimdDirective = llvm::omp::getOpenMPDirectiveName(directive, 
version)
+                               .upper()
+                               .find("SIMD") != llvm::StringRef::npos;
+    if (!semaCtx.langOptions().OpenMPSimd || isSimdDirective)
+      TODO(currentLocation,
+           "Unhandled clause " + llvm::omp::getOpenMPClauseName(id).upper() +
+               " in " +
+               llvm::omp::getOpenMPDirectiveName(directive, version).upper() +
+               " construct");
   };
 
   for (ClauseIterator it = clauses.begin(); it != clauses.end(); ++it)

diff  --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ae60432afccd0..fef64ccc15015 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2262,7 +2262,8 @@ genOrderedOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
              semantics::SemanticsContext &semaCtx, lower::pft::Evaluation 
&eval,
              mlir::Location loc, const ConstructQueue &queue,
              ConstructQueue::const_iterator item) {
-  TODO(loc, "OMPD_ordered");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "OMPD_ordered");
   return nullptr;
 }
 
@@ -2449,7 +2450,8 @@ genScopeOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
            mlir::Location loc, const ConstructQueue &queue,
            ConstructQueue::const_iterator item) {
-  TODO(loc, "Scope construct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "Scope construct");
   return nullptr;
 }
 
@@ -3276,7 +3278,8 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd(
     lower::pft::Evaluation &eval, mlir::Location loc,
     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
   assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
-  TODO(loc, "Composite TASKLOOP SIMD");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(loc, "Composite TASKLOOP SIMD");
   return nullptr;
 }
 
@@ -3448,8 +3451,10 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
     break;
   case llvm::omp::Directive::OMPD_tile: {
     unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(loc, "Unhandled loop directive (" +
-                  llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+    if (!semaCtx.langOptions().OpenMPSimd)
+      TODO(loc, "Unhandled loop directive (" +
+                    llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+    break;
   }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
@@ -3484,35 +3489,40 @@ static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPDeclarativeAssumes &assumesConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMP ASSUMES declaration");
 }
 
 static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OmpDeclareVariantDirective &declareVariantDirective) {
-  TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OmpDeclareVariantDirective");
 }
 
 static void genOMP(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
     const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
 }
 
 static void
 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
        const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
 }
 
 static void
@@ -3706,14 +3716,16 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   (void)objects;
   (void)clauses;
 
-  TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPInteropConstruct &interopConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPInteropConstruct");
 }
 
 static void
@@ -3729,7 +3741,8 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
-  TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
 }
 
 
//===----------------------------------------------------------------------===//
@@ -3795,7 +3808,8 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
         !std::holds_alternative<clause::Detach>(clause.u)) {
       std::string name =
           
parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
-      TODO(clauseLocation, name + " clause is not implemented yet");
+      if (!semaCtx.langOptions().OpenMPSimd)
+        TODO(clauseLocation, name + " clause is not implemented yet");
     }
   }
 
@@ -3811,7 +3825,8 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPAssumeConstruct &assumeConstruct) {
   mlir::Location clauseLocation = 
converter.genLocation(assumeConstruct.source);
-  TODO(clauseLocation, "OpenMP ASSUME construct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(clauseLocation, "OpenMP ASSUME construct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
@@ -3847,21 +3862,24 @@ static void genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPUtilityConstruct &) {
-  TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPDispatchConstruct &) {
-  TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
                    semantics::SemanticsContext &semaCtx,
                    lower::pft::Evaluation &eval,
                    const parser::OpenMPExecutableAllocate &execAllocConstruct) 
{
-  TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
+  if (!semaCtx.langOptions().OpenMPSimd)
+    TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
 }
 
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,

diff  --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt 
b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index e31543328a9f9..3fb0bac05ce0d 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FlangOpenMPTransforms
   MarkDeclareTarget.cpp
   LowerWorkshare.cpp
   LowerNontemporal.cpp
+  SimdOnly.cpp
 
   DEPENDS
   FIRDialect

diff  --git a/flang/lib/Optimizer/OpenMP/SimdOnly.cpp 
b/flang/lib/Optimizer/OpenMP/SimdOnly.cpp
new file mode 100644
index 0000000000000..7d332faf9b299
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/SimdOnly.cpp
@@ -0,0 +1,212 @@
+//===-- SimdOnly.cpp 
------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+
+namespace flangomp {
+#define GEN_PASS_DEF_SIMDONLYPASS
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+namespace {
+
+#define DEBUG_TYPE "omp-simd-only-pass"
+
+/// Rewrite and remove OpenMP operations left after the parse tree rewriting 
for
+/// -fopenmp-simd is done. If possible, OpenMP constructs should be rewritten 
at
+/// the parse tree stage. This pass is supposed to only handle complexities
+/// around untangling composite simd constructs, and perform the necessary
+/// cleanup.
+class SimdOnlyConversionPattern : public mlir::RewritePattern {
+public:
+  SimdOnlyConversionPattern(mlir::MLIRContext *ctx)
+      : mlir::RewritePattern(MatchAnyOpTypeTag{}, 1, ctx) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::Operation *op,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (op->getDialect()->getNamespace() !=
+        mlir::omp::OpenMPDialect::getDialectNamespace())
+      return rewriter.notifyMatchFailure(op, "Not an OpenMP op");
+
+    if (auto simdOp = mlir::dyn_cast<mlir::omp::SimdOp>(op)) {
+      // Remove the composite attr given that the op will no longer be 
composite
+      if (simdOp.isComposite()) {
+        simdOp.setComposite(false);
+        return mlir::success();
+      }
+
+      return rewriter.notifyMatchFailure(op, "Op is a plain SimdOp");
+    }
+
+    if (op->getParentOfType<mlir::omp::SimdOp>() &&
+        (mlir::isa<mlir::omp::YieldOp>(op) ||
+         mlir::isa<mlir::omp::ScanOp>(op) ||
+         mlir::isa<mlir::omp::LoopNestOp>(op) ||
+         mlir::isa<mlir::omp::TerminatorOp>(op)))
+      return rewriter.notifyMatchFailure(op, "Op is part of a simd construct");
+
+    if (!mlir::isa<mlir::func::FuncOp>(op->getParentOp()) &&
+        (mlir::isa<mlir::omp::TerminatorOp>(op) ||
+         mlir::isa<mlir::omp::YieldOp>(op)))
+      return rewriter.notifyMatchFailure(op,
+                                         "Non top-level yield or terminator");
+
+    LLVM_DEBUG(llvm::dbgs() << "SimdOnlyPass matched OpenMP op:\n");
+    LLVM_DEBUG(op->dump());
+
+    auto eraseUnlessUsedBySimd = [&](mlir::Operation *ompOp,
+                                     mlir::StringAttr name) {
+      if (auto uses =
+              mlir::SymbolTable::getSymbolUses(name, op->getParentOp())) {
+        for (auto &use : *uses)
+          if (mlir::isa<mlir::omp::SimdOp>(use.getUser()))
+            return rewriter.notifyMatchFailure(op,
+                                               "Op used by a simd construct");
+      }
+      rewriter.eraseOp(ompOp);
+      return mlir::success();
+    };
+
+    if (auto ompOp = mlir::dyn_cast<mlir::omp::PrivateClauseOp>(op))
+      return eraseUnlessUsedBySimd(ompOp, ompOp.getSymNameAttr());
+    if (auto ompOp = mlir::dyn_cast<mlir::omp::DeclareReductionOp>(op))
+      return eraseUnlessUsedBySimd(ompOp, ompOp.getSymNameAttr());
+
+    // Might be left over from rewriting composite simd with target map
+    if (mlir::isa<mlir::omp::MapBoundsOp>(op)) {
+      rewriter.eraseOp(op);
+      return mlir::success();
+    }
+    if (auto mapInfoOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(op)) {
+      mapInfoOp.getResult().replaceAllUsesWith(mapInfoOp.getVarPtr());
+      rewriter.eraseOp(mapInfoOp);
+      return mlir::success();
+    }
+
+    // Might be leftover after parse tree rewriting
+    if (auto threadPrivateOp = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op)) 
{
+      threadPrivateOp.getTlsAddr().replaceAllUsesWith(
+          threadPrivateOp.getSymAddr());
+      rewriter.eraseOp(threadPrivateOp);
+      return mlir::success();
+    }
+
+    fir::FirOpBuilder builder(rewriter, op);
+    mlir::Location loc = op->getLoc();
+
+    auto inlineSimpleOp = [&](mlir::Operation *ompOp) -> bool {
+      if (!ompOp)
+        return false;
+
+      assert("OpenMP operation has one region" && ompOp->getNumRegions() == 1);
+
+      llvm::SmallVector<std::pair<mlir::Value, mlir::BlockArgument>>
+          blockArgsPairs;
+      if (auto iface =
+              mlir::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(op)) {
+        iface.getBlockArgsPairs(blockArgsPairs);
+        for (auto [value, argument] : blockArgsPairs)
+          rewriter.replaceAllUsesWith(argument, value);
+      }
+
+      if (ompOp->getRegion(0).getBlocks().size() == 1) {
+        auto &block = *ompOp->getRegion(0).getBlocks().begin();
+        // This block is about to be removed so any arguments should have been
+        // replaced by now.
+        block.eraseArguments(0, block.getNumArguments());
+        if (auto terminatorOp =
+                mlir::dyn_cast<mlir::omp::TerminatorOp>(block.back())) {
+          rewriter.eraseOp(terminatorOp);
+        }
+        rewriter.inlineBlockBefore(&block, ompOp, {});
+      } else {
+        // When dealing with multi-block regions we need to fix up the control
+        // flow
+        auto *origBlock = ompOp->getBlock();
+        auto *newBlock = rewriter.splitBlock(origBlock, ompOp->getIterator());
+        auto *innerFrontBlock = &ompOp->getRegion(0).getBlocks().front();
+        builder.setInsertionPointToEnd(origBlock);
+        mlir::cf::BranchOp::create(builder, loc, innerFrontBlock);
+        // We are no longer passing any arguments to the first block in the
+        // region, so this should be safe to erase.
+        innerFrontBlock->eraseArguments(0, innerFrontBlock->getNumArguments());
+
+        for (auto &innerBlock : ompOp->getRegion(0).getBlocks()) {
+          // Remove now-unused block arguments
+          for (auto arg : innerBlock.getArguments()) {
+            if (arg.getUses().empty())
+              innerBlock.eraseArgument(arg.getArgNumber());
+          }
+          if (auto terminatorOp =
+                  mlir::dyn_cast<mlir::omp::TerminatorOp>(innerBlock.back())) {
+            builder.setInsertionPointToEnd(&innerBlock);
+            mlir::cf::BranchOp::create(builder, loc, newBlock);
+            rewriter.eraseOp(terminatorOp);
+          }
+        }
+
+        rewriter.inlineRegionBefore(ompOp->getRegion(0), newBlock);
+      }
+
+      rewriter.eraseOp(op);
+      return true;
+    };
+
+    // Remove ops that will be surrounding simd once a composite simd construct
+    // goes through the codegen stage. All of the other ones should have already
+    // been removed in the parse tree rewriting stage.
+    if (inlineSimpleOp(mlir::dyn_cast<mlir::omp::TeamsOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::ParallelOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::TargetOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::WsloopOp>(op)) ||
+        inlineSimpleOp(mlir::dyn_cast<mlir::omp::DistributeOp>(op)))
+      return mlir::success();
+
+    op->emitOpError("left unhandled after SimdOnly pass.");
+    return mlir::failure();
+  }
+};
+
+class SimdOnlyPass : public flangomp::impl::SimdOnlyPassBase<SimdOnlyPass> {
+
+public:
+  SimdOnlyPass() = default;
+
+  void runOnOperation() override {
+    mlir::ModuleOp module = getOperation();
+
+    mlir::MLIRContext *context = &getContext();
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<SimdOnlyConversionPattern>(context);
+
+    mlir::GreedyRewriteConfig config;
+    // Prevent the pattern driver from merging blocks.
+    config.setRegionSimplificationLevel(
+        mlir::GreedySimplifyRegionLevel::Disabled);
+
+    if (mlir::failed(
+            mlir::applyPatternsGreedily(module, std::move(patterns), config))) 
{
+      mlir::emitError(module.getLoc(), "Error in SimdOnly conversion pass");
+      signalPassFailure();
+    }
+  }
+};
+
+} // namespace

diff  --git a/flang/lib/Optimizer/Passes/Pipelines.cpp 
b/flang/lib/Optimizer/Passes/Pipelines.cpp
index ca8e820608688..5a870928f8413 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -242,7 +242,8 @@ void 
createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
-void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
+void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
+                                  EnableOpenMP enableOpenMP,
                                   llvm::OptimizationLevel optLevel) {
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
@@ -294,8 +295,10 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, 
bool enableOpenMP,
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createInlineHLFIRAssign);
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
-  if (enableOpenMP)
+  if (enableOpenMP != EnableOpenMP::None)
     pm.addPass(flangomp::createLowerWorkshare());
+  if (enableOpenMP == EnableOpenMP::Simd)
+    pm.addPass(flangomp::createSimdOnlyPass());
 }
 
 /// Create a pass pipeline for handling certain OpenMP transformations needed
@@ -396,7 +399,12 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager 
&pm,
 void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
                                   MLIRToLLVMPassPipelineConfig &config,
                                   llvm::StringRef inputFilename) {
-  fir::createHLFIRToFIRPassPipeline(pm, config.EnableOpenMP, config.OptLevel);
+  fir::EnableOpenMP enableOpenMP = fir::EnableOpenMP::None;
+  if (config.EnableOpenMP)
+    enableOpenMP = fir::EnableOpenMP::Full;
+  if (config.EnableOpenMPSimd)
+    enableOpenMP = fir::EnableOpenMP::Simd;
+  fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP, config.OptLevel);
 
   // Add default optimizer pass pipeline.
   fir::createDefaultFIROptimizerPassPipeline(pm, config);

diff  --git a/flang/lib/Semantics/rewrite-parse-tree.cpp 
b/flang/lib/Semantics/rewrite-parse-tree.cpp
index 4eeb1b9ed3c1e..b3019762ead1f 100644
--- a/flang/lib/Semantics/rewrite-parse-tree.cpp
+++ b/flang/lib/Semantics/rewrite-parse-tree.cpp
@@ -12,6 +12,7 @@
 #include "flang/Parser/parse-tree-visitor.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Parser/tools.h"
+#include "flang/Semantics/openmp-directive-sets.h"
 #include "flang/Semantics/scope.h"
 #include "flang/Semantics/semantics.h"
 #include "flang/Semantics/symbol.h"
@@ -41,11 +42,23 @@ class RewriteMutator {
 
   void Post(parser::Name &);
   bool Pre(parser::MainProgram &);
+  bool Pre(parser::Module &);
   bool Pre(parser::FunctionSubprogram &);
   bool Pre(parser::SubroutineSubprogram &);
   bool Pre(parser::SeparateModuleSubprogram &);
   bool Pre(parser::BlockConstruct &);
+  bool Pre(parser::Block &);
+  bool Pre(parser::DoConstruct &);
+  bool Pre(parser::IfConstruct &);
   bool Pre(parser::ActionStmt &);
+  void Post(parser::MainProgram &);
+  void Post(parser::FunctionSubprogram &);
+  void Post(parser::SubroutineSubprogram &);
+  void Post(parser::SeparateModuleSubprogram &);
+  void Post(parser::BlockConstruct &);
+  void Post(parser::Block &);
+  void Post(parser::DoConstruct &);
+  void Post(parser::IfConstruct &);
   void Post(parser::ReadStmt &);
   void Post(parser::WriteStmt &);
 
@@ -67,8 +80,15 @@ class RewriteMutator {
   bool Pre(parser::EndSubroutineStmt &) { return false; }
   bool Pre(parser::EndTypeStmt &) { return false; }
 
+  bool Pre(parser::OpenMPBlockConstruct &);
+  bool Pre(parser::OpenMPLoopConstruct &);
+  void Post(parser::OpenMPBlockConstruct &);
+  void Post(parser::OpenMPLoopConstruct &);
+
 private:
   void FixMisparsedStmtFuncs(parser::SpecificationPart &, parser::Block &);
+  void OpenMPSimdOnly(parser::Block &, bool);
+  void OpenMPSimdOnly(parser::SpecificationPart &);
 
   SemanticsContext &context_;
   bool errorOnUnresolvedName_{true};
@@ -96,6 +116,132 @@ static bool ReturnsDataPointer(const Symbol &symbol) {
   return false;
 }
 
+// True if the loop's begin directive is a member of llvm::omp::allSimdSet.
+static bool LoopConstructIsSIMD(parser::OpenMPLoopConstruct *ompLoop) {
+  return llvm::omp::allSimdSet.test(std::get<parser::OmpLoopDirective>(
+      std::get<parser::OmpBeginLoopDirective>(ompLoop->t).t).v);
+}
+
+// Strip OpenMP declarative constructs from a specification part when
+// compiling with -fopenmp-simd. Only THREADPRIVATE and DECLARE MAPPER are
+// dropped here; every other declaration construct is left untouched.
+void RewriteMutator::OpenMPSimdOnly(parser::SpecificationPart &specPart) {
+  auto isRemovable{[](const parser::DeclarationConstruct &decl) {
+    const auto *spec{std::get_if<parser::SpecificationConstruct>(&decl.u)};
+    if (!spec) {
+      return false;
+    }
+    const auto *ompDecl{std::get_if<
+        common::Indirection<parser::OpenMPDeclarativeConstruct>>(&spec->u)};
+    if (!ompDecl) {
+      return false;
+    }
+    const auto &construct{ompDecl->value().u};
+    return std::holds_alternative<parser::OpenMPThreadprivate>(construct) ||
+        std::holds_alternative<parser::OpenMPDeclareMapperConstruct>(
+            construct);
+  }};
+  std::get<std::list<parser::DeclarationConstruct>>(specPart.t)
+      .remove_if(isRemovable);
+}
+
+// Rewrite `block` for -fopenmp-simd: erase standalone non-SIMD directives,
+// splice the bodies of block, sections, atomic and critical constructs into
+// `block`, and replace a non-SIMD loop construct by its nested DO loop (it is
+// left untouched if it has none). SIMD loop constructs are kept unchanged.
+// `isNonSimdLoopBody` should be true if `block` is the body of a non-simd
+// OpenMP loop, so that scan constructs are also removed from it.
+void RewriteMutator::OpenMPSimdOnly(
+    parser::Block &block, bool isNonSimdLoopBody = false) {
+  auto replaceInlineBlock =
+      [&](std::list<parser::ExecutionPartConstruct> &innerBlock,
+          auto it) -> auto {
+    auto insertPos = std::next(it);
+    block.splice(insertPos, innerBlock);
+    block.erase(it);
+    return insertPos;
+  };
+
+  for (auto it{block.begin()}; it != block.end();) {
+    if (auto *stmt{std::get_if<parser::ExecutableConstruct>(&it->u)}) {
+      if (auto *omp{std::get_if<common::Indirection<parser::OpenMPConstruct>>(
+              &stmt->u)}) {
+        if (auto *ompStandalone{std::get_if<parser::OpenMPStandaloneConstruct>(
+                &omp->value().u)}) {
+          if (std::holds_alternative<parser::OpenMPCancelConstruct>(
+                  ompStandalone->u) ||
+              std::holds_alternative<parser::OpenMPFlushConstruct>(
+                  ompStandalone->u) ||
+              std::holds_alternative<parser::OpenMPCancellationPointConstruct>(
+                  ompStandalone->u)) {
+            it = block.erase(it);
+            continue;
+          }
+          if (auto *constr{std::get_if<parser::OpenMPSimpleStandaloneConstruct>(
+                  &ompStandalone->u)}) {
+            auto directive = constr->v.DirId();
+            // Scan is only removed from the body of a non-simd loop
+            if (llvm::omp::simpleStandaloneNonSimdOnlySet.test(directive) ||
+                (isNonSimdLoopBody && directive == llvm::omp::OMPD_scan)) {
+              it = block.erase(it);
+              continue;
+            }
+          }
+        } else if (auto *ompBlock{std::get_if<parser::OpenMPBlockConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(ompBlock->t), it);
+          continue;
+        } else if (auto *ompLoop{std::get_if<parser::OpenMPLoopConstruct>(
+                       &omp->value().u)}) {
+          if (LoopConstructIsSIMD(ompLoop)) {
+            ++it;
+            continue;
+          }
+          auto &nest =
+              std::get<std::optional<parser::NestedConstruct>>(ompLoop->t);
+
+          if (auto *doConstruct =
+                  nest ? std::get_if<parser::DoConstruct>(&*nest) : nullptr) {
+            auto &loopBody = std::get<parser::Block>(doConstruct->t);
+            // Scan constructs may only be removed from the body when the
+            // loop is _not_ an OpenMP simd loop
+            OpenMPSimdOnly(loopBody, /*isNonSimdLoopBody=*/true);
+            auto newDoConstruct = std::move(*doConstruct);
+            auto newLoop = parser::ExecutionPartConstruct{
+                parser::ExecutableConstruct{std::move(newDoConstruct)}};
+            it = block.erase(it);
+            block.insert(it, std::move(newLoop));
+            continue;
+          }
+        } else if (auto *ompCon{std::get_if<parser::OpenMPSectionsConstruct>(
+                       &omp->value().u)}) {
+          auto &sections =
+              std::get<std::list<parser::OpenMPConstruct>>(ompCon->t);
+          auto insertPos = std::next(it);
+          for (auto &sectionCon : sections) {
+            auto &section =
+                std::get<parser::OpenMPSectionConstruct>(sectionCon.u);
+            auto &innerBlock = std::get<parser::Block>(section.t);
+            block.splice(insertPos, innerBlock);
+          }
+          block.erase(it);
+          it = insertPos;
+          continue;
+        } else if (auto *atomic{std::get_if<parser::OpenMPAtomicConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(atomic->t), it);
+          continue;
+        } else if (auto *critical{std::get_if<parser::OpenMPCriticalConstruct>(
+                       &omp->value().u)}) {
+          it = replaceInlineBlock(std::get<parser::Block>(critical->t), it);
+          continue;
+        }
+      }
+    }
+    ++it;
+  }
+}
+
 // Finds misparsed statement functions in a specification part, rewrites
 // them into array element assignment statements, and moves them into the
 // beginning of the corresponding (execution part's) block.
@@ -133,33 +279,155 @@ void RewriteMutator::FixMisparsedStmtFuncs(
 bool RewriteMutator::Pre(parser::MainProgram &program) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(program.t),
       std::get<parser::ExecutionPart>(program.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(program.t).v);
+    OpenMPSimdOnly(std::get<parser::SpecificationPart>(program.t));
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::MainProgram &program) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(program.t).v);
+  }
+}
+
+bool RewriteMutator::Pre(parser::Module &module) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::SpecificationPart>(module.t));
+  }
   return true;
 }
 
 bool RewriteMutator::Pre(parser::FunctionSubprogram &func) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(func.t),
       std::get<parser::ExecutionPart>(func.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(func.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::FunctionSubprogram &func) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(func.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::SubroutineSubprogram &subr) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(subr.t),
       std::get<parser::ExecutionPart>(subr.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subr.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::SubroutineSubprogram &subr) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subr.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::SeparateModuleSubprogram &subp) {
   FixMisparsedStmtFuncs(std::get<parser::SpecificationPart>(subp.t),
       std::get<parser::ExecutionPart>(subp.t).v);
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subp.t).v);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::SeparateModuleSubprogram &subp) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::ExecutionPart>(subp.t).v);
+  }
+}
+
 bool RewriteMutator::Pre(parser::BlockConstruct &block) {
   FixMisparsedStmtFuncs(std::get<parser::BlockSpecificationPart>(block.t).v,
       std::get<parser::Block>(block.t));
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::Block>(block.t));
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::BlockConstruct &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(std::get<parser::Block>(block.t));
+  }
+}
+
+bool RewriteMutator::Pre(parser::Block &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    OpenMPSimdOnly(block);
+  }
   return true;
 }
 
+void RewriteMutator::Post(parser::Block &block) { this->Pre(block); }
+
+bool RewriteMutator::Pre(parser::OpenMPBlockConstruct &block) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(block.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::OpenMPBlockConstruct &block) {
+  this->Pre(block);
+}
+
+bool RewriteMutator::Pre(parser::OpenMPLoopConstruct &ompLoop) {
+  if (context_.langOptions().OpenMPSimd) {
+    if (LoopConstructIsSIMD(&ompLoop)) {
+      return true;
+    }
+    // If we're looking at a non-simd OpenMP loop, we need to explicitly
+    // call OpenMPSimdOnly on the nested loop block while indicating where
+    // the block comes from.
+    auto &nest = std::get<std::optional<parser::NestedConstruct>>(ompLoop.t);
+    if (!nest.has_value()) {
+      return true;
+    }
+    if (auto *doConstruct = std::get_if<parser::DoConstruct>(&*nest)) {
+      auto &innerBlock = std::get<parser::Block>(doConstruct->t);
+      OpenMPSimdOnly(innerBlock, /*isNonSimdLoopBody=*/true);
+    }
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::OpenMPLoopConstruct &ompLoop) {
+  this->Pre(ompLoop);
+}
+
+bool RewriteMutator::Pre(parser::DoConstruct &doConstruct) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(doConstruct.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::DoConstruct &doConstruct) {
+  this->Pre(doConstruct);
+}
+
+bool RewriteMutator::Pre(parser::IfConstruct &ifConstruct) {
+  if (context_.langOptions().OpenMPSimd) {
+    auto &innerBlock = std::get<parser::Block>(ifConstruct.t);
+    OpenMPSimdOnly(innerBlock);
+  }
+  return true;
+}
+
+void RewriteMutator::Post(parser::IfConstruct &ifConstruct) {
+  this->Pre(ifConstruct);
+}
+
 // Rewrite PRINT NML -> WRITE(*,NML=NML)
 bool RewriteMutator::Pre(parser::ActionStmt &x) {
   if (auto *print{std::get_if<common::Indirection<parser::PrintStmt>>(&x.u)};

diff  --git a/flang/test/Driver/fopenmp-simd.f90 
b/flang/test/Driver/fopenmp-simd.f90
new file mode 100644
index 0000000000000..b25adee2779ee
--- /dev/null
+++ b/flang/test/Driver/fopenmp-simd.f90
@@ -0,0 +1,59 @@
+! RUN: %flang -target x86_64-linux-gnu -fopenmp-simd %s -o %t -### 2>&1 | 
FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG 
--check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-darwin -fopenmp-simd %s -o %t -### 2>&1 | 
FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG 
--check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-freebsd -fopenmp-simd %s -o %t -### 2>&1 | 
FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG 
--check-prefix=CHECK-NO-LD-ANY
+! RUN: %flang -target x86_64-windows-gnu -fopenmp-simd %s -o %t -### 2>&1 | 
FileCheck %s --check-prefix=CHECK-OPENMP-SIMD-FLAG 
--check-prefix=CHECK-NO-LD-ANY
+
+! CHECK-OPENMP-SIMD-FLAG: "-fopenmp-simd"
+! CHECK-NO-LD-ANY-NOT: "-l{{(omp|gomp|iomp5)}}"
+
+! -fopenmp-simd enables openmp support only for simd constructs
+! RUN: %flang_fc1 -fopenmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-OMP-SIMD %s
+! RUN: %flang_fc1 -fno-openmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-NO-OMP-SIMD %s
+! RUN: %flang_fc1 -fopenmp-simd -fno-openmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-NO-OMP-SIMD %s
+! RUN: %flang_fc1 -fno-openmp-simd -fopenmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-OMP-SIMD %s
+! -fopenmp-simd should have no effect if -fopenmp is already set
+! RUN: %flang_fc1 -fopenmp %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-OMP %s
+! RUN: %flang_fc1 -fopenmp -fopenmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-OMP %s
+! RUN: %flang_fc1 -fopenmp -fno-openmp-simd %s -emit-fir -o - | FileCheck 
--check-prefix=CHECK-OMP %s
+
+subroutine main
+  ! CHECK-OMP-SIMD-NOT: omp.parallel
+  ! CHECK-OMP-SIMD-NOT: omp.wsloop
+  ! CHECK-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-OMP-SIMD: fir.do_loop
+  ! CHECK-NO-OMP-SIMD-NOT: omp.parallel
+  ! CHECK-NO-OMP-SIMD-NOT: omp.wsloop
+  ! CHECK-NO-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD: fir.do_loop
+  ! CHECK-OMP: omp.parallel
+  ! CHECK-OMP: omp.wsloop
+  ! CHECK-OMP: omp.loop_nest
+  ! CHECK-OMP-NOT: fir.do_loop
+  !$omp parallel do
+  do i = 1, 10
+    print *, "test"
+  end do
+  ! CHECK-NO-OMP-SIMD-NOT: omp.yield
+  ! CHECK-NO-OMP-SIMD-NOT: omp.terminator
+  ! CHECK-OMP-SIMD-NOT: omp.yield
+  ! CHECK-OMP-SIMD-NOT: omp.terminator
+  ! CHECK-OMP: omp.yield
+  ! CHECK-OMP: omp.terminator
+  !$omp end parallel do
+
+  ! CHECK-OMP-SIMD: omp.simd
+  ! CHECK-NO-OMP-SIMD-NOT: omp.simd
+  ! CHECK-OMP: omp.simd
+  !$omp simd
+  ! CHECK-OMP-SIMD: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD-NOT: omp.loop_nest
+  ! CHECK-NO-OMP-SIMD: fir.do_loop
+  ! CHECK-OMP: omp.loop_nest
+  ! CHECK-OMP-NOT: fir.do_loop
+  do i = 1, 10
+    print *, "test"
+  ! CHECK-OMP-SIMD: omp.yield
+  ! CHECK-NO-OMP-SIMD-NOT: omp.yield
+  ! CHECK-OMP: omp.yield
+  end do
+end subroutine

diff  --git a/flang/test/Semantics/OpenMP/simd-only.f90 
b/flang/test/Semantics/OpenMP/simd-only.f90
new file mode 100644
index 0000000000000..da42b10d73bed
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/simd-only.f90
@@ -0,0 +1,416 @@
+! RUN: %flang_fc1 -fopenmp-simd -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s
+
+! Test that non-SIMD OpenMPConstructs are removed on the parse tree level
+! when -fopenmp-simd is specified.
+! Tests the logic in lib/Semantics/rewrite-parse-tree.cpp
+
+! CHECK-LABEL: Name = 'test_simd'
+subroutine test_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp simd
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_simd'
+subroutine test_do_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = do simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp do simd
+  do i = 1, 100
+  end do
+end subroutine
+
+
+! CHECK-LABEL: Name = 'test_parallel_do_simd'
+subroutine test_parallel_do_simd()
+  integer :: i
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = parallel do simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do simd
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_simd_scan'
+subroutine test_simd_scan()
+  integer :: i
+  real :: sum
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  !$omp simd reduction(inscan,+:sum)
+  do i = 1, N
+    sum = sum + a(i)
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct 
-> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> 
OmpDirectiveSpecification
+    ! CHECK: OmpDirectiveName -> llvm::omp::Directive = scan
+    !$omp scan inclusive(sum)
+    sum       = sum + a(i)
+  end do
+
+end subroutine
+
+! CHECK-LABEL: Name = 'test_simd_atomic'
+subroutine test_simd_atomic()
+  integer :: i, x
+
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OpenMPLoopConstruct
+  ! CHECK: OmpLoopDirective -> llvm::omp::Directive = simd
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp simd
+  do i = 1, 100
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=i'
+  !$omp atomic write
+  x = i
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do'
+subroutine test_do()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_nested'
+subroutine test_do_nested()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+    do j = 1, 100
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target'
+subroutine test_target()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target
+  do i = 1, 100
+  end do
+  !$omp end target
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_teams_distribute'
+subroutine test_target_teams_distribute()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target teams 
distribute
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target teams distribute
+  do i = 1, 100
+  end do
+  !$omp end target teams distribute
+end subroutine
+
+
+! CHECK-LABEL: Name = 'test_target_data'
+subroutine test_target_data()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target data
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp target data map(to: A) map(tofrom: B)
+  do i = 1, 100
+  end do
+  !$omp end target data
+end subroutine
+
+! CHECK-LABEL: Name = 'test_loop'
+subroutine test_loop()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = loop
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp loop bind(thread)
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_unroll'
+subroutine test_unroll()
+  integer :: i
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = unroll
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp unroll
+  do i = 1, 100
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_do_ordered'
+subroutine test_do_ordered()
+  integer :: i, x
+  x = 0
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp do ordered
+  do i = 1, 100
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = ordered
+  !$omp ordered
+  x = x + 1
+  !$omp end ordered
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_cancel'
+subroutine test_cancel()
+  integer :: i, x
+  x = 0
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do
+  do i = 1, 100
+  if (i == 10) then
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPCancelConstruct -> 
OmpDirectiveSpecification
+    ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = cancel
+    !$omp cancel do
+  end if
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> 
OpenMPCancellationPointConstruct -> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = cancellation point
+  !$omp cancellation point do
+  end do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_scan'
+subroutine test_scan()
+  integer :: i, sum
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPLoopConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  !$omp parallel do reduction(inscan, +: sum)
+  do i = 1, n
+    sum = sum + i
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = scan
+    !$omp scan inclusive(sum)
+  end do
+  !$omp end parallel do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_map'
+subroutine test_target_map()
+  integer :: array(10)
+
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpLoopDirective -> llvm::omp::Directive = target
+  !$omp target map(tofrom: array(2:10))
+    array(2) = array(2) * 2
+  !$omp end target
+end subroutine
+
+! CHECK-LABEL: Name = 'test_sections'
+subroutine test_sections()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPSectionsConstruct
+  !$omp sections
+  ! CHECK-NOT: OpenMPConstruct -> OpenMPSectionConstruct
+  !$omp section
+  ! CHECK-NOT: OpenMPConstruct -> OpenMPSectionConstruct
+  !$omp section
+  !$omp end sections
+end subroutine
+
+! CHECK-LABEL: Name = 'test_threadprivate_mod'
+module test_threadprivate_mod
+  implicit none
+  ! CHECK: DeclarationConstruct -> SpecificationConstruct -> 
TypeDeclarationStmt
+  ! CHECK: Name = 'x'
+  ! CHECK: Name = 'y'
+  integer :: x, y
+  ! CHECK: DeclarationConstruct -> SpecificationConstruct -> 
OtherSpecificationStmt -> CommonStmt
+  ! CHECK: Name = 'x'
+  ! CHECK: Name = 'y'
+  common /vars/ x, y
+  ! CHECK-NOT: DeclarationConstruct -> SpecificationConstruct -> 
OpenMPDeclarativeConstruct -> OpenMPThreadprivate
+  !$omp threadprivate(/vars/)
+end module
+
+! CHECK-LABEL: Name = 'test_atomic'
+subroutine test_atomic()
+  real :: z, x, y
+  !$omp parallel private(tid, z)
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=y'
+    !$omp atomic write
+      x = y
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'z=x'
+    !$omp atomic read
+      z = x
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=x+1._4'
+    !$omp atomic update
+      x = x + 1
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'z=x'
+    !$omp atomic read
+      z = x
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPAtomicConstruct
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=x+y'
+    !$omp atomic capture
+      x   = x + y
+    !$omp end atomic
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_task_single_taskwait'
+subroutine test_task_single_taskwait()
+  integer :: x
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = single
+  !$omp single
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+    do i = 1, 5
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=i'
+      !$omp task
+      x = i
+      !$omp end task
+    end do
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskwait
+    !$omp taskwait
+  !$omp end single
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_task_taskyield_flush_barrier'
+subroutine test_task_taskyield_flush_barrier()
+  integer :: x, i
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = barrier
+    !$omp barrier
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = single
+    !$omp single
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      !$omp task
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+        ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskyield
+        !$omp taskyield
+        ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt 
-> AssignmentStmt = 'x=i'
+        x = i
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPFlushConstruct -> 
OmpDirectiveSpecification
+        !$omp flush
+      !$omp end task
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = task
+      !$omp task
+        ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPFlushConstruct -> 
OmpDirectiveSpecification
+        !$omp flush
+      !$omp end task
+      ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+      ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = taskwait
+      !$omp taskwait
+    !$omp end single
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_master_masked'
+subroutine test_master_masked()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel private(tid)
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = masked
+    !$omp masked
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=y'
+    x = y
+    !$omp end masked
+    ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+    ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = master
+    !$omp master
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'y=x'
+    y = x
+    !$omp end master
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: Name = 'test_critical'
+subroutine test_critical()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = parallel
+  !$omp parallel do private(i)
+  do i = 1, 4
+    !$omp critical(mylock)
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=y'
+    x = y
+    !$omp end critical(mylock)
+  end do
+  !$omp end parallel do
+end subroutine
+
+! CHECK-LABEL: Name = 'test_target_enter_exit_update_data'
+subroutine test_target_enter_exit_update_data()
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target enter data
+  !$omp target enter data map(to: A)
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPBlockConstruct
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target teams 
distribute parallel do
+  !$omp target teams distribute parallel do
+  ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> DoConstruct
+  do i = 1, n
+    ! CHECK: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> 
AssignmentStmt = 'x=y'
+    x = y
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target update
+  !$omp target update from(A)
+  ! CHECK-NOT: ExecutionPartConstruct -> ExecutableConstruct -> 
OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct 
-> OmpDirectiveSpecification
+  ! CHECK-NOT: OmpDirectiveName -> llvm::omp::Directive = target exit data
+  !$omp target exit data map(from: A)
+end subroutine
+
+! CHECK-LABEL: Name = 'test_declare_mapper'
+module test_declare_mapper
+  implicit none
+
+  type :: myvec_t
+    integer               :: len
+    real, allocatable     :: data(:)
+  end type myvec_t
+
+  ! CHECK-NOT: DeclarationConstruct -> SpecificationConstruct -> 
OpenMPDeclarativeConstruct -> OpenMPDeclareMapperConstruct
+  !$omp declare mapper(myvec_t :: v) map(v, v%data(1:v%len))
+end module

diff  --git a/flang/test/Transforms/OpenMP/simd-only.mlir 
b/flang/test/Transforms/OpenMP/simd-only.mlir
new file mode 100644
index 0000000000000..0025d10fbd21a
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/simd-only.mlir
@@ -0,0 +1,196 @@
+// RUN: fir-opt --split-input-file --verify-diagnostics --omp-simd-only %s | FileCheck %s
+
+// Check that simd operations are not removed and rewritten, but all the other OpenMP ops are.
+// Tests the logic in flang/lib/Optimizer/OpenMP/SimdOnly.cpp
+
+// CHECK: omp.private
+// CHECK-LABEL: func.func @simd
+omp.private {type = private} @_QFEi_private_i32 : i32
+func.func @simd(%arg0: i32, %arg1: !fir.ref<i32>, %arg2: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK: omp.simd private
+  omp.simd private(@_QFEi_private_i32 %arg2 -> %arg3 : !fir.ref<i32>) {
+    // CHECK: omp.loop_nest
+    omp.loop_nest (%arg4) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+      // CHECK: fir.store
+      fir.store %arg0 to %arg1 : !fir.ref<i32>
+      // CHECK: omp.yield
+      omp.yield
+    }
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @simd_composite
+func.func @simd_composite(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel {
+    // CHECK-NOT: omp.wsloop
+    omp.wsloop {
+      // CHECK: omp.simd
+      omp.simd {
+        // CHECK: omp.loop_nest
+        omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+          // CHECK: fir.store
+          fir.store %arg0 to %arg1 : !fir.ref<i32>
+          // CHECK: omp.yield
+          omp.yield
+        }
+      // CHECK-NOT: {omp.composite}
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-NOT: omp.private
+// CHECK-LABEL: func.func @parallel
+omp.private {type = private} @_QFEi_private_i32 : i32
+func.func @parallel(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1 = arith.constant 1 : index
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg3 : !fir.ref<i32>) {
+    // CHECK: fir.convert
+    %15 = fir.convert %c1_i32 : (i32) -> index
+    // CHECK: fir.convert
+    %16 = fir.convert %c100000_i32 : (i32) -> index
+    // CHECK: fir.do_loop
+    %18:2 = fir.do_loop %arg4 = %15 to %16 step %c1 iter_args(%arg2 = %arg0) -> (index, i32) {
+      // CHECK: fir.store
+      fir.store %arg0 to %arg1 : !fir.ref<i32>
+      fir.result %arg4, %arg2 : index, i32
+    }
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+    }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @target_map(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @target_map(%arg5: i32, %arg6: !fir.ref<i32>) {
+  // CHECK-NOT: omp.map.info
+  %3 = omp.map.info var_ptr(%arg6 : !fir.ref<i32>, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref<i32>
+  // CHECK-NOT: omp.target
+  omp.target map_entries(%3 -> %arg0 : !fir.ref<i32>) {
+    // CHECK: arith.constant
+    %c1_i32 = arith.constant 1 : i32
+    // CHECK: fir.store %c1_i32 to %[[ARG_1]]
+    fir.store %c1_i32 to %arg0 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @teams
+func.func @teams(%arg0: i32, %arg1: !fir.ref<i32>) {
+  // CHECK-NOT: omp.teams
+  omp.teams {
+    // CHECK: fir.store
+    fir.store %arg0 to %arg1 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @distribute_simd
+func.func @distribute_simd(%arg0: i32, %arg1: !fir.ref<i32>) {
+  %c1_i32 = arith.constant 1 : i32
+  %c100000_i32 = arith.constant 100000 : i32
+  // CHECK-NOT: omp.distribute
+  omp.distribute {
+    // CHECK: omp.simd
+    omp.simd {
+      // CHECK: omp.loop_nest
+      omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100000_i32) inclusive step (%c1_i32) {
+        // CHECK: fir.store
+        fir.store %arg0 to %arg1 : !fir.ref<i32>
+        // CHECK: omp.yield
+        omp.yield
+      }
+    // CHECK-NOT: {omp.composite}
+    } {omp.composite}
+  // CHECK-NOT: {omp.composite}
+  } {omp.composite}
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @threadprivate(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @threadprivate(%arg0: i32, %arg1: !fir.ref<i32>) {
+  // CHECK-NOT: omp.threadprivate
+  %1 = omp.threadprivate %arg1 : !fir.ref<i32> -> !fir.ref<i32>
+  // CHECK: fir.store %[[ARG_0]] to %[[ARG_1]]
+  fir.store %arg0 to %1 : !fir.ref<i32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @multi_block(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>, %[[ARG_3:.*]]: i1
+func.func @multi_block(%funcArg0: i32, %funcArg1: !fir.ref<i32>, %6: i1) {
+  %false = arith.constant false
+  %c0_i32 = arith.constant 0 : i32
+  // CHECK-NOT: omp.parallel
+  omp.parallel {
+    // CHECK: cf.cond_br %[[ARG_3]], ^[[BB1:.*]], ^[[BB2:.*]]
+    cf.cond_br %6, ^bb1, ^bb2
+  // CHECK: ^[[BB1]]
+  ^bb1:  // pred: ^bb0
+    // CHECK: fir.call
+    fir.call @_FortranAStopStatement(%c0_i32, %false, %false) fastmath<contract> : (i32, i1, i1) -> ()
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  // CHECK: ^[[BB2]]
+  ^bb2:  // pred: ^bb0
+    // CHECK: fir.store
+    fir.store %funcArg0 to %funcArg1 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func.func @map_info(
+// CHECK-SAME: %[[ARG_0:.*]]: i32, %[[ARG_1:.*]]: !fir.ref<i32>
+func.func @map_info(%funcArg0: i32, %funcArg1: !fir.ref<i32>) {
+  %c1 = arith.constant 1 : index
+  // CHECK-NOT: omp.map.bounds
+  %1 = omp.map.bounds lower_bound(%c1 : index) upper_bound(%c1 : index) extent(%c1 : index) stride(%c1 : index) start_idx(%c1 : index)
+  // CHECK-NOT: omp.map.info
+  %13 = omp.map.info var_ptr(%funcArg1 : !fir.ref<i32>, i32) map_clauses(to) capture(ByRef) bounds(%1) -> !fir.ref<i32>
+  // CHECK-NOT: omp.target
+  omp.target map_entries(%13 -> %arg3 : !fir.ref<i32>) {
+    %c1_i32 = arith.constant 1 : i32
+    // CHECK: fir.store %c1_i32 to %[[ARG_1]]
+    fir.store %c1_i32 to %arg3 : !fir.ref<i32>
+    // CHECK-NOT: omp.terminator
+    omp.terminator
+  }
+  // CHECK-NOT: omp.map.info
+  %18 = omp.map.info var_ptr(%funcArg1 : !fir.ref<i32>, i32) map_clauses(from) capture(ByRef) bounds(%1) -> !fir.ref<i32>
+  return
+}

diff  --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index edfc878d17524..82dff2653ad09 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -520,7 +520,9 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
 
     if (emitFIR && useHLFIR) {
       // lower HLFIR to FIR
-      fir::createHLFIRToFIRPassPipeline(pm, enableOpenMP,
+      fir::EnableOpenMP enableOmp =
+          enableOpenMP ? fir::EnableOpenMP::Full : fir::EnableOpenMP::None;
+      fir::createHLFIRToFIRPassPipeline(pm, enableOmp,
                                         llvm::OptimizationLevel::O2);
       if (mlir::failed(pm.run(mlirModule))) {
         llvm::errs() << "FATAL: lowering from HLFIR to FIR failed";


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to