https://github.com/pawosm-arm updated 
https://github.com/llvm/llvm-project/pull/197947

>From 1ee207613582e1fe36e4e9a1a4c3c28047604881 Mon Sep 17 00:00:00 2001
From: Paul Osmialowski <[email protected]>
Date: Mon, 11 May 2026 08:11:25 +0000
Subject: [PATCH] [flang][cmake][perf-training] Optimize flang with PGO and
 BOLT

This is an attempt to replicate a similar feature already available for
clang. The changes in this patch were made with the intent to reuse as
much of the existing infrastructure as possible. Namely, the two-stage
build arrangement, the perf-helper.py script and the means for building
the instrumented binaries have all been incorporated in this approach.

Clang is deliberately optimized along with flang, as the two mostly
work together in the final toolchain.

See the llvm/docs/AdvancedBuilds.rst documentation for more details.
---
 bolt/runtime/common.h                         |   2 +-
 clang/CMakeLists.txt                          |  12 +-
 flang/CMakeLists.txt                          |   1 +
 flang/cmake/caches/BOLT-PGO.cmake             |  27 ++++
 flang/cmake/caches/BOLT.cmake                 |  20 +++
 .../caches/PGO-stage2-instrumented.cmake      |  28 ++++
 flang/cmake/caches/PGO-stage2.cmake           |   5 +
 flang/cmake/caches/PGO.cmake                  |  38 +++++
 flang/tools/flang-driver/CMakeLists.txt       |  74 +++++++++
 flang/utils/perf-training/CMakeLists.txt      | 140 ++++++++++++++++++
 flang/utils/perf-training/README.txt          |   6 +
 flang/utils/perf-training/bolt.lit.cfg        |  59 ++++++++
 .../utils/perf-training/bolt.lit.site.cfg.in  |  19 +++
 .../utils/perf-training/f90/hello-openmp.f95  |  14 ++
 flang/utils/perf-training/f90/hello-world.f95 |   8 +
 flang/utils/perf-training/f90/hello.f         |  24 +++
 .../perf-training/f90/module_torture.f95      |  47 ++++++
 flang/utils/perf-training/f90/sincos.f95      |  24 +++
 .../perf-training/flang-modules/build.test    |  10 ++
 flang/utils/perf-training/lit.cfg             |  50 +++++++
 flang/utils/perf-training/lit.site.cfg.in     |  17 +++
 flang/utils/perf-training/order-files.lit.cfg |  43 ++++++
 .../perf-training/order-files.lit.site.cfg.in |  12 ++
 llvm/docs/AdvancedBuilds.rst                  |  86 ++++++++++-
 24 files changed, 755 insertions(+), 11 deletions(-)
 create mode 100644 flang/cmake/caches/BOLT-PGO.cmake
 create mode 100644 flang/cmake/caches/BOLT.cmake
 create mode 100644 flang/cmake/caches/PGO-stage2-instrumented.cmake
 create mode 100644 flang/cmake/caches/PGO-stage2.cmake
 create mode 100644 flang/cmake/caches/PGO.cmake
 create mode 100644 flang/utils/perf-training/CMakeLists.txt
 create mode 100644 flang/utils/perf-training/README.txt
 create mode 100644 flang/utils/perf-training/bolt.lit.cfg
 create mode 100644 flang/utils/perf-training/bolt.lit.site.cfg.in
 create mode 100644 flang/utils/perf-training/f90/hello-openmp.f95
 create mode 100644 flang/utils/perf-training/f90/hello-world.f95
 create mode 100644 flang/utils/perf-training/f90/hello.f
 create mode 100644 flang/utils/perf-training/f90/module_torture.f95
 create mode 100644 flang/utils/perf-training/f90/sincos.f95
 create mode 100644 flang/utils/perf-training/flang-modules/build.test
 create mode 100644 flang/utils/perf-training/lit.cfg
 create mode 100644 flang/utils/perf-training/lit.site.cfg.in
 create mode 100644 flang/utils/perf-training/order-files.lit.cfg
 create mode 100644 flang/utils/perf-training/order-files.lit.site.cfg.in

diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 8689bc8b72041..96e269e1bb79b 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -162,7 +162,7 @@ struct timespec {
 #error "For AArch64/ARM64,X86_64 AND RISCV64 only."
 #endif
 
-constexpr uint32_t BufSize = 10240;
+constexpr uint32_t BufSize = 32768U;
 
 // Helper functions for writing strings to the .fdata file. We intentionally
 // avoid using libc names to make it clear it is our impl.
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index cd7ba53b03061..e920e83a537d4 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -813,11 +813,15 @@ if (CLANG_ENABLE_BOOTSTRAP)
   if(LLVM_BUILD_INSTRUMENTED)
     string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
     if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
-      add_dependencies(clang-bootstrap-deps generate-sprofdata)
-      set(PGO_OPT 
-DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata)
+      set(PGO_OPT_SPROFDATA 
"${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata" CACHE STRING 
"")
+      set(PGO_OPT_SPROFDATA_PROVIDER generate-sprofdata CACHE STRING "")
+      add_dependencies(clang-bootstrap-deps ${PGO_OPT_SPROFDATA_PROVIDER})
+      set(PGO_OPT -DLLVM_SPROFDATA_FILE=${PGO_OPT_SPROFDATA})
     else()
-      add_dependencies(clang-bootstrap-deps generate-profdata)
-      set(PGO_OPT 
-DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
+      set(PGO_OPT_PROFDATA 
"${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata" CACHE STRING 
"")
+      set(PGO_OPT_PROFDATA_PROVIDER generate-profdata CACHE STRING "")
+      add_dependencies(clang-bootstrap-deps ${PGO_OPT_PROFDATA_PROVIDER})
+      set(PGO_OPT -DLLVM_PROFDATA_FILE=${PGO_OPT_PROFDATA})
     endif()
     # Use the current tools for LTO instead of the instrumented ones
     list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index bb74465411ad6..0e0be24236e72 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -510,6 +510,7 @@ if (FLANG_INCLUDE_TESTS)
   if (FLANG_GTEST_AVAIL)
     add_subdirectory(unittests)
   endif ()
+  add_subdirectory(utils/perf-training)
 endif()
 
 option(FLANG_INCLUDE_DOCS "Generate build targets for the Flang docs."
diff --git a/flang/cmake/caches/BOLT-PGO.cmake 
b/flang/cmake/caches/BOLT-PGO.cmake
new file mode 100644
index 0000000000000..e71b9a9817674
--- /dev/null
+++ b/flang/cmake/caches/BOLT-PGO.cmake
@@ -0,0 +1,27 @@
+# Two-stage build of Flang with the 2nd stage optimized using BOLT and PGO
+
+set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "")
+set(LLVM_ENABLE_PROJECTS "bolt;clang;flang;lld" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING 
"")
+
+set(CLANG_BOOTSTRAP_TARGETS
+  stage2-clang-bolt
+  stage2-flang-bolt
+  stage2-check-clang
+  stage2-check-flang
+  stage2-distribution
+  stage2-install-distribution
+  CACHE STRING "")
+set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
+  clang-bolt
+  flang-bolt
+  check-clang
+  check-flang
+  distribution
+  install-distribution
+  CACHE STRING "")
+
+set(PGO_BUILD_CONFIGURATION
+  ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
+  CACHE STRING "")
+include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake)
diff --git a/flang/cmake/caches/BOLT.cmake b/flang/cmake/caches/BOLT.cmake
new file mode 100644
index 0000000000000..56ff3c78d6c13
--- /dev/null
+++ b/flang/cmake/caches/BOLT.cmake
@@ -0,0 +1,20 @@
+# Two-stage build of Flang with the 2nd stage optimized using BOLT
+
+set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
+set(FLANG_BOLT ${CLANG_BOLT} CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
+
+set(LLVM_ENABLE_PROJECTS "bolt;clang;flang" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING 
"")
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+
+# setup toolchain
+set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_DISTRIBUTION_COMPONENTS
+  clang
+  clang-resource-headers
+  flang
+  runtimes
+  CACHE STRING "")
diff --git a/flang/cmake/caches/PGO-stage2-instrumented.cmake 
b/flang/cmake/caches/PGO-stage2-instrumented.cmake
new file mode 100644
index 0000000000000..5cc719f56ec19
--- /dev/null
+++ b/flang/cmake/caches/PGO-stage2-instrumented.cmake
@@ -0,0 +1,28 @@
+# Second stage instrumentation (used by PGO.cmake)
+
+set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
+set(CLANG_BOOTSTRAP_TARGETS
+  distribution
+  install-distribution
+  install-distribution-toolchain
+  check-all
+  check-llvm
+  check-clang
+  check-flang
+  test-suite CACHE STRING "")
+set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "")
+set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "")
+set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "")
+
+if(PGO_BUILD_CONFIGURATION)
+  include(${PGO_BUILD_CONFIGURATION})
+  set(CLANG_BOOTSTRAP_CMAKE_ARGS
+    -C ${PGO_BUILD_CONFIGURATION}
+    CACHE STRING "")
+else()
+  include(${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake)
+
+  set(CLANG_BOOTSTRAP_CMAKE_ARGS
+    -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake
+    CACHE STRING "")
+endif()
diff --git a/flang/cmake/caches/PGO-stage2.cmake 
b/flang/cmake/caches/PGO-stage2.cmake
new file mode 100644
index 0000000000000..7e04e40941894
--- /dev/null
+++ b/flang/cmake/caches/PGO-stage2.cmake
@@ -0,0 +1,5 @@
+# Second stage of PGO (used by PGO-stage2-instrumented.cmake)
+
+set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
+set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES 
"compiler-rt;flang-rt;libcxx;libcxxabi;libunwind;openmp" CACHE STRING "")
diff --git a/flang/cmake/caches/PGO.cmake b/flang/cmake/caches/PGO.cmake
new file mode 100644
index 0000000000000..11564802377d8
--- /dev/null
+++ b/flang/cmake/caches/PGO.cmake
@@ -0,0 +1,38 @@
+# Two-stage build of Flang with the 2nd stage optimized using PGO
+
+set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
+set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
+
+set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES 
"compiler-rt;flang-rt;libcxx;libcxxabi;libunwind;openmp" CACHE STRING "")
+
+set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
+# The value is an instrumentation mode name ("IR"), not a boolean, so it is
+# cached as STRING rather than BOOL.
+set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "")
+set(CLANG_BOOTSTRAP_TARGETS
+  generate-flang-profdata
+  stage2
+  stage2-distribution
+  stage2-install-distribution
+  stage2-install-distribution-toolchain
+  stage2-check-all
+  stage2-check-llvm
+  stage2-check-clang
+  stage2-check-flang
+  stage2-test-suite CACHE STRING "")
+set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "")
+set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "")
+set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "")
+
+if(PGO_INSTRUMENT_LTO)
+  set(BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "")
+  set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "")
+endif()
+
+if(PGO_BUILD_CONFIGURATION)
+  set(EXTRA_ARGS -DPGO_BUILD_CONFIGURATION=${PGO_BUILD_CONFIGURATION})
+endif()
+
+set(CLANG_BOOTSTRAP_CMAKE_ARGS
+  ${EXTRA_ARGS}
+  -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2-instrumented.cmake
+  CACHE STRING "")
diff --git a/flang/tools/flang-driver/CMakeLists.txt 
b/flang/tools/flang-driver/CMakeLists.txt
index 4dfc0d40cd55d..ab7f9e0d5bead 100644
--- a/flang/tools/flang-driver/CMakeLists.txt
+++ b/flang/tools/flang-driver/CMakeLists.txt
@@ -12,9 +12,29 @@ set( LLVM_LINK_COMPONENTS
   TargetParser
 )
 
+set(FLANG_BOLT_ALLOWLIST INSTRUMENT PERF LBR)
+set(FLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to flang. \
+May be specified as one of ${FLANG_BOLT_ALLOWLIST} to use a particular 
profiling \
+  mechanism.")
+set_property(CACHE FLANG_BOLT PROPERTY STRINGS OFF ${FLANG_BOLT_ALLOWLIST})
+string(TOUPPER "${FLANG_BOLT}" FLANG_BOLT)
+if (FLANG_BOLT AND NOT FLANG_BOLT IN_LIST FLANG_BOLT_ALLOWLIST)
+    message(FATAL_ERROR "Specified FLANG_BOLT value '${FLANG_BOLT}' is not one 
of ${FLANG_BOLT_ALLOWLIST}.")
+endif()
+
+# Targets that must be up to date before flang is linked and BOLT-optimized.
+# merge-fdata is listed because the POST_BUILD step below invokes it. clang-bolt
+# is only added when that target exists (presumably only when clang itself is
+# being BOLT-optimized), so FLANG_BOLT can also be enabled on its own.
+if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
+  set(FLANG_BOLT_DEPS clear-flang-bolt-fdata llvm-bolt llvm-readobj merge-fdata)
+  if (TARGET clang-bolt)
+    list(APPEND FLANG_BOLT_DEPS clang-bolt)
+  endif()
+  # The perf-based modes (PERF, LBR) additionally need stale perf.data removed.
+  if (NOT FLANG_BOLT STREQUAL "INSTRUMENT")
+    list(APPEND FLANG_BOLT_DEPS clear-flang-perf-data)
+  endif()
+endif()
+
 add_flang_tool(flang
   driver.cpp
   fc1_main.cpp
+
+  DEPENDS
+  ${FLANG_BOLT_DEPS}
 )
 
 target_link_libraries(flang
@@ -46,6 +66,60 @@ endif()
 
 install(TARGETS flang DESTINATION "${CMAKE_INSTALL_BINDIR}")
 
+if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
+  # Add a flang-bolt target for backwards compatibility.
+  add_custom_target(flang-bolt DEPENDS flang)
+
+  set(FLANG_BOLT_INSTRUMENTED "flang-bolt.inst" CACHE STRING
+    "Name of BOLT-instrumented flang binary")
+  set(FLANG_INSTRUMENTED 
"${LLVM_RUNTIME_OUTPUT_INTDIR}/${FLANG_BOLT_INSTRUMENTED}")
+  set(PERF_TRAINING_BINARY_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/../../utils/perf-training")
+  set(FLANG_BOLT_FDATA "${PERF_TRAINING_BINARY_DIR}/flang-prof.fdata")
+  get_llvm_lit_path(
+    lit_base_dir
+    lit_file_name
+    ALLOW_EXTERNAL
+  )
+  set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}")
+
+  set(FLANG_BOLT_INPUTS "$<TARGET_FILE:flang>")
+  set(FLANG_INSTRUMENTED_OUTPUTS "${FLANG_INSTRUMENTED}")
+
+  # Add in dynamically linked libraries, if needs be. Currently only supported
+  # on Linux because it relies on LD_PRELOAD for instrumentation.
+  if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    if (LLVM_LINK_LLVM_DYLIB)
+      set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING
+        "Name of BOLT-instrumented LLVM library")
+      set(LLVM_INSTRUMENTED 
"${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED}")
+      list(APPEND FLANG_BOLT_INPUTS "$<TARGET_FILE:LLVM>")
+      list(APPEND FLANG_INSTRUMENTED_OUTPUTS "${LLVM_INSTRUMENTED}")
+    endif()
+  endif()
+
+  # This POST_BUILD command is executed unconditionally even if the flang 
target
+  # is already built.  We need to wrap the whole bolt optimization process in
+  # a single python wrapper, so that we can first check if the binary has
+  # already been optimized and then exit early with a 0 status if it has.
+  add_custom_command(
+    TARGET flang POST_BUILD
+    COMMAND  "${Python3_EXECUTABLE}" 
"${LLVM_MAIN_SRC_DIR}/../clang/utils/perf-training/perf-helper.py"
+             bolt-optimize
+             --method "${FLANG_BOLT}"
+             --input "${FLANG_BOLT_INPUTS}"
+             --instrumented-output "${FLANG_INSTRUMENTED_OUTPUTS}"
+             --fdata "${FLANG_BOLT_FDATA}"
+             --perf-training-binary-dir "${PERF_TRAINING_BINARY_DIR}"
+             --readelf "$<TARGET_FILE:llvm-readobj>"
+             --bolt "$<TARGET_FILE:llvm-bolt>"
+             --lit "${LIT_COMMAND}"
+             --merge-fdata "$<TARGET_FILE:merge-fdata>"
+    COMMENT "Optimizing flang with BOLT"
+    USES_TERMINAL
+    VERBATIM
+  )
+endif()
+
 # Keep "flang-new" as a symlink for backwards compatiblity. Remove once "flang"
 # is a widely adopted name.
 add_flang_symlink(flang-new flang)
diff --git a/flang/utils/perf-training/CMakeLists.txt 
b/flang/utils/perf-training/CMakeLists.txt
new file mode 100644
index 0000000000000..f4dedbde8c0db
--- /dev/null
+++ b/flang/utils/perf-training/CMakeLists.txt
@@ -0,0 +1,140 @@
+include(LLVMExternalProjectUtils)
+
+set(FLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH
+  "The path to a lit testsuite containing samples for PGO and order file 
generation"
+  )
+set(FLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source 
directory containing cmake project with source files to use for generating 
flang pgo data")
+set(FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake 
flags to pass to the cmake project with source files to use for generating 
flang pgo data")
+set(FLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to 
build the PGO training data.")
+set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "Train clang and flang 
together")
+if(FLANG_PGO_TRAINING_CLANG_COUPLING)
+  set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source 
directory containing cmake project with source files to use for generating 
clang pgo data")
+  set(CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake 
flags to pass to the cmake project with source files to use for generating 
clang pgo data")
+endif()
+
+set(CLANG_CURRENT_BINARY_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/../../../clang/utils/perf-training")
+
+set(PERF_HELPER 
"${LLVM_MAIN_SRC_DIR}/../clang/utils/perf-training/perf-helper.py")
+
+add_custom_target(clear-flang-perf-data
+  COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean 
"${CMAKE_CURRENT_BINARY_DIR}" perf.data
+  COMMENT "Clearing old flang perf data")
+
+option(FLANG_PGO_TRAINING_USE_LLVM_BUILD "Use LLVM build for generating PGO 
data" ON)
+
+llvm_canonicalize_cmake_booleans(
+  FLANG_PGO_TRAINING_USE_LLVM_BUILD
+)
+
+if(LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+    "${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg"
+    )
+
+  add_lit_testsuite(generate-flang-profraw "Generating flang PGO data"
+    "${CMAKE_CURRENT_BINARY_DIR}/pgo-data/"
+    EXCLUDE_FROM_CHECK_ALL
+    DEPENDS flang flang-rt
+    )
+  if(FLANG_PGO_TRAINING_CLANG_COUPLING)
+    add_lit_testsuite(generate-clang-profraw "Generating clang PGO data"
+      "${CLANG_CURRENT_BINARY_DIR}/pgo-data/"
+      EXCLUDE_FROM_CHECK_ALL
+      DEPENDS clang
+      )
+  endif()
+
+  add_custom_target(clear-flang-profraw
+    COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean 
"${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_BINARY_DIR}/profiles/" profraw
+    COMMENT "Clearing old flang profraw data")
+  if (FLANG_PGO_TRAINING_CLANG_COUPLING)
+    add_dependencies(clear-flang-profraw clear-profraw)
+  endif()
+
+  if(NOT LLVM_PROFDATA)
+    find_program(LLVM_PROFDATA llvm-profdata)
+  endif()
+
+  if(NOT LLVM_PROFDATA)
+    message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to 
llvm-profdata")
+  else()
+    set(PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata")
+    set(PROFRAW_TARGETS "")
+    set(PROFRAW_DIRS "")
+    set(PROFRAW_DEPS "")
+    if(FLANG_PGO_TRAINING_CLANG_COUPLING)
+      list(APPEND PROFRAW_TARGETS generate-clang-profraw)
+      list(APPEND PROFRAW_DIRS "${CLANG_CURRENT_BINARY_DIR}")
+      list(APPEND PROFRAW_DEPS clang)
+      if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
+        llvm_ExternalProject_Add(generate-clang-profraw-external
+                                 "${CLANG_PGO_TRAINING_DATA_SOURCE_DIR}"
+                                 USE_TOOLCHAIN
+                                 EXCLUDE_FROM_ALL
+                                 NO_INSTALL
+                                 CMAKE_ARGS 
"${CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}")
+        list(APPEND PROFRAW_TARGETS generate-clang-profraw-external)
+      endif()
+    endif()
+    list(APPEND PROFRAW_TARGETS generate-flang-profraw)
+    list(APPEND PROFRAW_DIRS
+      "${CMAKE_CURRENT_BINARY_DIR}"
+      "${CMAKE_BINARY_DIR}/profiles/")
+    list(APPEND PROFRAW_DEPS
+      flang
+      flang-rt)
+    if (FLANG_PGO_TRAINING_DATA_SOURCE_DIR)
+      llvm_ExternalProject_Add(generate-flang-profraw-external
+                               "${FLANG_PGO_TRAINING_DATA_SOURCE_DIR}"
+                               USE_TOOLCHAIN
+                               ENABLE_FORTRAN
+                               EXCLUDE_FROM_ALL
+                               NO_INSTALL
+                               CMAKE_ARGS 
"${FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}")
+      list(APPEND PROFRAW_TARGETS generate-flang-profraw-external)
+    endif()
+    add_custom_command(
+      OUTPUT "${PROFDATA}"
+      # PROFRAW_TARGETS are custom targets which are always considered stale.
+      # If we add them here to 'DEPENDS', then it will always execute and 
running
+      # ninja install && ninja check-all will result in the profile data being
+      # generated twice, and cause the ninja check-all build to fail with 
errors like:
+      # `ld.lld: error: Function Import: link error: linking module flags 
'ProfileSummary': IDs have conflicting values in`
+      # Therefore we call these targets manually as part of this custom 
command,
+      # which will only run if flang or ${FLANG_PGO_TRAINING_DEPS} are updated.
+      COMMAND "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}" --target 
${PROFRAW_TARGETS}
+      COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" merge 
"${LLVM_PROFDATA}" "${PROFDATA}" ${PROFRAW_DIRS}
+      COMMENT "Merging flang profdata"
+      DEPENDS ${PROFRAW_DEPS} ${FLANG_PGO_TRAINING_DEPS} clear-flang-profraw
+    )
+    add_custom_target(generate-flang-profdata DEPENDS ${PROFDATA})
+
+    # When clang and flang are trained together, make the next bootstrap stage
+    # wait for the merged flang profile. The clang-bootstrap-deps target is
+    # expected to exist only in bootstrap builds, so the hookup is skipped when
+    # it is absent instead of failing on a non-existent target.
+    # NOTE(review): relies on clang being configured before flang - confirm.
+    if(FLANG_PGO_TRAINING_CLANG_COUPLING)
+      string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_UPPER_INSTRUMENTED)
+      if (LLVM_BUILD_UPPER_INSTRUMENTED STREQUAL "CSSPGO")
+        message(STATUS "CSSPGO of flang is not supported")
+      elseif(TARGET clang-bootstrap-deps)
+        add_dependencies(clang-bootstrap-deps generate-flang-profdata)
+      endif()
+    endif()
+  endif()
+endif()
+
+if(FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
+  configure_lit_site_cfg(
+    "${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in"
+    "${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg"
+    )
+
+  add_lit_testsuite(generate-flang-bolt-fdata "Generating BOLT profile for 
flang"
+    "${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/"
+    EXCLUDE_FROM_CHECK_ALL
+    DEPENDS clear-flang-bolt-fdata clear-flang-perf-data
+    )
+
+  add_custom_target(clear-flang-bolt-fdata
+    COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean 
"${CMAKE_CURRENT_BINARY_DIR}" fdata
+    COMMENT "Clearing old flang BOLT fdata")
+
+endif()
diff --git a/flang/utils/perf-training/README.txt 
b/flang/utils/perf-training/README.txt
new file mode 100644
index 0000000000000..31e1ec0a5e570
--- /dev/null
+++ b/flang/utils/perf-training/README.txt
@@ -0,0 +1,6 @@
+==========================
+ Performance Training Data
+==========================
+
+This directory contains simple source files for use as training data for
+generating PGO data and linker order files.
diff --git a/flang/utils/perf-training/bolt.lit.cfg 
b/flang/utils/perf-training/bolt.lit.cfg
new file mode 100644
index 0000000000000..e8cda61d154a0
--- /dev/null
+++ b/flang/utils/perf-training/bolt.lit.cfg
@@ -0,0 +1,59 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import re
+import subprocess
+
+flang_bolt_mode = config.flang_bolt_mode.lower()
+flang_binary = "flang"
+perf_wrapper = f"{sys.executable} {config.perf_helper_dir}/perf-helper.py perf 
"
+
+if flang_bolt_mode == "instrument":
+    perf_wrapper = ""
+    flang_binary = config.flang_bolt_name
+elif flang_bolt_mode == "lbr":
+    perf_wrapper += " --lbr -- "
+elif flang_bolt_mode == "perf":
+    perf_wrapper += " -- "
+else:
+    assert 0, "Unsupported flang bolt mode"
+
+flang_nowrapper = os.path.realpath(
+    lit.util.which(flang_binary, config.flang_tools_dir)
+).replace("\\", "/")
+config.flang = perf_wrapper + flang_nowrapper
+config.cmake_compiler_args = "-DCMAKE_Fortran_COMPILER='{0}'".format(
+    re.sub(r"\s+", ";", flang_nowrapper)
+)
+
+config.name = "Flang Perf Training"
+config.suffixes = [
+    ".f",
+    ".f90",
+    ".f95",
+    ".f03",
+    ".f08",
+    ".f18",
+    ".F",
+    ".F90",
+    ".F95",
+    ".F03",
+    ".F08",
+    ".F18",
+    ".test",
+]
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append(("%flang_skip_driver", config.flang))
+config.substitutions.append(("%flang", config.flang))
+config.substitutions.append(("%test_root", config.test_exec_root))
+config.substitutions.append(("%cmake_compiler_args", 
config.cmake_compiler_args))
+config.substitutions.append(('%cmake_generator', config.cmake_generator))
+config.substitutions.append(('%cmake', config.cmake_exe))
+config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir))
+config.substitutions.append(('%module_src_dir', config.module_src_dir))
+config.substitutions.append(('%perf_wrapper', perf_wrapper))
diff --git a/flang/utils/perf-training/bolt.lit.site.cfg.in 
b/flang/utils/perf-training/bolt.lit.site.cfg.in
new file mode 100644
index 0000000000000..ab421841b7d6c
--- /dev/null
+++ b/flang/utils/perf-training/bolt.lit.site.cfg.in
@@ -0,0 +1,19 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@FLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.flang_obj_root = path(r"@FLANG_BINARY_DIR@")
+config.flang_bolt_mode = "@FLANG_BOLT@"
+config.flang_bolt_name = "@FLANG_BOLT_INSTRUMENTED@"
+config.cmake_exe = "@CMAKE_COMMAND@"
+config.llvm_src_dir = "@CMAKE_SOURCE_DIR@"
+config.module_src_dir = "@LLVM_MAIN_SRC_DIR@/../flang-rt/lib/runtime"
+config.cmake_generator = "@CMAKE_GENERATOR@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, 
"@FLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
diff --git a/flang/utils/perf-training/f90/hello-openmp.f95 
b/flang/utils/perf-training/f90/hello-openmp.f95
new file mode 100644
index 0000000000000..ea13bab62cf7d
--- /dev/null
+++ b/flang/utils/perf-training/f90/hello-openmp.f95
@@ -0,0 +1,14 @@
+! RUN: %flang -fopenmp -c %s
+! RUN: %flang_skip_driver -fopenmp -c %s
+
+program hello_openmp
+  implicit none
+  integer :: i
+
+  !$omp parallel private(i)
+    do i = 0, 64
+      print *, "Hello ", i
+      !$omp barrier
+    end do
+  !$omp end parallel
+end program
diff --git a/flang/utils/perf-training/f90/hello-world.f95 
b/flang/utils/perf-training/f90/hello-world.f95
new file mode 100644
index 0000000000000..85d431c5e07c6
--- /dev/null
+++ b/flang/utils/perf-training/f90/hello-world.f95
@@ -0,0 +1,8 @@
+! RUN: %flang -c %s
+! RUN: %flang_skip_driver -c %s
+
+program hello_world
+  implicit none
+
+  print *, 'Hello, World!'
+end program
diff --git a/flang/utils/perf-training/f90/hello.f 
b/flang/utils/perf-training/f90/hello.f
new file mode 100644
index 0000000000000..d55602840449e
--- /dev/null
+++ b/flang/utils/perf-training/f90/hello.f
@@ -0,0 +1,24 @@
+! RUN: %flang -c %s
+! RUN: %flang_skip_driver -c %s
+
+      PROGRAM HELLO
+        IMPLICIT NONE
+        INTEGER I
+        INTEGER NUM
+        CHARACTER ARG * 32
+
+        NUM = 0
+        CALL GETARG(1, ARG)
+        IF (LEN_TRIM(ARG) .GT. 0) THEN
+          READ (ARG, *, IOSTAT = I) NUM
+        END IF
+        IF (NUM .GT. 0) THEN
+          DO 10 I = 1, NUM
+            WRITE (*, 100) I
+10        CONTINUE
+        ELSE
+          WRITE (*, 200)
+        END IF
+100     FORMAT(' ', I3, '. Hello')
+200     FORMAT(' Hello, world!')
+      END PROGRAM HELLO
diff --git a/flang/utils/perf-training/f90/module_torture.f95 
b/flang/utils/perf-training/f90/module_torture.f95
new file mode 100644
index 0000000000000..ea2f5d51ebef7
--- /dev/null
+++ b/flang/utils/perf-training/f90/module_torture.f95
@@ -0,0 +1,47 @@
+! RUN: %flang -c %s
+! RUN: %flang_skip_driver -c %s
+
+module example_module
+    implicit none
+
+    abstract interface
+
+        subroutine sub_i
+          implicit none
+        end subroutine
+
+    end interface
+
+contains
+
+    subroutine call_internal(string)
+        implicit none
+        character(len=*), intent(in) :: string
+
+        call call_it(print_it)
+
+    contains
+
+        subroutine print_it
+            implicit none
+
+            print *, string
+        end subroutine
+
+    end subroutine
+
+    subroutine call_it(sub)
+        implicit none
+        procedure(sub_i) :: sub
+
+        call sub
+    end subroutine
+
+end module
+
+program module_torture
+    use example_module
+    implicit none
+
+    call call_internal("Hello, World!")
+end program
diff --git a/flang/utils/perf-training/f90/sincos.f95 
b/flang/utils/perf-training/f90/sincos.f95
new file mode 100644
index 0000000000000..ad7a53c42b6e7
--- /dev/null
+++ b/flang/utils/perf-training/f90/sincos.f95
@@ -0,0 +1,24 @@
+! RUN: %flang -O3 -c %s
+! RUN: %flang_skip_driver -O3 -c %s
+
+program sincos_example
+  implicit none
+  integer, parameter :: size = 16
+  integer :: i, max_iter
+  real(8), dimension(size) :: dresult1, dresult2
+  character(len = 32) :: arg
+
+  max_iter = 10
+  if (command_argument_count() .gt. 0) then
+    call get_command_argument(1, arg)
+    read(arg, *), max_iter
+  end if
+  if (max_iter .gt. size) stop
+  do i = 1, max_iter
+    dresult1(i) = dsin(dble(i))
+    dresult2(i) = dcos(dble(i))
+  end do
+  do i = 1, max_iter
+    print *, dresult1(i), dresult2(i)
+  end do
+end program
diff --git a/flang/utils/perf-training/flang-modules/build.test 
b/flang/utils/perf-training/flang-modules/build.test
new file mode 100644
index 0000000000000..83ebcfef4db55
--- /dev/null
+++ b/flang/utils/perf-training/flang-modules/build.test
@@ -0,0 +1,10 @@
+RUN: rm -rf %t && mkdir -p %t
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/__fortran_builtins.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/__fortran_ieee_exceptions.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/flang_debug.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/ieee_arithmetic.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/ieee_exceptions.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/ieee_features.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/iso_c_binding.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/iso_fortran_env_impl.f90
+RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only 
-module-dir %t %module_src_dir/iso_fortran_env.f90
diff --git a/flang/utils/perf-training/lit.cfg 
b/flang/utils/perf-training/lit.cfg
new file mode 100644
index 0000000000000..b551547f5ee3d
--- /dev/null
+++ b/flang/utils/perf-training/lit.cfg
@@ -0,0 +1,50 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import subprocess
+
+def getSysrootFlagsOnDarwin(config, lit_config):
+    # On Darwin, support relocatable SDKs by providing flang with a
+    # default system root path.
+    if 'darwin' in config.target_triple:
+        try:
+            out = subprocess.check_output(['xcrun', 
'--show-sdk-path']).strip().decode()
+            res = 0
+        except OSError:
+            res = -1
+        if res == 0 and out:
+            sdk_path = out
+            lit_config.note('using SDKROOT: %r' % sdk_path)
+            return '-isysroot %s' % sdk_path
+    return ''
+
+sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config)
+
+config.flang = lit.util.which('flang', config.flang_tools_dir).replace('\\', 
'/')
+
+config.name = 'Flang Perf Training'
+config.suffixes = ['.f', '.f90', '.f95', '.f03', '.f08', '.f18', '.F', '.F90', 
'.F95', '.F03', '.F08', '.F18', '.test']
+
+if not config.use_llvm_build:
+    config.excludes = ['llvm-support']
+
+fc1_wrapper = '%s %s/perf-helper.py cc1' % (sys.executable, 
config.perf_helper_dir)
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.cmake_compiler_args = '-DCMAKE_Fortran_COMPILER="{0}"'.format(
+   config.flang.replace(' ', ';')
+)
+config.substitutions.append( ('%flang_skip_driver', ' %s %s %s ' % 
(fc1_wrapper, config.flang, sysroot_flags)))
+config.substitutions.append( ('%flang', '%s %s ' % (config.flang, 
sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
+config.substitutions.append( ('%cmake_compiler_args', 
config.cmake_compiler_args))
+config.substitutions.append( ('%cmake_generator', config.cmake_generator ) )
+config.substitutions.append( ('%cmake', config.cmake_exe ) )
+config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) )
+config.substitutions.append( ('%module_src_dir', config.module_src_dir ) )
+config.substitutions.append( ('%perf_wrapper', '' ) )
+
+config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw'
diff --git a/flang/utils/perf-training/lit.site.cfg.in 
b/flang/utils/perf-training/lit.site.cfg.in
new file mode 100644
index 0000000000000..47a8f35a8501e
--- /dev/null
+++ b/flang/utils/perf-training/lit.site.cfg.in
@@ -0,0 +1,17 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@FLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+config.cmake_exe = "@CMAKE_COMMAND@"
+config.llvm_src_dir = "@CMAKE_SOURCE_DIR@"
+config.module_src_dir = "@LLVM_MAIN_SRC_DIR@/../flang-rt/lib/runtime"
+config.cmake_generator = "@CMAKE_GENERATOR@"
+config.use_llvm_build = @FLANG_PGO_TRAINING_USE_LLVM_BUILD@
+
+# Let the main config do the real work.
+lit_config.load_config(config, 
"@FLANG_SOURCE_DIR@/utils/perf-training/lit.cfg")
diff --git a/flang/utils/perf-training/order-files.lit.cfg 
b/flang/utils/perf-training/order-files.lit.cfg
new file mode 100644
index 0000000000000..114f6ff15c927
--- /dev/null
+++ b/flang/utils/perf-training/order-files.lit.cfg
@@ -0,0 +1,43 @@
+# -*- Python -*-
+
+from lit import Test
+import lit.formats
+import lit.util
+import os
+import subprocess
+
+def getSysrootFlagsOnDarwin(config, lit_config):
+    # On Darwin, pass flang the SDK path reported by xcrun as '-isysroot <path>'
+    # (decoded from bytes to text so no b'...' repr leaks in); else return ''.
+    if 'darwin' in config.target_triple:
+        try:
+            out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip().decode()
+            res = 0
+        except OSError:
+            res = -1
+        if res == 0 and out:
+            sdk_path = out
+            lit_config.note('using SDKROOT: %r' % sdk_path)
+            return '-isysroot %s' % sdk_path
+    return ''
+
+sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config)
+
+config.flang = os.path.realpath(lit.util.which('flang', 
config.flang_tools_dir)).replace('\\', '/')
+
+config.name = 'Flang Perf Training'
+config.suffixes = ['.f', '.f90', '.f95', '.f03', '.f08', '.f18', '.F', '.F90', 
'.F95', '.F03', '.F08', '.F18', '.test']
+
+dtrace_wrapper = '%s %s/perf-helper.py dtrace' % (sys.executable, 
config.perf_helper_dir)
+dtrace_wrapper_fc1 = '%s %s/perf-helper.py dtrace --cc1' % (sys.executable, 
config.perf_helper_dir)
+
+if 'darwin' in config.target_triple:
+    lit_config.note('using DTrace oneshot probe')
+    dtrace_wrapper = '%s --use-oneshot' % dtrace_wrapper
+    dtrace_wrapper_fc1 = '%s --use-oneshot' % dtrace_wrapper_fc1
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+config.substitutions.append( ('%flang_skip_driver', ' %s %s %s ' % 
(dtrace_wrapper_fc1, config.flang, sysroot_flags)))
+config.substitutions.append( ('%flang', ' %s %s %s ' % (dtrace_wrapper, 
config.flang, sysroot_flags) ) )
+config.substitutions.append( ('%test_root', config.test_exec_root ) )
diff --git a/flang/utils/perf-training/order-files.lit.site.cfg.in 
b/flang/utils/perf-training/order-files.lit.site.cfg.in
new file mode 100644
index 0000000000000..9eb4956f277f4
--- /dev/null
+++ b/flang/utils/perf-training/order-files.lit.site.cfg.in
@@ -0,0 +1,12 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
+config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training"
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.test_source_root = "@FLANG_PGO_TRAINING_DATA@"
+config.target_triple = "@LLVM_TARGET_TRIPLE@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, 
"@FLANG_SOURCE_DIR@/utils/perf-training/order-files.lit.cfg")
diff --git a/llvm/docs/AdvancedBuilds.rst b/llvm/docs/AdvancedBuilds.rst
index 9e25355365a81..1a2e988355c83 100644
--- a/llvm/docs/AdvancedBuilds.rst
+++ b/llvm/docs/AdvancedBuilds.rst
@@ -21,10 +21,17 @@ generator.
 
 Many of the build configurations mentioned on this documentation page can be
 utilized by using a CMake cache. A CMake cache is essentially a configuration
-file that sets the necessary flags for a specific build configuration. The 
caches
-for Clang are located in :code:`/clang/cmake/caches` within the monorepo. They
-can be passed to CMake using the :code:`-C` flag as demonstrated in the 
examples
-below along with additional configuration flags.
+file that sets the necessary flags for a specific build configuration.
+
+The caches for Clang are located in :code:`/clang/cmake/caches` within the
+monorepo. They can be passed to CMake using the :code:`-C` flag as demonstrated
+in the examples below along with additional configuration flags.
+
+The caches for Flang are located in :code:`/flang/cmake/caches` within the
+monorepo. They can be passed to CMake using the :code:`-C` flag as demonstrated
+in the examples below along with additional configuration flags. Due to
+Flang's heavy reliance on Clang, these caches ensure equal handling of Flang
+and Clang, resulting in both being built within the same arrangement.
 
 Bootstrap Builds
 ================
@@ -132,6 +139,14 @@ configuration with CMake with the following command:
   $ cmake -G Ninja -C <path to source>/clang/cmake/caches/PGO.cmake \
       <path to source>/llvm
 
+Similarly, to build optimized Flang (along with Clang), you can use the
+following command:
+
+.. code-block:: console
+
+  $ cmake -G Ninja -C <path to source>/flang/cmake/caches/PGO.cmake \
+      <path to source>/llvm
+
 There are several additional options that the cache file also accepts to modify
 the build, particularly the ``PGO_INSTRUMENT_LTO`` option. Setting this option 
to
 Thin or Full will enable ThinLTO or full LTO respectively, further enhancing
@@ -145,8 +160,8 @@ that also enables ThinLTO, use the following command:
       -DPGO_INSTRUMENT_LTO=Thin \
       <path to source>/llvm
 
-By default, clang will generate profile data by compiling a simple
-hello world program.  You can also tell clang to use an external
+By default, the compiler will generate profile data by compiling a simple
+hello world program.  You can also configure the use of an external
 project for generating profile data that may be a better fit for your
 use case.  The project you specify must either be a lit test suite
 (use the ``CLANG_PGO_TRAINING_DATA`` option) or a CMake project (use the
@@ -162,6 +177,18 @@ profile data you would use the following command:
        -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to 
llvm-test-suite> \
        -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=runtimes
 
+Similarly, to train optimized Flang (along with Clang) on an external project,
+you can use the following command:
+
+.. code-block:: console
+
+  $ cmake -G Ninja -C <path to source>/flang/cmake/caches/PGO.cmake \
+       -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to 
llvm-test-suite> \
+       -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=runtimes \
+       -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to 
llvm-test-suite> \
+       
-DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS="-DTEST_SUITE_SUBDIRS=Fortran"
 \
+       -DBOOTSTRAP_FLANG_PGO_TRAINING_DEPS=runtimes
+
 The ``BOOTSTRAP\_`` prefix tells CMake to pass the variables on to the 
instrumented
 stage two build.  And the ``CLANG_PGO_TRAINING_DEPS`` option lets you specify
 additional build targets to build before building the external project.  The
@@ -191,6 +218,12 @@ should be at a path something like:
 
   <build 
dir>/tools/clang/stage2-instrumented-bins/utils/perf-training/clang.profdata
 
+For Flang the profile data should be at a path something like:
+
+.. code-block:: console
+
+  <build dir>/tools/clang/stage2-instrumented-bins/flang.profdata
+
 You can feed that file into the ``LLVM_PROFDATA_FILE`` option when you build 
your
 optimized compiler.
 
@@ -202,6 +235,12 @@ variable for that purpose:
 
   set(CLANG_PGO_TRAINING_DEPS builtins runtimes CACHE STRING "")
 
+Similarly, for Flang you can make use of :code:`FLANG_PGO_TRAINING_DEPS`:
+
+.. code-block:: cmake
+
+  set(FLANG_PGO_TRAINING_DEPS builtins runtimes CACHE STRING "")
+
 The PGO cache has a slightly different stage naming scheme than other
 multi-stage builds. It generates three stages: stage1, stage2-instrumented, and
 stage2. Both of the stage2 builds are built using the stage1 compiler.
@@ -226,6 +265,10 @@ The PGO cache generates the following additional targets:
 **stage2-check-clang**
   Depends on stage2 and runs check-clang using the stage2 compiler.
 
+**stage2-check-flang**
+  Depends on stage2 and runs check-flang using the stage2 compiler (when using
+  Flang's CMake caches).
+
 **stage2-check-all**
   Depends on stage2 and runs check-all using the stage2 compiler.
 
@@ -256,6 +299,20 @@ Then, build the BOLT-optimized binary by running the 
following ninja command:
 
   $ ninja clang-bolt
 
+Similarly, to get both Flang and Clang optimized, use the following CMake
+configuration:
+
+.. code-block:: console
+
+  $ cmake <path to source>/llvm -C <path to 
source>/flang/cmake/caches/BOLT.cmake
+
+Then, build the BOLT-optimized flang and clang binaries by running the 
following
+ninja command:
+
+.. code-block:: console
+
+  $ ninja flang-bolt
+
 If you're seeing errors in the build process, try building with a recent
 version of Clang/LLVM by setting the ``CMAKE_C_COMPILER`` and
 ``CMAKE_CXX_COMPILER`` flags to the appropriate values.
@@ -279,6 +336,23 @@ Then, to build the final optimized binary, build the 
stage2-clang-bolt target:
 
   $ ninja stage2-clang-bolt
 
+Similarly, to get both Flang and Clang optimized, use the following CMake
+configuration:
+
+.. code-block:: console
+
+  $ cmake -G Ninja <path to source>/llvm \
+      -C <path to source>/flang/cmake/caches/BOLT-PGO.cmake \
+      -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \
+      -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \
+      -DPGO_INSTRUMENT_LTO=Thin
+
+Then, to build the final optimized binaries, build the stage2-flang-bolt 
target:
+
+.. code-block:: console
+
+  $ ninja stage2-flang-bolt
+
 3-Stage Non-Determinism
 =======================
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to