[clang] [compiler-rt] [libcxx] [Instrumentor] Add runtime examples: [1/N] A flop counter (PR #205698)

Johannes Doerfert via cfe-commits Wed, 24 Jun 2026 17:09:53 -0700

https://github.com/jdoerfert updated 
https://github.com/llvm/llvm-project/pull/205698


>From 61cbfabb7ade682a64f516c871b2bacb1e3e324a Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <[email protected]>
Date: Mon, 15 Jun 2026 11:20:02 -0700
Subject: [PATCH] [Instrumentor] Add runtime examples: [1/N] A flop counter

This adds an instrumentor-tools folder to compiler-rt to showcase
use cases of the Instrumentor. The initial example is a program that,
via instrumentation, counts the number of flops performed. Call and
intrinsic support will follow after #198042.

Partially developed by Claude (AI), tested and verified by me.
---
 clang/cmake/caches/CrossWinToARMLinux.cmake   |   1 +
 clang/cmake/caches/VectorEngine.cmake         |   2 +
 .../cmake/caches/cross-linux-toolchain.cmake  |   1 +
 compiler-rt/CMakeLists.txt                    |   2 +
 .../cmake/Modules/AllSupportedArchDefs.cmake  |   2 +
 compiler-rt/cmake/caches/AMDGPU.cmake         |   1 +
 compiler-rt/cmake/caches/NVPTX.cmake          |   1 +
 compiler-rt/cmake/caches/SPIRV64.cmake        |   1 +
 .../caches/hexagon-builtins-baremetal.cmake   |   1 +
 .../cmake/caches/hexagon-linux-builtins.cmake |   1 +
 compiler-rt/cmake/config-ix.cmake             |   4 +
 compiler-rt/lib/CMakeLists.txt                |   5 +
 .../lib/instrumentor-tools/CMakeLists.txt     |  11 +
 compiler-rt/lib/instrumentor-tools/README.md  |  49 +++
 .../flop-counter/CMakeLists.txt               |  82 +++++
 .../instrumentor-tools/flop-counter/README.md |  77 +++++
 .../flop-counter/flop_counter_config.json     |  32 ++
 .../flop-counter/flop_counter_runtime.cpp     | 164 ++++++++++
 .../instrumentor-tools/instrumentor_runtime.h | 293 ++++++++++++++++++
 compiler-rt/test/CMakeLists.txt               |   6 +-
 .../test/instrumentor-tools/CMakeLists.txt    |  48 +++
 .../test/instrumentor-tools/lit.cfg.py        |  75 +++++
 .../instrumentor-tools/lit.site.cfg.py.in     |  12 +
 .../test/instrumentor-tools/simple_flops.c    |  49 +++
 .../test/instrumentor-tools/vector_flops.cpp  |  46 +++
 libcxx/cmake/caches/Generic-llvm-libc.cmake   |   1 +
 26 files changed, 966 insertions(+), 1 deletion(-)
 create mode 100644 compiler-rt/lib/instrumentor-tools/CMakeLists.txt
 create mode 100644 compiler-rt/lib/instrumentor-tools/README.md
 create mode 100644 
compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt
 create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/README.md
 create mode 100644 
compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json
 create mode 100644 
compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
 create mode 100644 compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h
 create mode 100644 compiler-rt/test/instrumentor-tools/CMakeLists.txt
 create mode 100644 compiler-rt/test/instrumentor-tools/lit.cfg.py
 create mode 100644 compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in
 create mode 100644 compiler-rt/test/instrumentor-tools/simple_flops.c
 create mode 100644 compiler-rt/test/instrumentor-tools/vector_flops.cpp

diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake 
b/clang/cmake/caches/CrossWinToARMLinux.cmake
index c47c4ac3bb73e..a3adc3a761ed0 100644
--- a/clang/cmake/caches/CrossWinToARMLinux.cmake
+++ b/clang/cmake/caches/CrossWinToARMLinux.cmake
@@ -196,6 +196,7 @@ 
set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_LIBFUZZER
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_PROFILE              
   OFF CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_CRT                  
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_ORC                  
   OFF CACHE BOOL "")
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS   
   OFF CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_DEFAULT_TARGET_ONLY        
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_INCLUDE_TESTS              
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_CAN_EXECUTE_TESTS          
   ON CACHE BOOL "")
diff --git a/clang/cmake/caches/VectorEngine.cmake 
b/clang/cmake/caches/VectorEngine.cmake
index b19e773a09049..b177e6d5257ac 100644
--- a/clang/cmake/caches/VectorEngine.cmake
+++ b/clang/cmake/caches/VectorEngine.cmake
@@ -43,6 +43,7 @@ 
set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL 
"")
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE 
BOOL "")
+set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF 
CACHE BOOL "")
 
 # VE supports builtins, crt, and profile only.
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
@@ -55,6 +56,7 @@ 
set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_CTX_PROFILE OFF CACHE BOOL "
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF 
CACHE BOOL "")
 
 # VE uses builtins from Compiler-RT.
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_USE_BUILTINS_LIBRARY TRUE CACHE 
BOOL "")
diff --git a/clang/cmake/caches/cross-linux-toolchain.cmake 
b/clang/cmake/caches/cross-linux-toolchain.cmake
index 8065961f7fcfe..d7b7d0fa1be4a 100644
--- a/clang/cmake/caches/cross-linux-toolchain.cmake
+++ b/clang/cmake/caches/cross-linux-toolchain.cmake
@@ -258,6 +258,7 @@ foreach(target ${LLVM_RUNTIME_TARGETS})
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_PROFILE                OFF CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_MEMPROF                OFF CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_ORC                    OFF CACHE 
BOOL "")
+  set(RUNTIMES_${target}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS     OFF CACHE 
BOOL "")
 
   set(RUNTIMES_${target}_COMPILER_RT_INCLUDE_TESTS                ON CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_CAN_EXECUTE_TESTS            ON CACHE 
BOOL "")
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 115d953e456c9..252eacf181860 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -100,6 +100,8 @@ option(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED "Build 
SCUDO standalone for sha
 mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED)
 option(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC "Build SCUDO 
standalone with LLVM's libc headers" OFF)
 mark_as_advanced(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC)
+option(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS "Build Instrumentor tool runtimes" 
ON)
+mark_as_advanced(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
 
 if(FUCHSIA)
   set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS_DEFAULT OFF)
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake 
b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index a535cf9e3a8da..fee5f4a5720ed 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -138,3 +138,5 @@ endif()
 if (WIN32)
   set(ALL_ORC_SUPPORTED_ARCH ${X86_64})
 endif()
+
+set(ALL_INSTRUMENTOR_SUPPORTED_ARCH ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH})
diff --git a/compiler-rt/cmake/caches/AMDGPU.cmake 
b/compiler-rt/cmake/caches/AMDGPU.cmake
index f3a9510c4f311..a0e66c5431dc2 100644
--- a/compiler-rt/cmake/caches/AMDGPU.cmake
+++ b/compiler-rt/cmake/caches/AMDGPU.cmake
@@ -17,3 +17,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/NVPTX.cmake 
b/compiler-rt/cmake/caches/NVPTX.cmake
index dfeb96be190d4..7ebc37e585eaa 100644
--- a/compiler-rt/cmake/caches/NVPTX.cmake
+++ b/compiler-rt/cmake/caches/NVPTX.cmake
@@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/SPIRV64.cmake 
b/compiler-rt/cmake/caches/SPIRV64.cmake
index 693970557f211..d89d1fba336f6 100644
--- a/compiler-rt/cmake/caches/SPIRV64.cmake
+++ b/compiler-rt/cmake/caches/SPIRV64.cmake
@@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake 
b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
index 632e9ea5758a0..ab78a1374f613 100644
--- a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
+++ b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
@@ -14,6 +14,7 @@ set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "")
 set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "")
 # without this, build tries to use pthread which is not supported by 
hexagon-unknown-none-elf
 set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "" FORCE)
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 
 set(CMAKE_C_FLAGS "-ffreestanding" CACHE STRING "")
 set(CMAKE_CXX_FLAGS "-ffreestanding" CACHE STRING "")
diff --git a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake 
b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
index d9c9ff2a4655e..3d21073ce9616 100644
--- a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
+++ b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
@@ -11,5 +11,6 @@ set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "")
 set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "")
diff --git a/compiler-rt/cmake/config-ix.cmake 
b/compiler-rt/cmake/config-ix.cmake
index 083f1c98d0f16..de67acb937afd 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -704,6 +704,9 @@ if(APPLE)
   list_intersect(ORC_SUPPORTED_ARCH
     ALL_ORC_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(INSTRUMENTOR_SUPPORTED_ARCH
+    ALL_INSTRUMENTOR_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 
 else()
   # Architectures supported by compiler-rt libraries.
@@ -739,6 +742,7 @@ else()
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH 
${ALL_GWP_ASAN_SUPPORTED_ARCH})
   filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH})
   filter_available_targets(ORC_SUPPORTED_ARCH ${ALL_ORC_SUPPORTED_ARCH})
+  filter_available_targets(INSTRUMENTOR_SUPPORTED_ARCH 
${ALL_INSTRUMENTOR_SUPPORTED_ARCH})
 endif()
 
 if (MSVC)
diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt
index e6158ec408895..779685240f235 100644
--- a/compiler-rt/lib/CMakeLists.txt
+++ b/compiler-rt/lib/CMakeLists.txt
@@ -78,3 +78,8 @@ endif()
 # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer
 # directories explicitly here.
 add_subdirectory(scudo/standalone/fuzz)
+
+# Instrumentor tools - educational tools demonstrating the Instrumentor pass
+if(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
+  add_subdirectory(instrumentor-tools)
+endif()
diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt 
b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
new file mode 100644
index 0000000000000..6f8e2fe352f5a
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
@@ -0,0 +1,11 @@
+# CMakeLists.txt for Instrumentor Examples
+#
+# This directory contains example runtimes that demonstrate how to use the
+# LLVM Instrumentor pass for various profiling and analysis tasks.
+
+include(AddCompilerRT)
+
+add_compiler_rt_component(instrumentor-tools)
+
+# Add subdirectories for specific examples
+add_subdirectory(flop-counter)
diff --git a/compiler-rt/lib/instrumentor-tools/README.md 
b/compiler-rt/lib/instrumentor-tools/README.md
new file mode 100644
index 0000000000000..5f50c7c7b001a
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/README.md
@@ -0,0 +1,49 @@
+# Instrumentor Tools
+
+This directory contains example runtime libraries that demonstrate how to use
+the LLVM Instrumentor pass for various profiling and analysis tasks.
+
+## Overview
+
+The LLVM Instrumentor is a configurable instrumentation pass that allows you to
+insert runtime calls at various program points (e.g., function entry/exit,
+memory operations, floating-point operations). Each example in this directory
+provides:
+
+1. A runtime library that implements the instrumentation callbacks
+2. An instrumentor configuration JSON file
+3. Tests demonstrating usage
+
+## Building
+
+The instrumentor tools are built as part of the compiler-rt build:
+
+```bash
+cmake -S llvm -B build -G Ninja \
+  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+  -DLLVM_ENABLE_PROJECTS="clang;compiler-rt"
+ninja -C build
+```
+
+The runtime libraries will be installed in:
+- Darwin: `lib/clang/<version>/lib/darwin/libclang_rt.<example>_osx.a`
+- Linux: `lib/clang/<version>/lib/linux/libclang_rt.<example>-<arch>.a`
+
+Configuration files will be installed in `share/llvm/instrumentor-configs/`.
+
+## Adding New Tools 
+
+To add a new instrumentor example:
+
+1. Create a new directory under `compiler-rt/lib/instrumentor-tools/`
+2. Add your runtime implementation (`.cpp` and `.h` files)
+3. Create an instrumentor configuration JSON file
+4. Add a `CMakeLists.txt` (see `flop-counter/CMakeLists.txt` as a template)
+5. Update `compiler-rt/lib/instrumentor-tools/CMakeLists.txt` to include your 
subdirectory
+6. Add tests in `compiler-rt/test/instrumentor-tools/`
+
+## Resources
+
+- [Instrumentor Documentation](../../../llvm/docs/Instrumentor.rst)
+- [Instrumentor Runtime Headers](../../../llvm/utils/instrumentor_runtime.h)
+- [Configuration Wizard](../../../llvm/utils/instrumentor-config-wizard.py)
diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt 
b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt
new file mode 100644
index 0000000000000..bed346a179e34
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt
@@ -0,0 +1,82 @@
+# CMakeLists.txt for FLOP Counter Example
+#
+# This example demonstrates counting floating-point operations using the
+# Instrumentor pass. It provides a runtime library that can be linked with
+# instrumented code to track and report FLOP counts.
+
+add_compiler_rt_component(flop-counter)
+
+set(FLOP_COUNTER_SOURCES
+  flop_counter_runtime.cpp
+  )
+
+set(FLOP_COUNTER_HEADERS
+  )
+
+# Include paths for instrumentor runtime headers
+# The instrumentor runtime headers are in llvm/utils
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Common flags
+set(FLOP_COUNTER_CFLAGS
+  ${COMPILER_RT_COMMON_CFLAGS}
+  ${COMPILER_RT_CXX_CFLAGS}
+  -std=c++17
+  )
+
+set(FLOP_COUNTER_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS})
+set(FLOP_COUNTER_LINK_LIBS ${COMPILER_RT_CXX_LINK_LIBS})
+
+# flop counter uses C++ standard library headers.
+if (TARGET cxx-headers OR HAVE_LIBCXX)
+  set(DEPS cxx-headers)
+endif()
+
+# Determine supported architectures
+if(APPLE)
+  # On Darwin, use the darwin OSX architectures
+  set(FLOP_COUNTER_SUPPORTED_ARCH arm64)
+  if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "")
+    set(FLOP_COUNTER_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+  endif()
+  if(DARWIN_osx_ARCHS)
+    set(FLOP_COUNTER_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+  endif()
+else()
+  # For non-Apple platforms, use the default target architecture
+  set(FLOP_COUNTER_SUPPORTED_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH})
+endif()
+
+message(STATUS "FLOP Counter supported architectures: 
${FLOP_COUNTER_SUPPORTED_ARCH}")
+
+# Build the static runtime library for Apple platforms
+if(APPLE)
+  add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter
+    STATIC
+    OS osx
+    ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH}
+    CFLAGS ${FLOP_COUNTER_CFLAGS}
+    SOURCES ${FLOP_COUNTER_SOURCES}
+    LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS}
+    LINK_LIBS ${FLOP_COUNTER_LINK_LIBS}
+    ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS}
+    DEPS ${DEPS}
+    PARENT_TARGET flop-counter)
+else()
+  add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter
+    STATIC
+    ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH}
+    CFLAGS ${FLOP_COUNTER_CFLAGS}
+    SOURCES ${FLOP_COUNTER_SOURCES}
+    LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS}
+    LINK_LIBS ${FLOP_COUNTER_LINK_LIBS}
+    ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS}
+    DEPS ${DEPS}
+    PARENT_TARGET flop-counter)
+endif()
+
+# Install the configuration file as a resource
+install(FILES flop_counter_config.json
+        DESTINATION share/llvm/instrumentor-configs
+        COMPONENT flop-counter)
diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md 
b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md
new file mode 100644
index 0000000000000..c00a3e57d1a65
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md
@@ -0,0 +1,77 @@
+# FLOP Counter
+
+A runtime library for counting floating-point operations in programs using the 
LLVM Instrumentor pass.
+
+## Features
+
+- **Precision Tracking**: Separates counts for single (float), double, and 
extended precision operations
+- **Operation Categorization**: Tracks adds, multiplications, divisions, FMA 
operations (TODO), and others (sqrt, sin, cos, etc.) (TODO)
+- **Vector Support**: Counts FLOPs in vector operations
+- **Thread-Safe**: Uses atomic operations for counter updates
+- **Low Overhead**: Minimal runtime overhead for counting
+- **Automatic Reporting**: Prints statistics at program exit
+
+## Usage
+
+### Basic Example
+
+```c
+#include <stdio.h>
+#include <math.h>
+
+double compute(double a, double b) {
+  return sqrt(a * a + b * b);
+}
+
+int main() {
+  double result = compute(3.0, 4.0);
+  printf("Result: %f\n", result);
+  return 0;
+}
+```
+
+Compile with:
+```bash
+clang++ -O2 -finstrumentor=flop_counter_config.json example.cpp \
+        -lclang_rt.instrumentor_flop_counter -o example
+```
+
+Run:
+```bash
+./example
+```
+
+Output:
+```
+Result: 5.000000
+
+=================================================
+           FLOP Counter Statistics
+=================================================
+Total FLOPs:                             3
+...
+```
+
+## Implementation Details
+
+### Instrumentation Points
+
+The FLOP counter instruments:
+
+1. **Binary FP Operations**: `fadd`, `fsub`, `fmul`, `fdiv`, `frem`
+2. **Unary FP Operations**: `fneg`
+3. TODO: **FP Intrinsics**: `llvm.fma`, `llvm.sqrt`, `llvm.sin`, `llvm.cos`, 
etc.
+
+### FLOP Counting Rules
+
+- **Regular operations**: 1 FLOP per operation
+- **FMA (Fused Multiply-Add)**: 2 FLOPs (multiply + add)
+- **Vector operations**: Counted per element
+- **Intrinsics**: TODO
+
+### Configuration
+
+The `flop_counter_config.json` file configures the instrumentor to:
+- Insert callbacks after floating-point binary/unary operations
+- Pass value size, type IDs, and opcodes to the runtime
+- Filter to only instrument FP math operations
diff --git 
a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json 
b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json
new file mode 100644
index 0000000000000..c3131c363fded
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json
@@ -0,0 +1,32 @@
+{
+  "configuration": {
+    "runtime_prefix": "__flop_counter_",
+    "runtime_prefix.description": "The runtime API prefix.",
+    "runtime_stubs_file": "rt",
+    "target_regex": "",
+    "target_regex.description": "Regular expression to be matched against the 
module target. Only targets that match this regex will be instrumented.",
+    "function_regex": "",
+    "function_regex.description": "Regular expression to be matched against a 
function name. Only functions that match this regex will be instrumented.",
+    "demangle_function_names": true,
+    "demangle_function_names.description": "Demangle functions names passed to 
the runtime.",
+    "host_enabled": true,
+    "host_enabled.description": "Instrument non-GPU targets",
+    "gpu_enabled": true,
+    "gpu_enabled.description": "Instrument GPU targets"
+  },
+  "instruction_post": {
+    "numeric": {
+      "enabled": true,
+      "filter": "type_id < 7 || ((type_id == 17 || type_id == 18) && 
sub_type_id < 7)",
+      "filter.description": "Static property filter to exclude 
instrumentation.",
+      "type_id": true,
+      "type_id.description": "The operation's type id.",
+      "sub_type_id": true,
+      "sub_type_id.description": "The operation's type id.",
+      "size": true,
+      "size.description": "The operation's type size.",
+      "opcode": true,
+      "opcode.description": "The instruction opcode."
+    }
+  }
+}
diff --git 
a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp 
b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
new file mode 100644
index 0000000000000..9eaa2d807838e
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
@@ -0,0 +1,164 @@
+//===-- flop_counter_runtime.cpp - FLOP Counter Runtime ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the runtime for counting floating-point operations.
+// It hooks into instrumentation points inserted by the LLVM Instrumentor pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+namespace {
+
+/// Process-wide FLOP statistics. All counters are atomics so they can be
+/// updated from several threads without additional locking.
+struct FlopCounterStats {
+  using Counter = std::atomic<uint64_t>;
+
+  // Sum over every counted operation.
+  Counter TotalFlops{0};
+
+  // Precision buckets.
+  Counter FloatOps{0};    // 32-bit float operations
+  Counter DoubleOps{0};   // 64-bit double operations
+  Counter ExtendedOps{0}; // 80/128-bit extended precision operations
+  Counter VectorFlops{0}; // Total FLOPs from vector operations
+
+  // Operation buckets.
+  Counter AddOps{0};
+  Counter MulOps{0};
+  Counter DivOps{0};
+  Counter FmaOps{0};   // Fused multiply-add operations
+  Counter OtherOps{0}; // sqrt, sin, cos, etc.
+};
+
+// Global statistics object; null until __flop_counter_initialize() allocates
+// it.
+FlopCounterStats *Stats = nullptr;
+
+// Opcode values compared against the `Opcode` argument of the numeric
+// callback. NOTE(review): presumably these mirror the LLVM instruction opcode
+// numbering used by the Instrumentor -- confirm whenever LLVM is updated.
+enum {
+  LLVMOpcodeFNeg = 13,
+  LLVMOpcodeFAdd = 15,
+  LLVMOpcodeFSub = 17,
+  LLVMOpcodeFMul = 19,
+  LLVMOpcodeFDiv = 22,
+  LLVMOpcodeFRem = 25,
+};
+
+} // namespace
+
+extern "C" {
+
+/// Allocates the global statistics object. Registered as a constructor with
+/// priority 1000 so that it runs during program start-up.
+///
+/// The function is an exported symbol and may therefore be called more than
+/// once; an already allocated object is kept so that recorded counts are not
+/// discarded and the previous allocation is not leaked.
+__attribute__((constructor(1000))) void __flop_counter_initialize() {
+  if (!Stats)
+    Stats = new FlopCounterStats();
+}
+
+/// Prints the collected statistics to stdout and releases the statistics
+/// object. Registered as a destructor with priority 1000.
+///
+/// Does nothing if the statistics object was never allocated or has already
+/// been released, so a second call cannot dereference a dangling pointer.
+__attribute__((destructor(1000))) void __flop_counter_finalize() {
+  if (!Stats)
+    return;
+
+  // The counters are uint64_t; PRIu64 selects the matching conversion on every
+  // target (uint64_t is not `unsigned long long` everywhere, so "%llu" is not
+  // portable).
+  std::printf("\n");
+  std::printf("=================================================\n");
+  std::printf("           FLOP Counter Statistics\n");
+  std::printf("=================================================\n");
+  std::printf("Total FLOPs:              %20" PRIu64 "\n",
+              Stats->TotalFlops.load(std::memory_order_relaxed));
+  std::printf("\n");
+  std::printf("By Precision:\n");
+  std::printf("  Single (float):         %20" PRIu64 "\n",
+              Stats->FloatOps.load(std::memory_order_relaxed));
+  std::printf("  Double (double):        %20" PRIu64 "\n",
+              Stats->DoubleOps.load(std::memory_order_relaxed));
+  std::printf("  Extended (fp80/fp128):  %20" PRIu64 "\n",
+              Stats->ExtendedOps.load(std::memory_order_relaxed));
+  std::printf("  Vector FLOPs:           %20" PRIu64 "\n",
+              Stats->VectorFlops.load(std::memory_order_relaxed));
+  std::printf("\n");
+  std::printf("By Operation:\n");
+  std::printf("  Addition/Subtraction:   %20" PRIu64 "\n",
+              Stats->AddOps.load(std::memory_order_relaxed));
+  std::printf("  Multiplication:         %20" PRIu64 "\n",
+              Stats->MulOps.load(std::memory_order_relaxed));
+  std::printf("  Division:               %20" PRIu64 "\n",
+              Stats->DivOps.load(std::memory_order_relaxed));
+  std::printf("  Fused Multiply-Add:     %20" PRIu64 "\n",
+              Stats->FmaOps.load(std::memory_order_relaxed));
+  std::printf("  Other (sqrt, sin, ...): %20" PRIu64 "\n",
+              Stats->OtherOps.load(std::memory_order_relaxed));
+  std::printf("=================================================\n");
+
+  // Reset the pointer so later callers see "no statistics" rather than freed
+  // memory.
+  delete Stats;
+  Stats = nullptr;
+}
+
+/// Runtime callback for the "numeric" post-instruction hook configured in
+/// flop_counter_config.json. Updates the global counters for one instrumented
+/// floating-point instruction.
+///
+/// \param TypeId    Type id of the operation's type.
+/// \param SubTypeId Element type id; only consulted when \p TypeId is a fixed
+///                  or scalable vector type.
+/// \param Size      Size of the operation's type. NOTE(review): the element
+///                  sizes used below assume this is given in bytes -- confirm.
+/// \param Opcode    Instruction opcode, matched against the LLVMOpcode*
+///                  constants.
+void __flop_counter_post_numeric(int32_t TypeId, int32_t SubTypeId,
+                                 int32_t Size, int32_t Opcode) {
+  // Instrumented code can execute while no statistics object exists (before
+  // __flop_counter_initialize() or after __flop_counter_finalize()); there is
+  // nothing to record into in that case.
+  if (!Stats)
+    return;
+
+  const bool IsVector =
+      TypeId == FixedVectorTyID || TypeId == ScalableVectorTyID;
+  const int32_t ElemTypeId = IsVector ? SubTypeId : TypeId;
+
+  // Size of a single element; 0 means the element type is not recognized.
+  int32_t ElemSize = 0;
+  switch (ElemTypeId) {
+  case HalfTyID:
+  case BFloatTyID:
+    ElemSize = 2;
+    break;
+  case FloatTyID:
+    ElemSize = 4;
+    break;
+  case DoubleTyID:
+    ElemSize = 8;
+    break;
+  case X86_FP80TyID:
+  case FP128TyID:
+  case PPC_FP128TyID:
+    ElemSize = 16;
+    break;
+  default:
+    break;
+  }
+
+  // A scalar instruction is always exactly one FLOP, independent of the
+  // reported size. Vector instructions count one FLOP per element; if the
+  // element count cannot be derived (unknown element type, or a reported size
+  // smaller than one element) the instruction still counts once. This also
+  // avoids dividing by zero.
+  uint64_t FlopCount = 1;
+  if (IsVector && ElemSize > 0 && Size >= ElemSize)
+    FlopCount = static_cast<uint64_t>(Size / ElemSize);
+
+  if (IsVector) {
+    Stats->VectorFlops.fetch_add(FlopCount, std::memory_order_relaxed);
+  } else {
+    // Categorize scalars by precision. Everything that is neither float nor
+    // double (including half and bfloat) lands in the "extended" bucket, as
+    // there is no dedicated counter for those types.
+    if (ElemTypeId == FloatTyID) {
+      Stats->FloatOps.fetch_add(1, std::memory_order_relaxed);
+    } else if (ElemTypeId == DoubleTyID) {
+      Stats->DoubleOps.fetch_add(1, std::memory_order_relaxed);
+    } else {
+      Stats->ExtendedOps.fetch_add(1, std::memory_order_relaxed);
+    }
+  }
+
+  // Categorize by operation type
+  switch (Opcode) {
+  case LLVMOpcodeFAdd:
+  case LLVMOpcodeFSub:
+    Stats->AddOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  case LLVMOpcodeFMul:
+    Stats->MulOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  case LLVMOpcodeFDiv:
+  case LLVMOpcodeFRem:
+    Stats->DivOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  default:
+    // Includes fneg and any opcode without a dedicated counter.
+    Stats->OtherOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  }
+
+  Stats->TotalFlops.fetch_add(FlopCount, std::memory_order_relaxed);
+}
+
+} // extern "C"
diff --git a/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h 
b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h
new file mode 100644
index 0000000000000..641096f2c0e22
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h
@@ -0,0 +1,293 @@
+//===-- Instrumentor Runtime Helper Header -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header provides helper structures and functions for reading data
+// generated by the LLVM Instrumentor pass and passed to runtime functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTRUMENTOR_RUNTIME_H
+#define INSTRUMENTOR_RUNTIME_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/// Header preceding each value in a value pack. Value packs are used to pass
+/// function arguments and other variable-length data to the runtime. Each
+/// entry is laid out as:
+///   [ValueHeader][Padding][Value Data]
+/// where padding aligns the value data to 8-byte boundaries.
+typedef struct ValuePackHeader {
+  uint32_t size;    // Byte size of the value that follows
+  uint32_t type_id; // llvm::Type::TypeID of the value
+} ValuePackHeader;
+
+/// Cursor over the entries of a value pack.
+typedef struct ValuePackIterator {
+  const char *current; // Address of the entry the cursor is on
+  uint64_t offset;     // Bytes advanced since the start of the pack
+  uint32_t count;      // Total number of entries in the pack
+  uint32_t index;      // Zero-based index of the current entry
+} ValuePackIterator;
+
+/// Reset an iterator so it points at the first entry of a value pack.
+/// \param iter The iterator to (re)initialize
+/// \param pack_ptr Address of the first byte of the value pack
+/// \param num_elements Number of entries the pack contains
+static inline void initValuePackIterator(ValuePackIterator *iter,
+                                         const void *pack_ptr,
+                                         uint32_t num_elements) {
+  ValuePackIterator fresh;
+  fresh.current = (const char *)pack_ptr;
+  fresh.offset = 0;
+  fresh.count = num_elements;
+  fresh.index = 0;
+  *iter = fresh;
+}
+
+/// Get a copy of the header of the entry the iterator currently points at.
+///
+/// The header bytes are copied out with memcpy instead of being read through
+/// a casted pointer, so the read does not depend on the alignment of
+/// iter->current or on the declared type of the underlying pack buffer.
+/// \param iter Iterator positioned on a valid entry (current must not be NULL)
+/// \return The size/type_id header stored at the current position
+static inline ValuePackHeader
+getValuePackHeader(const ValuePackIterator *iter) {
+  ValuePackHeader header;
+  memcpy(&header, iter->current, sizeof(header));
+  return header;
+}
+
+/// Locate the payload bytes of the entry the iterator currently points at.
+/// \return Address just past the header and the size-dependent padding
+static inline const void *getValuePackData(const ValuePackIterator *iter) {
+  const ValuePackHeader hdr = getValuePackHeader(iter);
+  // Padding is whatever rounds the value size up to the next multiple of 8.
+  const uint32_t remainder = hdr.size % 8;
+  const uint32_t pad = remainder == 0 ? 0 : 8 - remainder;
+  // Payload = current position + 8-byte header (size + type_id) + padding.
+  return iter->current + sizeof(ValuePackHeader) + pad;
+}
+
+/// Advance the iterator by one entry.
+///
+/// If the iterator is already at or past the last index, its position is set
+/// to NULL instead; otherwise it skips the current header, padding and value.
+static inline void nextValuePack(ValuePackIterator *iter) {
+  if (iter->index < iter->count) {
+    const ValuePackHeader hdr = getValuePackHeader(iter);
+    const uint32_t pad = (8 - (hdr.size % 8)) % 8;
+    const uint64_t step = sizeof(ValuePackHeader) + pad + hdr.size;
+    iter->current += step;
+    iter->offset += step;
+    iter->index++;
+  } else {
+    iter->current = NULL;
+  }
+}
+
+/// Report how many bytes the iterator has advanced from the start of the pack.
+static inline uint64_t getValuePackOffset(const ValuePackIterator *iter) {
+  const uint64_t bytes_from_start = iter->offset;
+  return bytes_from_start;
+}
+
+/// Look up one entry of a value pack by position.
+///
+/// The pack is walked from the front, one entry at a time, until \p index is
+/// reached.
+///
+/// \param pack_ptr Pointer to the start of the value pack
+/// \param num_elements Number of elements in the pack
+/// \param index Zero-based index of the value to extract
+/// \param header If non-NULL, receives a copy of the entry's header
+/// \return Pointer to the value data, or NULL if \p pack_ptr is NULL or
+///         \p index is out of bounds
+static inline const void *getValuePackEntry(const void *pack_ptr,
+                                            uint32_t num_elements,
+                                            uint32_t index,
+                                            ValuePackHeader *header) {
+  if (!pack_ptr || index >= num_elements)
+    return NULL;
+
+  ValuePackIterator cursor;
+  initValuePackIterator(&cursor, pack_ptr, num_elements);
+
+  for (; cursor.current != NULL && cursor.index < cursor.count;
+       nextValuePack(&cursor)) {
+    if (cursor.index != index)
+      continue;
+    if (header)
+      *header = getValuePackHeader(&cursor);
+    return getValuePackData(&cursor);
+  }
+
+  return NULL; // Index out of bounds
+}
+
+/// LLVM Type IDs for interpreting value pack data.
+/// These correspond to llvm::Type::TypeID enum values. The numeric values are
+/// spelled out because they are compared against integers received at runtime.
+enum LLVMTypeID {
+  HalfTyID = 0,      ///< 16-bit floating point type
+  BFloatTyID = 1,    ///< 16-bit floating point type (7-bit significand)
+  FloatTyID = 2,     ///< 32-bit floating point type
+  DoubleTyID = 3,    ///< 64-bit floating point type
+  X86_FP80TyID = 4,  ///< 80-bit floating point type (X87)
+  FP128TyID = 5,     ///< 128-bit floating point type (112-bit significand)
+  PPC_FP128TyID = 6, ///< 128-bit floating point type (two 64-bits, PowerPC)
+  VoidTyID = 7,      ///< type with no size
+  LabelTyID = 8,     ///< Labels
+  MetadataTyID = 9,  ///< Metadata
+  X86_AMXTyID = 10,  ///< AMX vectors (8192 bits, X86 specific)
+  TokenTyID = 11,    ///< Tokens
+  // Derived types... see DerivedTypes.h file.
+  IntegerTyID = 12,        ///< Arbitrary bit width integers
+  ByteTyID = 13,           ///< Arbitrary bit width bytes
+  FunctionTyID = 14,       ///< Functions
+  PointerTyID = 15,        ///< Pointers
+  StructTyID = 16,         ///< Structures
+  ArrayTyID = 17,          ///< Arrays
+  FixedVectorTyID = 18,    ///< Fixed width SIMD vector type
+  ScalableVectorTyID = 19, ///< Scalable SIMD vector type
+  TypedPointerTyID = 20,   ///< Typed pointer used by some GPU targets
+  TargetExtTyID = 21,      ///< Target extension type
+};
+
+/// Get a human-readable name for an LLVM Type ID.
+/// \param type_id One of the LLVMTypeID values
+/// \return A static string naming the type kind, or "unknown" for any value
+///         that is not an LLVMTypeID enumerator
+static inline const char *getLLVMTypeIDName(uint32_t type_id) {
+  switch (type_id) {
+  case HalfTyID:
+    return "half";
+  case BFloatTyID:
+    return "bfloat";
+  case FloatTyID:
+    return "float";
+  case DoubleTyID:
+    return "double";
+  case X86_FP80TyID:
+    return "x86_fp80";
+  case FP128TyID:
+    return "fp128";
+  case PPC_FP128TyID:
+    return "ppc_fp128";
+  case VoidTyID:
+    return "void";
+  case LabelTyID:
+    return "label";
+  case MetadataTyID:
+    return "metadata";
+  case X86_AMXTyID:
+    return "x86_amx";
+  case TokenTyID:
+    return "token";
+  case IntegerTyID:
+    return "integer";
+  case ByteTyID:
+    // Distinct from IntegerTyID; must not be reported as "integer".
+    return "byte";
+  case FunctionTyID:
+    return "function";
+  case PointerTyID:
+    return "pointer";
+  case StructTyID:
+    return "struct";
+  case ArrayTyID:
+    return "array";
+  case FixedVectorTyID:
+    return "fixed_vector";
+  case ScalableVectorTyID:
+    return "scalable_vector";
+  case TypedPointerTyID:
+    return "typed_pointer";
+  case TargetExtTyID:
+    return "target_ext";
+  default:
+    return "unknown";
+  }
+}
+
+#ifdef __cplusplus
+
+// C++ overlays for range-based iteration and quality of life improvements
+
+/// Adapts a value pack to a C++ range so it can drive a range-based for loop.
+/// Example:
+///   for (auto val : ValuePackRange(pack_ptr, num_elements)) {
+///     // val provides access to header and data
+///   }
+/// A non-zero \c max_size additionally ends iteration once the byte offset of
+/// the next entry reaches that value.
+class ValuePackRange {
+public:
+  /// One decoded entry: a copy of its header plus a pointer to its payload.
+  struct ValueRef {
+    ValuePackHeader header;
+    const void *data;
+
+    uint32_t type_id() const { return header.type_id; }
+    uint32_t size() const { return header.size; }
+    const char *type_name() const { return getLLVMTypeIDName(type_id()); }
+
+    /// Payload pointer cast to \c T; no size or type check is performed.
+    template <typename T> const T *ptr() const {
+      return static_cast<const T *>(data);
+    }
+    /// Payload viewed as a \c T; no size or type check is performed.
+    template <typename T> const T &as() const { return *ptr<T>(); }
+  };
+
+  /// Forward iterator; a null current position marks the end.
+  class iterator {
+  public:
+    iterator(const void *ptr, uint32_t num_elements, uint64_t max_offset)
+        : max_offset_(max_offset) {
+      initValuePackIterator(&iter_, ptr, num_elements);
+      clear_if_exhausted();
+    }
+
+    ValueRef operator*() const {
+      ValueRef ref;
+      ref.header = getValuePackHeader(&iter_);
+      ref.data = getValuePackData(&iter_);
+      return ref;
+    }
+
+    iterator &operator++() {
+      nextValuePack(&iter_);
+      clear_if_exhausted();
+      return *this;
+    }
+
+    bool operator!=(const iterator &other) const {
+      return !(iter_.current == other.iter_.current);
+    }
+
+  private:
+    /// True while the cursor is on an entry inside both the element count and
+    /// the optional byte bound.
+    bool is_valid_position() const {
+      const bool on_entry = iter_.current && iter_.index < iter_.count;
+      const bool in_bounds = max_offset_ == 0 || iter_.offset < max_offset_;
+      return on_entry && in_bounds;
+    }
+
+    /// Collapse any invalid position to null so it compares equal to end().
+    void clear_if_exhausted() {
+      if (!is_valid_position())
+        iter_.current = nullptr;
+    }
+
+    ValuePackIterator iter_;
+    uint64_t max_offset_;
+  };
+
+  ValuePackRange(const void *ptr, uint32_t num_elements, uint64_t max_size = 0)
+      : ptr_(ptr), num_elements_(num_elements), max_size_(max_size) {}
+
+  iterator begin() const { return iterator(ptr_, num_elements_, max_size_); }
+  iterator end() const { return iterator(nullptr, 0, 0); }
+
+private:
+  const void *ptr_;
+  uint32_t num_elements_;
+  uint64_t max_size_;
+};
+
+/// Fetch entry \p index of a value pack and view its payload as a \c T.
+/// \return Typed pointer to the payload, or nullptr when getValuePackEntry
+///         finds no such entry
+template <typename T>
+inline const T *getValueAs(const void *pack_ptr, uint32_t num_elements,
+                           uint32_t index) {
+  const void *raw = getValuePackEntry(pack_ptr, num_elements, index, nullptr);
+  return static_cast<const T *>(raw);
+}
+
+#endif // __cplusplus
+
+#endif // INSTRUMENTOR_RUNTIME_H
diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt
index 3fab82518e75f..701759ad5cabc 100644
--- a/compiler-rt/test/CMakeLists.txt
+++ b/compiler-rt/test/CMakeLists.txt
@@ -60,7 +60,7 @@ umbrella_lit_testsuite_begin(check-compiler-rt)
 set(COMPILER_RT_KNOWN_TEST_SUITES
     builtins;ctx_profile;fuzzer;interception;lsan;memprof;metadata
     ;orc;profile;sanitizer_common;shadowcallstack
-    ;ubsan;xray)
+    ;ubsan;xray;instrumentor-tools)
 list(APPEND COMPILER_RT_KNOWN_TEST_SUITES ${ALL_SANITIZERS})
 list(REMOVE_DUPLICATES COMPILER_RT_KNOWN_TEST_SUITES)
 # Sort the list so that's easier to read when emitting errors.
@@ -170,6 +170,10 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS)
   # ShadowCallStack does not yet provide a runtime with compiler-rt, the tests
   # include their own minimal runtime
   compiler_rt_test_runtime(shadowcallstack NO_COMPILER_RT_HAS_GUARD)
+
+  if(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
+    compiler_rt_test_runtime(instrumentor-tools NO_COMPILER_RT_HAS_GUARD)
+  endif()
 endif()
 
 # Now that we've traversed all the directories and know all the lit testsuites,
diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
new file mode 100644
index 0000000000000..a9a1ca07829e4
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Lit test-suite setup for the instrumentor-tools runtimes.
+set(INSTRUMENTOR_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+# Per-arch suite directories generated below, and the targets they depend on.
+set(INSTRUMENTOR_TESTSUITES)
+set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+
+# Check if INSTRUMENTOR_SUPPORTED_ARCH is defined
+if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
+  message(STATUS "INSTRUMENTOR_SUPPORTED_ARCH is NOT DEFINED - not building tests")
+  return()
+endif()
+
+set(INSTRUMENTOR_TEST_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(INSTRUMENTOR_SUPPORTED_ARCH INSTRUMENTOR_TEST_ARCH)
+endif()
+
+
+# Configure one lit site config for `arch` under
+# <binary dir>/<test_mode>-<arch> and record that directory in
+# INSTRUMENTOR_TESTSUITES. The `sanitizer` argument is accepted but is not
+# referenced in the macro body.
+macro(add_instrumentor_testsuite test_mode sanitizer arch)
+  set(INSTRUMENTOR_LIT_TEST_MODE "${test_mode}")
+  set(CONFIG_NAME ${test_mode}-${arch})
+
+  set(INSTRUMENTOR_TEST_TARGET_ARCH ${arch})
+  get_test_cc_for_arch(${arch} INSTRUMENTOR_TEST_TARGET_CC INSTRUMENTOR_TEST_TARGET_CFLAGS)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py)
+  list(APPEND INSTRUMENTOR_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+
+endmacro()
+
+# One suite per architecture that survived the filtering above.
+if(INSTRUMENTOR_TEST_ARCH)
+  foreach(arch ${INSTRUMENTOR_TEST_ARCH})
+    add_instrumentor_testsuite("InstrumentorTools" instrumentor-tools ${arch})
+  endforeach()
+else()
+  message(WARNING "No architectures configured for instrumentor-tools tests!")
+endif()
+
+# Expose the suites through the check-instrumentor-tools target.
+if(INSTRUMENTOR_TESTSUITES)
+  add_lit_testsuite(check-instrumentor-tools "Running the instrumentor-tools tests"
+    ${INSTRUMENTOR_TESTSUITES}
+    DEPENDS ${INSTRUMENTOR_TEST_DEPS})
+else()
+  message(WARNING "No test suites configured for instrumentor-tools!")
+endif()
diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py
new file mode 100644
index 0000000000000..00f9e120168d7
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/lit.cfg.py
@@ -0,0 +1,75 @@
+# -*- Python -*-
+
+import os
+
+
+def get_required_attr(config, attr_name):
+    """Return config.<attr_name>, reporting a fatal lit error if it is unset.
+
+    An attribute that is missing or set to None counts as unset.
+    """
+    value = getattr(config, attr_name, None)
+    if value is not None:
+        return value
+    message = (
+        "No attribute %r in test configuration! You may need to run "
+        "tests from your build directory or add this attribute "
+        "to lit.site.cfg.py "
+    )
+    lit_config.fatal(message % attr_name)
+    return value
+
+
+# Setup config name.
+config.name = "InstrumentorTools-" + config.target_arch
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Setup executable root.
+# Only set when the site config provided a (non-None) binary directory.
+if (
+    hasattr(config, "instrumentor_lit_binary_dir")
+    and config.instrumentor_lit_binary_dir is not None
+):
+    config.test_exec_root = os.path.join(
+        config.instrumentor_lit_binary_dir, config.name
+    )
+
+# Test suffixes.
+config.suffixes = [".c", ".cpp", ".m", ".mm", ".ll", ".test"]
+
+# What to exclude.
+config.excludes = ["Inputs"]
+
+# Clang flags.
+# target_cflags is wrapped into a one-element list; clang_cflags is the same
+# list object, and the C++ flags are the C++-mode flags followed by it.
+target_cflags = [get_required_attr(config, "target_cflags")]
+clang_cflags = target_cflags
+clang_cxxflags = config.cxx_mode_flags + clang_cflags
+
+
+def build_invocation(compile_flags):
+    """Return the clang command line for the given flags, padded with spaces."""
+    command = [config.clang] + compile_flags
+    return " %s " % " ".join(command)
+
+
+def make_lib_name(name):
+    """Return the runtime library name for a tool; Darwin adds an _osx suffix."""
+    base = "clang_rt.instrumentor_" + name
+    if config.target_os == "Darwin":
+        return base + "_osx"
+    return base
+
+
+# Add clang substitutions.
+# NOTE(review): both keys end in a space, presumably so that "%clang " does
+# not also match the "%clangxx" prefix - confirm against lit's substitution
+# rules.
+config.substitutions.append(("%clang ", build_invocation(clang_cflags)))
+config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags)))
+
+# Library name passed to -l by the tests.
+flop_counter_lib = make_lib_name("flop_counter")
+config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
+
+# Directory passed to -L by the tests.
+config.substitutions.append(("%lib_dir", config.compiler_rt_libdir))
+
+# Add path to instrumentor config files
+# Resolved relative to this test directory: ../../lib/instrumentor-tools.
+config_dir = os.path.join(
+    config.test_source_root, "..", "..", "lib", "instrumentor-tools"
+)
+config.substitutions.append(("%config_dir", config_dir))
+
+# Check if running on a supported platform
+# Any target OS outside this list marks the whole suite unsupported.
+if config.target_os not in [
+    "Darwin",
+    "Linux",
+    "FreeBSD",
+]:
+    config.unsupported = True
diff --git a/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in
new file mode 100644
index 0000000000000..8474c796f8eeb
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in
@@ -0,0 +1,12 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+# The @...@ placeholders are filled in when CMake configures this template.
+config.instrumentor_lit_binary_dir = "@INSTRUMENTOR_LIT_BINARY_DIR@"
+config.target_cflags = "@INSTRUMENTOR_TEST_TARGET_CFLAGS@"
+config.target_arch = "@INSTRUMENTOR_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@INSTRUMENTOR_LIT_SOURCE_DIR@/lit.cfg.py")
diff --git a/compiler-rt/test/instrumentor-tools/simple_flops.c b/compiler-rt/test/instrumentor-tools/simple_flops.c
new file mode 100644
index 0000000000000..9434cb506f602
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/simple_flops.c
@@ -0,0 +1,49 @@
+// Test basic FLOP counting functionality
+//
+// This test verifies that the FLOP counter correctly counts floating-point
+// operations in a simple program.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// TODO: For the correct values we need to track fmuladd calls too.
+//
+// CHECK: Total FLOPs: 400
+// CHECK: Single (float):  100
+// CHECK: Double (double): 300
+
+#include <stdio.h>
+
+// Simple function with known FLOP count
+float compute_float(float a, float b, float c) {
+  // 2 FLOPs at source level: mul (b * c), add (a + ...)
+  return a + b * c;
+}
+
+// Double-precision counterpart of compute_float.
+double compute_double(double a, double b) {
+  // 4 FLOPs: mul, mul, add, div
+  return (a * a + b * b) / 2.0;
+}
+
+// Runs each helper 100 times and accumulates the results.
+int main(void) {
+  float f1 = 1.0f, f2 = 2.0f, f3 = 3.0f;
+  double d1 = 4.0, d2 = 5.0;
+
+  // Call functions multiple times to get meaningful counts
+  // Each iteration adds one float add (the +=) on top of the callee's work.
+  float result_f = 0.0f;
+  for (int i = 0; i < 100; i++) {
+    result_f += compute_float(f1, f2, f3);
+  }
+
+  // Each iteration adds one double add (the +=) on top of the callee's work.
+  double result_d = 0.0;
+  for (int i = 0; i < 100; i++) {
+    result_d += compute_double(d1, d2);
+  }
+
+  // Prevent optimization from removing the computations
+  if (result_f > 0.0f && result_d > 0.0) {
+    printf("Computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/vector_flops.cpp b/compiler-rt/test/instrumentor-tools/vector_flops.cpp
new file mode 100644
index 0000000000000..db729e5886849
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/vector_flops.cpp
@@ -0,0 +1,46 @@
+// Test FLOP counting with vector operations
+//
+// This test verifies that the FLOP counter correctly counts vector
+// floating-point operations.
+//
+// RUN: %clangxx -O2 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Total FLOPs:
+// CHECK: Vector FLOPs:
+
+#include <cmath>
+#include <stdio.h>
+
+// Function using vector operations (if vectorized by the compiler)
+// Per element at source level: two multiplies, one add and one std::sqrt call.
+void vector_compute(float *a, float *b, float *c, int n) {
+  for (int i = 0; i < n; i++) {
+    c[i] = std::sqrt(a[i] * a[i] + b[i] * b[i]);
+  }
+}
+
+// Fills two 1000-element inputs, runs vector_compute once, and sums the output.
+int main(void) {
+  const int N = 1000;
+  float a[N], b[N], c[N];
+
+  // Initialize arrays
+  for (int i = 0; i < N; i++) {
+    a[i] = (float)i;
+    b[i] = (float)(i + 1);
+  }
+
+  // Compute
+  vector_compute(a, b, c, N);
+
+  // Prevent optimization
+  // The reduction performs one float add per element.
+  float sum = 0.0f;
+  for (int i = 0; i < N; i++) {
+    sum += c[i];
+  }
+
+  if (sum > 0.0f) {
+    printf("Vector computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/libcxx/cmake/caches/Generic-llvm-libc.cmake b/libcxx/cmake/caches/Generic-llvm-libc.cmake
index 8e8a2699858b3..84f17224ebc22 100644
--- a/libcxx/cmake/caches/Generic-llvm-libc.cmake
+++ b/libcxx/cmake/caches/Generic-llvm-libc.cmake
@@ -18,6 +18,7 @@ set(COMPILER_RT_BUILD_SANITIZERS ON CACHE BOOL "")
 set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 set(COMPILER_RT_DEBUG OFF CACHE BOOL "")
 set(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY ON CACHE BOOL "")
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [libcxx] [Instrumentor] Add runtime examples: [1/N] A flop counter (PR #205698)

Reply via email to