llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Johannes Doerfert (jdoerfert)

<details>
<summary>Changes</summary>

This adds an instrumentor-tools folder to compiler-rt to showcase use cases of 
the instrumentor. The initial example is a program that, via instrumentation, 
counts the number of flops performed. Call and intrinsic support will follow 
after #<!-- -->198042.

This is the second try with more CMake magic after 
https://github.com/llvm/llvm-project/pull/205221 failed on some platforms.

Partially developed by Claude (AI), tested and verified by me.

---

Patch is 42.93 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/205698.diff


26 Files Affected:

- (modified) clang/cmake/caches/CrossWinToARMLinux.cmake (+1) 
- (modified) clang/cmake/caches/VectorEngine.cmake (+2) 
- (modified) clang/cmake/caches/cross-linux-toolchain.cmake (+1) 
- (modified) compiler-rt/CMakeLists.txt (+2) 
- (modified) compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake (+2) 
- (modified) compiler-rt/cmake/caches/AMDGPU.cmake (+1) 
- (modified) compiler-rt/cmake/caches/NVPTX.cmake (+1) 
- (modified) compiler-rt/cmake/caches/SPIRV64.cmake (+1) 
- (modified) compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake (+1) 
- (modified) compiler-rt/cmake/caches/hexagon-linux-builtins.cmake (+1) 
- (modified) compiler-rt/cmake/config-ix.cmake (+4) 
- (modified) compiler-rt/lib/CMakeLists.txt (+5) 
- (added) compiler-rt/lib/instrumentor-tools/CMakeLists.txt (+11) 
- (added) compiler-rt/lib/instrumentor-tools/README.md (+49) 
- (added) compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt (+82) 
- (added) compiler-rt/lib/instrumentor-tools/flop-counter/README.md (+77) 
- (added) 
compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json (+32) 
- (added) 
compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp (+164) 
- (added) compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h (+293) 
- (modified) compiler-rt/test/CMakeLists.txt (+5-1) 
- (added) compiler-rt/test/instrumentor-tools/CMakeLists.txt (+53) 
- (added) compiler-rt/test/instrumentor-tools/lit.cfg.py (+75) 
- (added) compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in (+12) 
- (added) compiler-rt/test/instrumentor-tools/simple_flops.c (+49) 
- (added) compiler-rt/test/instrumentor-tools/vector_flops.cpp (+46) 
- (modified) libcxx/cmake/caches/Generic-llvm-libc.cmake (+1) 


``````````diff
diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake 
b/clang/cmake/caches/CrossWinToARMLinux.cmake
index c47c4ac3bb73e..a3adc3a761ed0 100644
--- a/clang/cmake/caches/CrossWinToARMLinux.cmake
+++ b/clang/cmake/caches/CrossWinToARMLinux.cmake
@@ -196,6 +196,7 @@ 
set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_LIBFUZZER
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_PROFILE              
   OFF CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_CRT                  
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_ORC                  
   OFF CACHE BOOL "")
+set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS   
   OFF CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_DEFAULT_TARGET_ONLY        
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_INCLUDE_TESTS              
   ON CACHE BOOL "")
 set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_CAN_EXECUTE_TESTS          
   ON CACHE BOOL "")
diff --git a/clang/cmake/caches/VectorEngine.cmake 
b/clang/cmake/caches/VectorEngine.cmake
index b19e773a09049..b177e6d5257ac 100644
--- a/clang/cmake/caches/VectorEngine.cmake
+++ b/clang/cmake/caches/VectorEngine.cmake
@@ -43,6 +43,7 @@ 
set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL 
"")
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE 
BOOL "")
+set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF 
CACHE BOOL "")
 
 # VE supports builtins, crt, and profile only.
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
@@ -55,6 +56,7 @@ 
set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_CTX_PROFILE OFF CACHE BOOL "
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF 
CACHE BOOL "")
 
 # VE uses builtins from Compiler-RT.
 set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_USE_BUILTINS_LIBRARY TRUE CACHE 
BOOL "")
diff --git a/clang/cmake/caches/cross-linux-toolchain.cmake 
b/clang/cmake/caches/cross-linux-toolchain.cmake
index 8065961f7fcfe..d7b7d0fa1be4a 100644
--- a/clang/cmake/caches/cross-linux-toolchain.cmake
+++ b/clang/cmake/caches/cross-linux-toolchain.cmake
@@ -258,6 +258,7 @@ foreach(target ${LLVM_RUNTIME_TARGETS})
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_PROFILE                OFF CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_MEMPROF                OFF CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_BUILD_ORC                    OFF CACHE 
BOOL "")
+  set(RUNTIMES_${target}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS     OFF CACHE 
BOOL "")
 
   set(RUNTIMES_${target}_COMPILER_RT_INCLUDE_TESTS                ON CACHE 
BOOL "")
   set(RUNTIMES_${target}_COMPILER_RT_CAN_EXECUTE_TESTS            ON CACHE 
BOOL "")
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 115d953e456c9..252eacf181860 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -100,6 +100,8 @@ option(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED "Build 
SCUDO standalone for sha
 mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED)
 option(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC "Build SCUDO 
standalone with LLVM's libc headers" OFF)
 mark_as_advanced(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC)
+option(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS "Build Instrumentor tool runtimes" 
ON)
+mark_as_advanced(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
 
 if(FUCHSIA)
   set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS_DEFAULT OFF)
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake 
b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index a535cf9e3a8da..fee5f4a5720ed 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -138,3 +138,5 @@ endif()
 if (WIN32)
   set(ALL_ORC_SUPPORTED_ARCH ${X86_64})
 endif()
+
+set(ALL_INSTRUMENTOR_SUPPORTED_ARCH ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH})
diff --git a/compiler-rt/cmake/caches/AMDGPU.cmake 
b/compiler-rt/cmake/caches/AMDGPU.cmake
index f3a9510c4f311..a0e66c5431dc2 100644
--- a/compiler-rt/cmake/caches/AMDGPU.cmake
+++ b/compiler-rt/cmake/caches/AMDGPU.cmake
@@ -17,3 +17,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/NVPTX.cmake 
b/compiler-rt/cmake/caches/NVPTX.cmake
index dfeb96be190d4..7ebc37e585eaa 100644
--- a/compiler-rt/cmake/caches/NVPTX.cmake
+++ b/compiler-rt/cmake/caches/NVPTX.cmake
@@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/SPIRV64.cmake 
b/compiler-rt/cmake/caches/SPIRV64.cmake
index 693970557f211..d89d1fba336f6 100644
--- a/compiler-rt/cmake/caches/SPIRV64.cmake
+++ b/compiler-rt/cmake/caches/SPIRV64.cmake
@@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
 set(COMPILER_RT_PROFILE_BAREMETAL OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
diff --git a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake 
b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
index 632e9ea5758a0..ab78a1374f613 100644
--- a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
+++ b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake
@@ -14,6 +14,7 @@ set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "")
 set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "")
 # without this, build tries to use pthread which is not supported by 
hexagon-unknown-none-elf
 set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "" FORCE)
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 
 set(CMAKE_C_FLAGS "-ffreestanding" CACHE STRING "")
 set(CMAKE_CXX_FLAGS "-ffreestanding" CACHE STRING "")
diff --git a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake 
b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
index d9c9ff2a4655e..3d21073ce9616 100644
--- a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
+++ b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake
@@ -11,5 +11,6 @@ set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "")
 set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "")
diff --git a/compiler-rt/cmake/config-ix.cmake 
b/compiler-rt/cmake/config-ix.cmake
index 083f1c98d0f16..de67acb937afd 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -704,6 +704,9 @@ if(APPLE)
   list_intersect(ORC_SUPPORTED_ARCH
     ALL_ORC_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(INSTRUMENTOR_SUPPORTED_ARCH
+    ALL_INSTRUMENTOR_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 
 else()
   # Architectures supported by compiler-rt libraries.
@@ -739,6 +742,7 @@ else()
   filter_available_targets(GWP_ASAN_SUPPORTED_ARCH 
${ALL_GWP_ASAN_SUPPORTED_ARCH})
   filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH})
   filter_available_targets(ORC_SUPPORTED_ARCH ${ALL_ORC_SUPPORTED_ARCH})
+  filter_available_targets(INSTRUMENTOR_SUPPORTED_ARCH 
${ALL_INSTRUMENTOR_SUPPORTED_ARCH})
 endif()
 
 if (MSVC)
diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt
index e6158ec408895..779685240f235 100644
--- a/compiler-rt/lib/CMakeLists.txt
+++ b/compiler-rt/lib/CMakeLists.txt
@@ -78,3 +78,8 @@ endif()
 # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer
 # directories explicitly here.
 add_subdirectory(scudo/standalone/fuzz)
+
+# Instrumentor tools - educational tools demonstrating the Instrumentor pass
+if(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
+  add_subdirectory(instrumentor-tools)
+endif()
diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt 
b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
new file mode 100644
index 0000000000000..6f8e2fe352f5a
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
@@ -0,0 +1,11 @@
+# CMakeLists.txt for Instrumentor Examples
+#
+# This directory contains example runtimes that demonstrate how to use the
+# LLVM Instrumentor pass for various profiling and analysis tasks.
+
+include(AddCompilerRT)
+
+add_compiler_rt_component(instrumentor-tools)
+
+# Add subdirectories for specific examples
+add_subdirectory(flop-counter)
diff --git a/compiler-rt/lib/instrumentor-tools/README.md 
b/compiler-rt/lib/instrumentor-tools/README.md
new file mode 100644
index 0000000000000..5f50c7c7b001a
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/README.md
@@ -0,0 +1,49 @@
+# Instrumentor Tools
+
+This directory contains example runtime libraries that demonstrate how to use
+the LLVM Instrumentor pass for various profiling and analysis tasks.
+
+## Overview
+
+The LLVM Instrumentor is a configurable instrumentation pass that allows you to
+insert runtime calls at various program points (e.g., function entry/exit,
+memory operations, floating-point operations). Each example in this directory
+provides:
+
+1. A runtime library that implements the instrumentation callbacks
+2. An instrumentor configuration JSON file
+3. Tests demonstrating usage
+
+## Building
+
+The instrumentor tools are built as part of the compiler-rt build:
+
+```bash
+cmake -S llvm -B build -G Ninja \
+  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+  -DLLVM_ENABLE_PROJECTS="clang;compiler-rt"
+ninja -C build
+```
+
+The runtime libraries will be installed in:
+- Darwin: `lib/clang/<version>/lib/darwin/libclang_rt.<example>_osx.a`
+- Linux: `lib/clang/<version>/lib/linux/libclang_rt.<example>-<arch>.a`
+
+Configuration files will be installed in `share/llvm/instrumentor-configs/`.
+
+## Adding New Tools 
+
+To add a new instrumentor example:
+
+1. Create a new directory under `compiler-rt/lib/instrumentor-tools/`
+2. Add your runtime implementation (`.cpp` and `.h` files)
+3. Create an instrumentor configuration JSON file
+4. Add a `CMakeLists.txt` (see `flop-counter/CMakeLists.txt` as a template)
+5. Update `compiler-rt/lib/instrumentor-tools/CMakeLists.txt` to include your 
subdirectory
+6. Add tests in `compiler-rt/test/instrumentor-tools/`
+
+## Resources
+
+- [Instrumentor Documentation](../../../llvm/docs/Instrumentor.rst)
+- [Instrumentor Runtime Headers](../../../llvm/utils/instrumentor_runtime.h)
+- [Configuration Wizard](../../../llvm/utils/instrumentor-config-wizard.py)
diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt 
b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt
new file mode 100644
index 0000000000000..bed346a179e34
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt
@@ -0,0 +1,82 @@
+# CMakeLists.txt for FLOP Counter Example
+#
+# This example demonstrates counting floating-point operations using the
+# Instrumentor pass. It provides a runtime library that can be linked with
+# instrumented code to track and report FLOP counts.
+
+add_compiler_rt_component(flop-counter)
+
+set(FLOP_COUNTER_SOURCES
+  flop_counter_runtime.cpp
+  )
+
+set(FLOP_COUNTER_HEADERS
+  )
+
+# Include paths for instrumentor runtime headers
+# The instrumentor runtime headers are in llvm/utils
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Common flags
+set(FLOP_COUNTER_CFLAGS
+  ${COMPILER_RT_COMMON_CFLAGS}
+  ${COMPILER_RT_CXX_CFLAGS}
+  -std=c++17
+  )
+
+set(FLOP_COUNTER_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS})
+set(FLOP_COUNTER_LINK_LIBS ${COMPILER_RT_CXX_LINK_LIBS})
+
+# flop counter uses C++ standard library headers.
+if (TARGET cxx-headers OR HAVE_LIBCXX)
+  set(DEPS cxx-headers)
+endif()
+
+# Determine supported architectures
+if(APPLE)
+  # On Darwin, use the darwin OSX architectures
+  set(FLOP_COUNTER_SUPPORTED_ARCH arm64)
+  if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "")
+    set(FLOP_COUNTER_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+  endif()
+  if(DARWIN_osx_ARCHS)
+    set(FLOP_COUNTER_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+  endif()
+else()
+  # For non-Apple platforms, use the default target architecture
+  set(FLOP_COUNTER_SUPPORTED_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH})
+endif()
+
+message(STATUS "FLOP Counter supported architectures: 
${FLOP_COUNTER_SUPPORTED_ARCH}")
+
+# Build the static runtime library for Apple platforms
+if(APPLE)
+  add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter
+    STATIC
+    OS osx
+    ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH}
+    CFLAGS ${FLOP_COUNTER_CFLAGS}
+    SOURCES ${FLOP_COUNTER_SOURCES}
+    LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS}
+    LINK_LIBS ${FLOP_COUNTER_LINK_LIBS}
+    ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS}
+    DEPS ${DEPS}
+    PARENT_TARGET flop-counter)
+else()
+  add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter
+    STATIC
+    ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH}
+    CFLAGS ${FLOP_COUNTER_CFLAGS}
+    SOURCES ${FLOP_COUNTER_SOURCES}
+    LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS}
+    LINK_LIBS ${FLOP_COUNTER_LINK_LIBS}
+    ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS}
+    DEPS ${DEPS}
+    PARENT_TARGET flop-counter)
+endif()
+
+# Install the configuration file as a resource
+install(FILES flop_counter_config.json
+        DESTINATION share/llvm/instrumentor-configs
+        COMPONENT flop-counter)
diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md 
b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md
new file mode 100644
index 0000000000000..c00a3e57d1a65
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md
@@ -0,0 +1,77 @@
+# FLOP Counter
+
+A runtime library for counting floating-point operations in programs using the 
LLVM Instrumentor pass.
+
+## Features
+
+- **Precision Tracking**: Separates counts for single (float), double, and 
extended precision operations
+- **Operation Categorization**: Tracks adds, multiplications, divisions, FMA 
operations (TODO), and others (sqrt, sin, cos, etc.) (TODO)
+- **Vector Support**: Counts FLOPs in vector operations
+- **Thread-Safe**: Uses atomic operations for counter updates
+- **Low Overhead**: Minimal runtime overhead for counting
+- **Automatic Reporting**: Prints statistics at program exit
+
+## Usage
+
+### Basic Example
+
+```c
+#include <stdio.h>
+#include <math.h>
+
+double compute(double a, double b) {
+  return sqrt(a * a + b * b);
+}
+
+int main() {
+  double result = compute(3.0, 4.0);
+  printf("Result: %f\n", result);
+  return 0;
+}
+```
+
+Compile with:
+```bash
+clangxx -O2 -finstrumentor=flop_counter_config.json example.cpp \
+        -lclang_rt.flop_counter -o example
+```
+
+Run:
+```bash
+./example
+```
+
+Output:
+```
+Result: 5.000000
+
+=================================================
+           FLOP Counter Statistics
+=================================================
+Total FLOPs:                             3
+...
+```
+
+## Implementation Details
+
+### Instrumentation Points
+
+The FLOP counter instruments:
+
+1. **Binary FP Operations**: `fadd`, `fsub`, `fmul`, `fdiv`, `frem`
+2. **Unary FP Operations**: `fneg`
+3. TODO: **FP Intrinsics**: `llvm.fma`, `llvm.sqrt`, `llvm.sin`, `llvm.cos`, 
etc.
+
+### FLOP Counting Rules
+
+- **Regular operations**: 1 FLOP per operation
+- **FMA (Fused Multiply-Add)**: 2 FLOPs (multiply + add)
+- **Vector operations**: Counted per element
+- **Intrinsics**: TODO
+
+### Configuration
+
+The `flop_counter_config.json` file configures the instrumentor to:
+- Insert callbacks after floating-point binary/unary operations
+- Pass value size, type IDs, and opcodes to the runtime
+- Filter to only instrument FP math operations
diff --git 
a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json 
b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json
new file mode 100644
index 0000000000000..c3131c363fded
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json
@@ -0,0 +1,32 @@
+{
+  "configuration": {
+    "runtime_prefix": "__flop_counter_",
+    "runtime_prefix.description": "The runtime API prefix.",
+    "runtime_stubs_file": "rt",
+    "target_regex": "",
+    "target_regex.description": "Regular expression to be matched against the 
module target. Only targets that match this regex will be instrumented.",
+    "function_regex": "",
+    "function_regex.description": "Regular expression to be matched against a 
function name. Only functions that match this regex will be instrumented.",
+    "demangle_function_names": true,
+    "demangle_function_names.description": "Demangle functions names passed to 
the runtime.",
+    "host_enabled": true,
+    "host_enabled.description": "Instrument non-GPU targets",
+    "gpu_enabled": true,
+    "gpu_enabled.description": "Instrument GPU targets"
+  },
+  "instruction_post": {
+    "numeric": {
+      "enabled": true,
+      "filter": "type_id < 7 || ((type_id == 17 || type_id == 18) && 
sub_type_id < 7)",
+      "filter.description": "Static property filter to exclude 
instrumentation.",
+      "type_id": true,
+      "type_id.description": "The operation's type id.",
+      "sub_type_id": true,
+      "sub_type_id.description": "The operation's type id.",
+      "size": true,
+      "size.description": "The operation's type size.",
+      "opcode": true,
+      "opcode.description": "The instruction opcode."
+    }
+  }
+}
diff --git 
a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp 
b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
new file mode 100644
index 0000000000000..9eaa2d807838e
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
@@ -0,0 +1,164 @@
+//===-- flop_counter_runtime.cpp - FLOP Counter Runtime ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the runtime for counting floating-point operations.
+// It hooks into instrumentation points inserted by the LLVM Instrumentor pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+namespace {
+
+/// FLOP counter statistics (thread-safe using atomics)
+struct FlopCounterStats {
+  std::atomic<uint64_t> TotalFlops{0};
+  std::atomic<uint64_t> FloatOps{0};  // 32-bit float operations
+  std::atomic<uint64_t> DoubleOps{0}; // 64-bit double operations
+  std::atomic<uint64_t> ExtendedOps{
+      0}; // 80/128-bit extended precision operations
+  std::atomic<uint64_t> VectorFlops{0}; // Total FLOPs from vector operations
+  std::atomic<uint64_t> AddOps{0};
+  std::atomic<uint64_t> MulOps{0};
+  std::atomic<uint64_t> DivOps{0};
+  std::atomic<uint64_t> FmaOps{0};   // Fused multiply-add operations
+  std::atomic<uint64_t> OtherOps{0}; // sqrt, sin, cos, etc.
+};
+
+// Global statistics counters
+static FlopCounterStats *Stats = nullptr;
+
+enum {
+  LLVMOpcodeFAdd = 15,
+  LLVMOpcodeFSub = 17,
+  LLVMOpcodeFMul = 19,
+  LLVMOpcodeFDiv = 22,
+  LLVMOpcodeFRem = 25,
+  LLVMOpcodeFNeg = 13...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/205698
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to