https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/205698
>From 61cbfabb7ade682a64f516c871b2bacb1e3e324a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert <[email protected]> Date: Mon, 15 Jun 2026 11:20:02 -0700 Subject: [PATCH] [Instrumentor] Add runtime examples: [1/N] A flop counter This adds a instrumentor-tools folder into compiler RT to showcase use cases of the instrumentor. The initial example is a program that, via instrumentation, counts the number of flops performed. Call and intrinsic support will follow after #198042. Partially developped by Claude (AI), tested and verified by me. --- clang/cmake/caches/CrossWinToARMLinux.cmake | 1 + clang/cmake/caches/VectorEngine.cmake | 2 + .../cmake/caches/cross-linux-toolchain.cmake | 1 + compiler-rt/CMakeLists.txt | 2 + .../cmake/Modules/AllSupportedArchDefs.cmake | 2 + compiler-rt/cmake/caches/AMDGPU.cmake | 1 + compiler-rt/cmake/caches/NVPTX.cmake | 1 + compiler-rt/cmake/caches/SPIRV64.cmake | 1 + .../caches/hexagon-builtins-baremetal.cmake | 1 + .../cmake/caches/hexagon-linux-builtins.cmake | 1 + compiler-rt/cmake/config-ix.cmake | 4 + compiler-rt/lib/CMakeLists.txt | 5 + .../lib/instrumentor-tools/CMakeLists.txt | 11 + compiler-rt/lib/instrumentor-tools/README.md | 49 +++ .../flop-counter/CMakeLists.txt | 82 +++++ .../instrumentor-tools/flop-counter/README.md | 77 +++++ .../flop-counter/flop_counter_config.json | 32 ++ .../flop-counter/flop_counter_runtime.cpp | 164 ++++++++++ .../instrumentor-tools/instrumentor_runtime.h | 293 ++++++++++++++++++ compiler-rt/test/CMakeLists.txt | 6 +- .../test/instrumentor-tools/CMakeLists.txt | 48 +++ .../test/instrumentor-tools/lit.cfg.py | 75 +++++ .../instrumentor-tools/lit.site.cfg.py.in | 12 + .../test/instrumentor-tools/simple_flops.c | 49 +++ .../test/instrumentor-tools/vector_flops.cpp | 46 +++ libcxx/cmake/caches/Generic-llvm-libc.cmake | 1 + 26 files changed, 966 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/lib/instrumentor-tools/CMakeLists.txt create mode 100644 compiler-rt/lib/instrumentor-tools/README.md 
create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/README.md create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp create mode 100644 compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h create mode 100644 compiler-rt/test/instrumentor-tools/CMakeLists.txt create mode 100644 compiler-rt/test/instrumentor-tools/lit.cfg.py create mode 100644 compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in create mode 100644 compiler-rt/test/instrumentor-tools/simple_flops.c create mode 100644 compiler-rt/test/instrumentor-tools/vector_flops.cpp diff --git a/clang/cmake/caches/CrossWinToARMLinux.cmake b/clang/cmake/caches/CrossWinToARMLinux.cmake index c47c4ac3bb73e..a3adc3a761ed0 100644 --- a/clang/cmake/caches/CrossWinToARMLinux.cmake +++ b/clang/cmake/caches/CrossWinToARMLinux.cmake @@ -196,6 +196,7 @@ set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_LIBFUZZER set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "") set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_CRT ON CACHE BOOL "") set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") +set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "") set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_INCLUDE_TESTS ON CACHE BOOL "") set(RUNTIMES_${TOOLCHAIN_TARGET_TRIPLE}_COMPILER_RT_CAN_EXECUTE_TESTS ON CACHE BOOL "") diff --git a/clang/cmake/caches/VectorEngine.cmake b/clang/cmake/caches/VectorEngine.cmake index b19e773a09049..b177e6d5257ac 100644 --- a/clang/cmake/caches/VectorEngine.cmake +++ b/clang/cmake/caches/VectorEngine.cmake @@ -43,6 +43,7 @@ 
set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL " set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "") set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") +set(RUNTIMES_x86_64-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") # VE supports builtins, crt, and profile only. set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "") @@ -55,6 +56,7 @@ set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_CTX_PROFILE OFF CACHE BOOL " set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "") set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") +set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") # VE uses builtins from Compiler-RT. 
set(RUNTIMES_ve-unknown-linux-gnu_COMPILER_RT_USE_BUILTINS_LIBRARY TRUE CACHE BOOL "") diff --git a/clang/cmake/caches/cross-linux-toolchain.cmake b/clang/cmake/caches/cross-linux-toolchain.cmake index 8065961f7fcfe..d7b7d0fa1be4a 100644 --- a/clang/cmake/caches/cross-linux-toolchain.cmake +++ b/clang/cmake/caches/cross-linux-toolchain.cmake @@ -258,6 +258,7 @@ foreach(target ${LLVM_RUNTIME_TARGETS}) set(RUNTIMES_${target}_COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "") set(RUNTIMES_${target}_COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "") set(RUNTIMES_${target}_COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") + set(RUNTIMES_${target}_COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") set(RUNTIMES_${target}_COMPILER_RT_INCLUDE_TESTS ON CACHE BOOL "") set(RUNTIMES_${target}_COMPILER_RT_CAN_EXECUTE_TESTS ON CACHE BOOL "") diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 115d953e456c9..252eacf181860 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -100,6 +100,8 @@ option(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED "Build SCUDO standalone for sha mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED) option(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC "Build SCUDO standalone with LLVM's libc headers" OFF) mark_as_advanced(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC) +option(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS "Build Instrumentor tool runtimes" ON) +mark_as_advanced(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS) if(FUCHSIA) set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS_DEFAULT OFF) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index a535cf9e3a8da..fee5f4a5720ed 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -138,3 +138,5 @@ endif() if (WIN32) set(ALL_ORC_SUPPORTED_ARCH ${X86_64}) endif() + +set(ALL_INSTRUMENTOR_SUPPORTED_ARCH ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH}) diff 
--git a/compiler-rt/cmake/caches/AMDGPU.cmake b/compiler-rt/cmake/caches/AMDGPU.cmake index f3a9510c4f311..a0e66c5431dc2 100644 --- a/compiler-rt/cmake/caches/AMDGPU.cmake +++ b/compiler-rt/cmake/caches/AMDGPU.cmake @@ -17,3 +17,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "") set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "") +set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") diff --git a/compiler-rt/cmake/caches/NVPTX.cmake b/compiler-rt/cmake/caches/NVPTX.cmake index dfeb96be190d4..7ebc37e585eaa 100644 --- a/compiler-rt/cmake/caches/NVPTX.cmake +++ b/compiler-rt/cmake/caches/NVPTX.cmake @@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "") set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "") +set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") diff --git a/compiler-rt/cmake/caches/SPIRV64.cmake b/compiler-rt/cmake/caches/SPIRV64.cmake index 693970557f211..d89d1fba336f6 100644 --- a/compiler-rt/cmake/caches/SPIRV64.cmake +++ b/compiler-rt/cmake/caches/SPIRV64.cmake @@ -16,3 +16,4 @@ set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") set(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "") set(COMPILER_RT_PROFILE_BAREMETAL OFF CACHE BOOL "") +set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") diff --git a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake index 632e9ea5758a0..ab78a1374f613 100644 --- a/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake +++ b/compiler-rt/cmake/caches/hexagon-builtins-baremetal.cmake @@ -14,6 +14,7 @@ set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "") set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "") # without 
this, build tries to use pthread which is not supported by hexagon-unknown-none-elf set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "" FORCE) +set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") set(CMAKE_C_FLAGS "-ffreestanding" CACHE STRING "") set(CMAKE_CXX_FLAGS "-ffreestanding" CACHE STRING "") diff --git a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake index d9c9ff2a4655e..3d21073ce9616 100644 --- a/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake +++ b/compiler-rt/cmake/caches/hexagon-linux-builtins.cmake @@ -11,5 +11,6 @@ set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "") set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "") set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "") set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "") +set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "") set(COMPILER_RT_BUILTINS_ENABLE_PIC OFF CACHE BOOL "") set(COMPILER_RT_SUPPORTED_ARCH hexagon CACHE STRING "") diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 083f1c98d0f16..de67acb937afd 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -704,6 +704,9 @@ if(APPLE) list_intersect(ORC_SUPPORTED_ARCH ALL_ORC_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(INSTRUMENTOR_SUPPORTED_ARCH + ALL_INSTRUMENTOR_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) else() # Architectures supported by compiler-rt libraries. 
@@ -739,6 +742,7 @@ else() filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH}) filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH}) filter_available_targets(ORC_SUPPORTED_ARCH ${ALL_ORC_SUPPORTED_ARCH}) + filter_available_targets(INSTRUMENTOR_SUPPORTED_ARCH ${ALL_INSTRUMENTOR_SUPPORTED_ARCH}) endif() if (MSVC) diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt index e6158ec408895..779685240f235 100644 --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -78,3 +78,8 @@ endif() # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer # directories explicitly here. add_subdirectory(scudo/standalone/fuzz) + +# Instrumentor tools - educational tools demonstrating the Instrumentor pass +if(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS) + add_subdirectory(instrumentor-tools) +endif() diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt new file mode 100644 index 0000000000000..6f8e2fe352f5a --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt @@ -0,0 +1,11 @@ +# CMakeLists.txt for Instrumentor Examples +# +# This directory contains example runtimes that demonstrate how to use the +# LLVM Instrumentor pass for various profiling and analysis tasks. + +include(AddCompilerRT) + +add_compiler_rt_component(instrumentor-tools) + +# Add subdirectories for specific examples +add_subdirectory(flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/README.md b/compiler-rt/lib/instrumentor-tools/README.md new file mode 100644 index 0000000000000..5f50c7c7b001a --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/README.md @@ -0,0 +1,49 @@ +# Instrumentor Tools + +This directory contains example runtime libraries that demonstrate how to use +the LLVM Instrumentor pass for various profiling and analysis tasks. 
+ +## Overview + +The LLVM Instrumentor is a configurable instrumentation pass that allows you to +insert runtime calls at various program points (e.g., function entry/exit, +memory operations, floating-point operations). Each example in this directory +provides: + +1. A runtime library that implements the instrumentation callbacks +2. An instrumentor configuration JSON file +3. Tests demonstrating usage + +## Building + +The instrumentor tools are built as part of the compiler-rt build: + +```bash +cmake -S llvm -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DLLVM_ENABLE_PROJECTS="clang;compiler-rt" +ninja -C build +``` + +The runtime libraries will be installed in: +- Darwin: `lib/clang/<version>/lib/darwin/libclang_rt.<example>_osx.a` +- Linux: `lib/clang/<version>/lib/linux/libclang_rt.<example>-<arch>.a` + +Configuration files will be installed in `share/llvm/instrumentor-configs/`. + +## Adding New Tools + +To add a new instrumentor example: + +1. Create a new directory under `compiler-rt/lib/instrumentor-tools/` +2. Add your runtime implementation (`.cpp` and `.h` files) +3. Create an instrumentor configuration JSON file +4. Add a `CMakeLists.txt` (see `flop-counter/CMakeLists.txt` as a template) +5. Update `compiler-rt/lib/instrumentor-tools/CMakeLists.txt` to include your subdirectory +6. 
Add tests in `compiler-rt/test/instrumentor-tools/` + +## Resources + +- [Instrumentor Documentation](../../../llvm/docs/Instrumentor.rst) +- [Instrumentor Runtime Headers](../../../llvm/utils/instrumentor_runtime.h) +- [Configuration Wizard](../../../llvm/utils/instrumentor-config-wizard.py) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt new file mode 100644 index 0000000000000..bed346a179e34 --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt @@ -0,0 +1,82 @@ +# CMakeLists.txt for FLOP Counter Example +# +# This example demonstrates counting floating-point operations using the +# Instrumentor pass. It provides a runtime library that can be linked with +# instrumented code to track and report FLOP counts. + +add_compiler_rt_component(flop-counter) + +set(FLOP_COUNTER_SOURCES + flop_counter_runtime.cpp + ) + +set(FLOP_COUNTER_HEADERS + ) + +# Include paths for instrumentor runtime headers +# The instrumentor runtime headers are in llvm/utils +include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +# Common flags +set(FLOP_COUNTER_CFLAGS + ${COMPILER_RT_COMMON_CFLAGS} + ${COMPILER_RT_CXX_CFLAGS} + -std=c++17 + ) + +set(FLOP_COUNTER_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) +set(FLOP_COUNTER_LINK_LIBS ${COMPILER_RT_CXX_LINK_LIBS}) + +# flop counter uses C++ standard library headers. 
+if (TARGET cxx-headers OR HAVE_LIBCXX) + set(DEPS cxx-headers) +endif() + +# Determine supported architectures +if(APPLE) + # On Darwin, use the darwin OSX architectures + set(FLOP_COUNTER_SUPPORTED_ARCH arm64) + if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "") + set(FLOP_COUNTER_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES}) + endif() + if(DARWIN_osx_ARCHS) + set(FLOP_COUNTER_SUPPORTED_ARCH ${DARWIN_osx_ARCHS}) + endif() +else() + # For non-Apple platforms, use the default target architecture + set(FLOP_COUNTER_SUPPORTED_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH}) +endif() + +message(STATUS "FLOP Counter supported architectures: ${FLOP_COUNTER_SUPPORTED_ARCH}") + +# Build the static runtime library for Apple platforms +if(APPLE) + add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter + STATIC + OS osx + ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} + CFLAGS ${FLOP_COUNTER_CFLAGS} + SOURCES ${FLOP_COUNTER_SOURCES} + LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS} + LINK_LIBS ${FLOP_COUNTER_LINK_LIBS} + ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + DEPS ${DEPS} + PARENT_TARGET flop-counter) +else() + add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter + STATIC + ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} + CFLAGS ${FLOP_COUNTER_CFLAGS} + SOURCES ${FLOP_COUNTER_SOURCES} + LINK_FLAGS ${FLOP_COUNTER_LINK_FLAGS} + LINK_LIBS ${FLOP_COUNTER_LINK_LIBS} + ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + DEPS ${DEPS} + PARENT_TARGET flop-counter) +endif() + +# Install the configuration file as a resource +install(FILES flop_counter_config.json + DESTINATION share/llvm/instrumentor-configs + COMPONENT flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md new file mode 100644 index 0000000000000..c00a3e57d1a65 --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md @@ -0,0 +1,77 @@ +# FLOP Counter + +A runtime library for counting floating-point operations in programs using the LLVM Instrumentor 
pass. + +## Features + +- **Precision Tracking**: Separates counts for single (float), double, and extended precision operations +- **Operation Categorization**: Tracks adds, multiplications, divisions, FMA operations (TODO), and others (sqrt, sin, cos, etc.) (TODO) +- **Vector Support**: Counts FLOPs in vector operations +- **Thread-Safe**: Uses atomic operations for counter updates +- **Low Overhead**: Minimal runtime overhead for counting +- **Automatic Reporting**: Prints statistics at program exit + +## Usage + +### Basic Example + +```cpp +#include <stdio.h> +#include <math.h> + +double compute(double a, double b) { + return sqrt(a * a + b * b); +} + +int main() { + double result = compute(3.0, 4.0); + printf("Result: %f\n", result); + return 0; +} +``` + +Compile with: +```bash +clang++ -O2 -finstrumentor=flop_counter_config.json example.cpp \ + -lclang_rt.instrumentor_flop_counter -o example +``` + +Run: +```bash +./example +``` + +Output: +``` +Result: 5.000000 + +================================================= + FLOP Counter Statistics +================================================= +Total FLOPs: 3 +... +``` + +## Implementation Details + +### Instrumentation Points + +The FLOP counter instruments: + +1. **Binary FP Operations**: `fadd`, `fsub`, `fmul`, `fdiv`, `frem` +2. **Unary FP Operations**: `fneg` +3. TODO: **FP Intrinsics**: `llvm.fma`, `llvm.sqrt`, `llvm.sin`, `llvm.cos`, etc. 
+ +### FLOP Counting Rules + +- **Regular operations**: 1 FLOP per operation +- **FMA (Fused Multiply-Add)**: 2 FLOPs (multiply + add) +- **Vector operations**: Counted per element +- **Intrinsics**: TODO + +### Configuration + +The `flop_counter_config.json` file configures the instrumentor to: +- Insert callbacks after floating-point binary/unary operations +- Pass value size, type IDs, and opcodes to the runtime +- Filter to only instrument FP math operations diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json new file mode 100644 index 0000000000000..c3131c363fded --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json @@ -0,0 +1,32 @@ +{ + "configuration": { + "runtime_prefix": "__flop_counter_", + "runtime_prefix.description": "The runtime API prefix.", + "runtime_stubs_file": "rt", + "target_regex": "", + "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.", + "function_regex": "", + "function_regex.description": "Regular expression to be matched against a function name. 
Only functions that match this regex will be instrumented.",
+    "demangle_function_names": true,
+    "demangle_function_names.description": "Demangle function names passed to the runtime.",
+    "host_enabled": true,
+    "host_enabled.description": "Instrument non-GPU targets",
+    "gpu_enabled": true,
+    "gpu_enabled.description": "Instrument GPU targets"
+  },
+  "instruction_post": {
+    "numeric": {
+      "enabled": true,
+      "filter": "type_id < 7 || ((type_id == 18 || type_id == 19) && sub_type_id < 7)",
+      "filter.description": "Static property filter: only instrument operations whose type is a floating-point type (type ids 0-6) or a fixed/scalable vector (type ids 18/19) with a floating-point element type.",
+      "type_id": true,
+      "type_id.description": "The operation's type id.",
+      "sub_type_id": true,
+      "sub_type_id.description": "The operation's sub type id (the element type id for vector types).",
+      "size": true,
+      "size.description": "The operation's type size.",
+      "opcode": true,
+      "opcode.description": "The instruction opcode."
+    }
+  }
+}
diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
new file mode 100644
index 0000000000000..9eaa2d807838e
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp
@@ -0,0 +1,164 @@
+//===-- flop_counter_runtime.cpp - FLOP Counter Runtime ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the runtime for counting floating-point operations.
+// It hooks into instrumentation points inserted by the LLVM Instrumentor pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+namespace {
+
+/// FLOP counter statistics. Every counter is an independent atomic that is
+/// only accessed with relaxed ordering: concurrent hooks never lose an
+/// increment, but a report printed while other threads are still running is
+/// not a consistent snapshot across counters.
+struct FlopCounterStats {
+  std::atomic<uint64_t> TotalFlops{0};
+  std::atomic<uint64_t> FloatOps{0};    // Scalar 32-bit float operations
+  std::atomic<uint64_t> DoubleOps{0};   // Scalar 64-bit double operations
+  std::atomic<uint64_t> ExtendedOps{0}; // Scalar ops of any other FP type
+  std::atomic<uint64_t> VectorFlops{0}; // Total FLOPs from vector operations
+  std::atomic<uint64_t> AddOps{0};      // fadd and fsub
+  std::atomic<uint64_t> MulOps{0};      // fmul
+  std::atomic<uint64_t> DivOps{0};      // fdiv and frem
+  std::atomic<uint64_t> FmaOps{0};      // Fused multiply-add (not updated yet)
+  std::atomic<uint64_t> OtherOps{0};    // Every other opcode, e.g. fneg
+};
+
+/// Global statistics counters.
+///
+/// This is a statically allocated object instead of a heap allocation made in
+/// a constructor: std::atomic has a constexpr constructor, so the counters are
+/// constant-initialized and have a trivial destructor. Hooks that fire before
+/// __flop_counter_initialize or after __flop_counter_finalize (e.g. from other
+/// static constructors or destructors) therefore never see a null or freed
+/// pointer.
+FlopCounterStats Stats;
+
+/// Opcode values passed by the instrumentor for the instructions that are
+/// categorized below.
+/// NOTE(review): presumably llvm::Instruction opcode numbers, which must match
+/// the LLVM version that performs the instrumentation -- confirm.
+enum {
+  LLVMOpcodeFNeg = 13,
+  LLVMOpcodeFAdd = 15,
+  LLVMOpcodeFSub = 17,
+  LLVMOpcodeFMul = 19,
+  LLVMOpcodeFDiv = 22,
+  LLVMOpcodeFRem = 25,
+};
+
+} // namespace
+
+extern "C" {
+
+/// Kept so the runtime still exports this symbol. The counters are
+/// constant-initialized, so there is nothing left to set up here.
+__attribute__((constructor(1000))) void __flop_counter_initialize() {}
+
+/// Print the collected statistics to stdout. Registered as a destructor so the
+/// report is emitted at program exit. The counters stay valid afterwards;
+/// FLOPs recorded by code that runs later are simply not part of the report.
+__attribute__((destructor(1000))) void __flop_counter_finalize() {
+  // The counters are uint64_t, so use PRIu64 instead of assuming that
+  // uint64_t is unsigned long long.
+  std::printf("\n");
+  std::printf("=================================================\n");
+  std::printf(" FLOP Counter Statistics\n");
+  std::printf("=================================================\n");
+  std::printf("Total FLOPs: %20" PRIu64 "\n",
+              Stats.TotalFlops.load(std::memory_order_relaxed));
+  std::printf("\n");
+  std::printf("By Precision:\n");
+  std::printf(" Single (float): %20" PRIu64 "\n",
+              Stats.FloatOps.load(std::memory_order_relaxed));
+  std::printf(" Double (double): %20" PRIu64 "\n",
+              Stats.DoubleOps.load(std::memory_order_relaxed));
+  std::printf(" Extended (fp80/fp128): %20" PRIu64 "\n",
+              Stats.ExtendedOps.load(std::memory_order_relaxed));
+  std::printf(" Vector FLOPs: %20" PRIu64 "\n",
+              Stats.VectorFlops.load(std::memory_order_relaxed));
+  std::printf("\n");
+  std::printf("By Operation:\n");
+  std::printf(" Addition/Subtraction: %20" PRIu64 "\n",
+              Stats.AddOps.load(std::memory_order_relaxed));
+  std::printf(" Multiplication: %20" PRIu64 "\n",
+              Stats.MulOps.load(std::memory_order_relaxed));
+  std::printf(" Division: %20" PRIu64 "\n",
+              Stats.DivOps.load(std::memory_order_relaxed));
+  std::printf(" Fused Multiply-Add: %20" PRIu64 "\n",
+              Stats.FmaOps.load(std::memory_order_relaxed));
+  std::printf(" Other (sqrt, sin, ...): %20" PRIu64 "\n",
+              Stats.OtherOps.load(std::memory_order_relaxed));
+  std::printf("=================================================\n");
+}
+
+/// Hook called by instrumented code after a floating-point instruction.
+///
+/// \param TypeId    LLVM type id of the operation's type.
+/// \param SubTypeId LLVM type id of the element type; only read when
+///                  \p TypeId is a fixed or scalable vector.
+/// \param Size      Size of the operation's type as reported by the
+///                  instrumentor. NOTE(review): presumably in bytes, since it
+///                  is divided by per-element byte sizes below -- confirm.
+/// \param Opcode    Opcode of the instruction, compared against the
+///                  LLVMOpcode* constants above.
+void __flop_counter_post_numeric(int32_t TypeId, int32_t SubTypeId,
+                                 int32_t Size, int32_t Opcode) {
+  const bool IsVector =
+      TypeId == FixedVectorTyID || TypeId == ScalableVectorTyID;
+  // For vectors the precision is determined by the element type.
+  const int32_t ElemTypeId = IsVector ? SubTypeId : TypeId;
+
+  // Size of a single element. Types not listed here keep the full size and
+  // therefore count as one element.
+  int32_t ElemSize = Size;
+  switch (ElemTypeId) {
+  case HalfTyID:
+  case BFloatTyID:
+    ElemSize = 2;
+    break;
+  case FloatTyID:
+    ElemSize = 4;
+    break;
+  case DoubleTyID:
+    ElemSize = 8;
+    break;
+  case X86_FP80TyID:
+  case FP128TyID:
+  case PPC_FP128TyID:
+    ElemSize = 16;
+    break;
+  default:
+    break;
+  }
+
+  // One FLOP per element. An executed operation always counts as at least one
+  // FLOP; this also guards the division against a zero or negative size and
+  // against a reported size smaller than the element size assumed above.
+  uint64_t FlopCount = 1;
+  if (ElemSize > 0 && Size > ElemSize)
+    FlopCount = static_cast<uint64_t>(Size / ElemSize);
+
+  if (IsVector) {
+    Stats.VectorFlops.fetch_add(FlopCount, std::memory_order_relaxed);
+  } else {
+    // Categorize scalar operations by precision. Half and bfloat currently
+    // share the "extended" bucket with fp80, fp128 and ppc_fp128.
+    switch (ElemTypeId) {
+    case FloatTyID:
+      Stats.FloatOps.fetch_add(1, std::memory_order_relaxed);
+      break;
+    case DoubleTyID:
+      Stats.DoubleOps.fetch_add(1, std::memory_order_relaxed);
+      break;
+    default:
+      Stats.ExtendedOps.fetch_add(1, std::memory_order_relaxed);
+      break;
+    }
+  }
+
+  // Categorize by operation type.
+  switch (Opcode) {
+  case LLVMOpcodeFAdd:
+  case LLVMOpcodeFSub:
+    Stats.AddOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  case LLVMOpcodeFMul:
+    Stats.MulOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  case LLVMOpcodeFDiv:
+  case LLVMOpcodeFRem:
+    Stats.DivOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  default:
+    Stats.OtherOps.fetch_add(FlopCount, std::memory_order_relaxed);
+    break;
+  }
+
+  Stats.TotalFlops.fetch_add(FlopCount, std::memory_order_relaxed);
+}
+
+} // extern "C"
diff --git a/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h
new file mode 100644
index 0000000000000..641096f2c0e22
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h
@@ -0,0 +1,293 @@
+//===-- Instrumentor Runtime Helper Header -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header provides helper structures and functions for reading data
+// generated by the LLVM Instrumentor pass and passed to runtime functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTRUMENTOR_RUNTIME_H
+#define INSTRUMENTOR_RUNTIME_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/// Header for each value in a value pack. Value packs are used to pass function
+/// arguments and other variable-length data to the runtime. The format is:
+/// [ValueHeader][Padding][Value Data]
+/// where padding aligns the value data to 8-byte boundaries.
+typedef struct {
+  uint32_t size;    // Size of the value in bytes
+  uint32_t type_id; // LLVM Type::TypeID of the value
+} ValuePackHeader;
+
+/// Iterator for reading values from a value pack.
+typedef struct {
+  const char *current; // Current position in the pack
+  uint64_t offset;     // Byte offset from the start
+  uint32_t count;      // Number of elements in the pack
+  uint32_t index;      // Current element index
+} ValuePackIterator;
+
+/// Initialize a value pack iterator.
+/// The iterator is positioned on the first element (index 0, offset 0); no
+/// data is read from the pack here.
+/// \param iter The iterator to initialize
+/// \param pack_ptr Pointer to the start of the value pack
+/// \param num_elements Number of elements in the pack
+static inline void initValuePackIterator(ValuePackIterator *iter,
+                                         const void *pack_ptr,
+                                         uint32_t num_elements) {
+  iter->current = (const char *)pack_ptr;
+  iter->offset = 0;
+  iter->count = num_elements;
+  iter->index = 0;
+}
+
+/// Get the header for the current value.
+/// This dereferences iter->current, so it must only be called while the
+/// iterator points at a valid element (current is not NULL).
+static inline ValuePackHeader
+getValuePackHeader(const ValuePackIterator *iter) {
+  const ValuePackHeader *header = (const ValuePackHeader *)iter->current;
+  return *header;
+}
+
+/// Get a pointer to the current value data.
+/// The data is located after the header and after the padding that is derived
+/// from the value size stored in that header.
+static inline const void *getValuePackData(const ValuePackIterator *iter) {
+  // Skip header (8 bytes: size + type_id)
+  const char *data_start = iter->current + sizeof(ValuePackHeader);
+  // Calculate padding for 8-byte alignment
+  ValuePackHeader header = getValuePackHeader(iter);
+  // Number of bytes needed to round header.size up to a multiple of 8.
+  uint32_t padding = (8 - (header.size % 8)) % 8;
+  // Skip padding
+  return data_start + padding;
+}
+
+/// Move to the next value in the pack.
+/// The iterator advances by header size + padding + value size. Stepping off
+/// the last element leaves current pointing just past it with index == count;
+/// current only becomes NULL on a further call, so callers have to compare
+/// index against count before reading.
+static inline void nextValuePack(ValuePackIterator *iter) {
+  if (iter->index >= iter->count) {
+    iter->current = NULL;
+    return;
+  }
+  ValuePackHeader header = getValuePackHeader(iter);
+  // Same padding computation as in getValuePackData.
+  uint32_t padding = (8 - (header.size % 8)) % 8;
+  uint64_t advance = sizeof(ValuePackHeader) + padding + header.size;
+  iter->current += advance;
+  iter->offset += advance;
+  iter->index++;
+}
+
+/// Get the current offset in bytes from the start of the pack.
+static inline uint64_t getValuePackOffset(const ValuePackIterator *iter) {
+  return iter->offset;
+}
+
+/// Extract a specific value from a value pack by index.
+///
+/// The pack is walked from its first element, so the cost grows linearly with
+/// \p index.
+///
+/// \param pack_ptr Pointer to the start of the value pack
+/// \param num_elements Number of elements in the pack
+/// \param index Zero-based index of the value to extract
+/// \param header Output parameter for the value header (can be NULL)
+/// \return Pointer to the value data, or NULL if index is out of bounds
+static inline const void *getValuePackEntry(const void *pack_ptr,
+                                            uint32_t num_elements,
+                                            uint32_t index,
+                                            ValuePackHeader *header) {
+  if (!pack_ptr || index >= num_elements)
+    return NULL;
+
+  ValuePackIterator iter;
+  initValuePackIterator(&iter, pack_ptr, num_elements);
+
+  while (iter.current != NULL && iter.index < iter.count) {
+    ValuePackHeader h = getValuePackHeader(&iter);
+    if (iter.index == index) {
+      if (header)
+        *header = h;
+      return getValuePackData(&iter);
+    }
+    nextValuePack(&iter);
+  }
+
+  return NULL; // Index out of bounds
+}
+
+/// LLVM Type IDs for interpreting value pack data.
+/// These correspond to llvm::Type::TypeID enum values. The numeric values
+/// follow from the declaration order (HalfTyID is 0), so the order must not
+/// be changed independently of LLVM.
+enum LLVMTypeID {
+  HalfTyID = 0,  ///< 16-bit floating point type
+  BFloatTyID,    ///< 16-bit floating point type (7-bit significand)
+  FloatTyID,     ///< 32-bit floating point type
+  DoubleTyID,    ///< 64-bit floating point type
+  X86_FP80TyID,  ///< 80-bit floating point type (X87)
+  FP128TyID,     ///< 128-bit floating point type (112-bit significand)
+  PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC)
+  VoidTyID,      ///< type with no size
+  LabelTyID,     ///< Labels
+  MetadataTyID,  ///< Metadata
+  X86_AMXTyID,   ///< AMX vectors (8192 bits, X86 specific)
+  TokenTyID,     ///< Tokens
+  // Derived types... see DerivedTypes.h file.
+  IntegerTyID,        ///< Arbitrary bit width integers
+  ByteTyID,           ///< Arbitrary bit width bytes
+  FunctionTyID,       ///< Functions
+  PointerTyID,        ///< Pointers
+  StructTyID,         ///< Structures
+  ArrayTyID,          ///< Arrays
+  FixedVectorTyID,    ///< Fixed width SIMD vector type
+  ScalableVectorTyID, ///< Scalable SIMD vector type
+  TypedPointerTyID,   ///< Typed pointer used by some GPU targets
+  TargetExtTyID,      ///< Target extension type
+};
+
+/// Get the string name of an LLVM Type ID.
+/// \param type_id One of the LLVMTypeID values
+/// \return A static string naming the type id, or "unknown" for any value
+///         that is not part of LLVMTypeID
+static inline const char *getLLVMTypeIDName(uint32_t type_id) {
+  switch (type_id) {
+  case HalfTyID:
+    return "half";
+  case BFloatTyID:
+    return "bfloat";
+  case FloatTyID:
+    return "float";
+  case DoubleTyID:
+    return "double";
+  case X86_FP80TyID:
+    return "x86_fp80";
+  case FP128TyID:
+    return "fp128";
+  case PPC_FP128TyID:
+    return "ppc_fp128";
+  case VoidTyID:
+    return "void";
+  case LabelTyID:
+    return "label";
+  case MetadataTyID:
+    return "metadata";
+  case X86_AMXTyID:
+    return "x86_amx";
+  case TokenTyID:
+    return "token";
+  case IntegerTyID:
+    return "integer";
+  case ByteTyID:
+    // Distinct from IntegerTyID so that the two ids can be told apart.
+    return "byte";
+  case FunctionTyID:
+    return "function";
+  case PointerTyID:
+    return "pointer";
+  case StructTyID:
+    return "struct";
+  case ArrayTyID:
+    return "array";
+  case FixedVectorTyID:
+    return "fixed_vector";
+  case ScalableVectorTyID:
+    return "scalable_vector";
+  case TypedPointerTyID:
+    return "typed_pointer";
+  case TargetExtTyID:
+    return "target_ext";
+  default:
+    return "unknown";
+  }
+}
+
+#ifdef __cplusplus
+
+// C++ overlays for range-based iteration and quality of life improvements
+
+/// Range wrapper for value packs enabling range-based for loops.
+/// Example:
+///   for (auto val : ValuePackRange(pack_ptr, num_elements)) {
+///     // val is a ValueRef: a copy of the header plus a pointer to the data
+///   }
+class ValuePackRange {
+public:
+  struct ValueRef {
+    ValuePackHeader header; // Filled from getValuePackHeader() by operator*.
+    const void *data;       // Filled from getValuePackData() by operator*.
+
+    uint32_t type_id() const { return header.type_id; }
+    uint32_t size() const { return header.size; }
+    const char *type_name() const { return getLLVMTypeIDName(header.type_id); }
+
+    template <typename T> const T &as() const { // No type or size check here.
+      return *static_cast<const T *>(data);
+    }
+    template <typename T> const T *ptr() const { // Same cast, not dereferenced.
+      return static_cast<const T *>(data);
+    }
+  };
+
+  class iterator {
+  public:
+    iterator(const void *ptr, uint32_t num_elements, uint64_t max_offset)
+        : max_offset_(max_offset) {
+      initValuePackIterator(&iter_, ptr, num_elements);
+      if (ptr && !is_valid_position()) // Nothing to visit: null out current.
+        iter_.current = nullptr;
+    }
+
+    ValueRef operator*() const { // Returns the entry by value.
+      return ValueRef{getValuePackHeader(&iter_), getValuePackData(&iter_)};
+    }
+
+    iterator &operator++() {
+      nextValuePack(&iter_);
+      if (!is_valid_position()) // Past the last usable entry: null out current.
+        iter_.current = nullptr;
+      return *this;
+    }
+
+    bool operator!=(const iterator &other) const { // Compares `current` only.
+      return iter_.current != other.iter_.current;
+    }
+
+  private:
+    bool is_valid_position() const { // True while an entry can be read.
+      if (!iter_.current)
+        return false;
+      if (iter_.index >= iter_.count)
+        return false;
+      if (max_offset_ > 0 && iter_.offset >= max_offset_) // 0 means no limit.
+        return false;
+      return true;
+    }
+
+    ValuePackIterator iter_;
+    uint64_t max_offset_; // Offset bound; forwarded from max_size by begin().
+  };
+
+  ValuePackRange(const void *ptr, uint32_t num_elements, uint64_t max_size = 0)
+      : ptr_(ptr), num_elements_(num_elements), max_size_(max_size) {}
+
+  iterator begin() const { return iterator(ptr_, num_elements_, max_size_); }
+  iterator end() const { return iterator(nullptr, 0, 0); } // Sentinel.
+
+private:
+  const void *ptr_;
+  uint32_t num_elements_;
+  uint64_t max_size_;
+};
+
+/// Template helper to extract a typed value from a value pack by index.
+template <typename T>
+inline const T *getValueAs(const void *pack_ptr, uint32_t num_elements,
+                           uint32_t index) { // Null if the lookup fails.
+  return static_cast<const T *>(
+      getValuePackEntry(pack_ptr, num_elements, index, nullptr));
+}
+
+#endif // __cplusplus
+
+#endif // INSTRUMENTOR_RUNTIME_H
diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt
index 3fab82518e75f..701759ad5cabc 100644
--- a/compiler-rt/test/CMakeLists.txt
+++ b/compiler-rt/test/CMakeLists.txt
@@ -60,7 +60,7 @@ umbrella_lit_testsuite_begin(check-compiler-rt)
 set(COMPILER_RT_KNOWN_TEST_SUITES
   builtins;ctx_profile;fuzzer;interception;lsan;memprof;metadata
   ;orc;profile;sanitizer_common;shadowcallstack
-  ;ubsan;xray)
+  ;ubsan;xray;instrumentor-tools)
 list(APPEND COMPILER_RT_KNOWN_TEST_SUITES ${ALL_SANITIZERS})
 list(REMOVE_DUPLICATES COMPILER_RT_KNOWN_TEST_SUITES)
 # Sort the list so that's easier to read when emitting errors.
@@ -170,6 +170,10 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS)
   # ShadowCallStack does not yet provide a runtime with compiler-rt, the tests
   # include their own minimal runtime
   compiler_rt_test_runtime(shadowcallstack NO_COMPILER_RT_HAS_GUARD)
+
+  if(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS)
+    compiler_rt_test_runtime(instrumentor-tools NO_COMPILER_RT_HAS_GUARD)
+  endif()
 endif()
 
 # Now that we've traversed all the directories and know all the lit testsuites,
diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
new file mode 100644
index 0000000000000..a9a1ca07829e4
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
@@ -0,0 +1,48 @@
+set(INSTRUMENTOR_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+set(INSTRUMENTOR_TESTSUITES)
+set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+
+# Without INSTRUMENTOR_SUPPORTED_ARCH no test suite is configured: stop here.
+if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
+  message(STATUS "INSTRUMENTOR_SUPPORTED_ARCH is NOT DEFINED - not building tests")
+  return()
+endif()
+
+set(INSTRUMENTOR_TEST_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(INSTRUMENTOR_SUPPORTED_ARCH INSTRUMENTOR_TEST_ARCH)
+endif()
+
+
+macro(add_instrumentor_testsuite test_mode sanitizer arch) # `sanitizer` is unused.
+  set(INSTRUMENTOR_LIT_TEST_MODE "${test_mode}")
+  set(CONFIG_NAME ${test_mode}-${arch}) # One lit config directory per arch.
+
+  set(INSTRUMENTOR_TEST_TARGET_ARCH ${arch})
+  get_test_cc_for_arch(${arch} INSTRUMENTOR_TEST_TARGET_CC INSTRUMENTOR_TEST_TARGET_CFLAGS)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py)
+  list(APPEND INSTRUMENTOR_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+
+endmacro()
+
+if(INSTRUMENTOR_TEST_ARCH)
+  foreach(arch ${INSTRUMENTOR_TEST_ARCH})
+    add_instrumentor_testsuite("InstrumentorTools" instrumentor-tools ${arch})
+  endforeach()
+else()
+  message(WARNING "No architectures configured for instrumentor-tools tests!")
+endif()
+
+if(INSTRUMENTOR_TESTSUITES)
+  add_lit_testsuite(check-instrumentor-tools "Running the instrumentor-tools tests"
+    ${INSTRUMENTOR_TESTSUITES}
+    DEPENDS ${INSTRUMENTOR_TEST_DEPS})
+else()
+  message(WARNING "No test suites configured for instrumentor-tools!")
+endif()
diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py
new file mode 100644
index 0000000000000..00f9e120168d7
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/lit.cfg.py
@@ -0,0 +1,75 @@
+# -*- Python -*-
+
+import os
+
+
+def get_required_attr(config, attr_name):  # Fetch attr; fatal error if None.
+    attr_value = getattr(config, attr_name, None)
+    if attr_value is None:
+        lit_config.fatal(
+            "No attribute %r in test configuration! You may need to run "
+            "tests from your build directory or add this attribute "
+            "to lit.site.cfg.py " % attr_name
+        )
+    return attr_value
+
+
+# Setup config name.
+config.name = "InstrumentorTools-" + config.target_arch
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Setup executable root (only when the site config provided a binary dir).
+if (
+    hasattr(config, "instrumentor_lit_binary_dir")
+    and config.instrumentor_lit_binary_dir is not None
+):
+    config.test_exec_root = os.path.join(
+        config.instrumentor_lit_binary_dir, config.name
+    )
+
+# Test suffixes.
+config.suffixes = [".c", ".cpp", ".m", ".mm", ".ll", ".test"]
+
+# What to exclude.
+config.excludes = ["Inputs"]
+
+# Clang flags.
+target_cflags = [get_required_attr(config, "target_cflags")]  # One-item list.
+clang_cflags = target_cflags  # Alias of the same list, not a copy.
+clang_cxxflags = config.cxx_mode_flags + clang_cflags
+
+
+def build_invocation(compile_flags):  # Space-padded "<clang> <flags...>" string.
+    return " " + " ".join([config.clang] + compile_flags) + " "
+
+
+def make_lib_name(name):  # Library name as passed to -l (see %flop_counter_lib).
+    if config.target_os != "Darwin":
+        return "clang_rt.instrumentor_" + name
+    return "clang_rt.instrumentor_" + name + "_osx"  # Darwin-only suffix.
+
+
+# Add clang substitutions (patterns end in a space; so do the replacements).
+config.substitutions.append(("%clang ", build_invocation(clang_cflags)))
+config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags)))
+
+flop_counter_lib = make_lib_name("flop_counter")
+config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
+
+config.substitutions.append(("%lib_dir", config.compiler_rt_libdir))
+
+# Add path to instrumentor config files (<test dir>/../../lib/instrumentor-tools)
+config_dir = os.path.join(
+    config.test_source_root, "..", "..", "lib", "instrumentor-tools"
+)
+config.substitutions.append(("%config_dir", config_dir))
+
+# Mark the suite unsupported outside Darwin, Linux and FreeBSD.
+if config.target_os not in [
+    "Darwin",
+    "Linux",
+    "FreeBSD",
+]:
+    config.unsupported = True
diff --git a/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in
new file mode 100644
index 0000000000000..8474c796f8eeb
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in
@@ -0,0 +1,12 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.instrumentor_lit_binary_dir = "@INSTRUMENTOR_LIT_BINARY_DIR@"
+config.target_cflags = "@INSTRUMENTOR_TEST_TARGET_CFLAGS@"
+config.target_arch = "@INSTRUMENTOR_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load the tool-specific config (lit.cfg.py), which reads the values set above.
+lit_config.load_config(config, "@INSTRUMENTOR_LIT_SOURCE_DIR@/lit.cfg.py")
diff --git a/compiler-rt/test/instrumentor-tools/simple_flops.c b/compiler-rt/test/instrumentor-tools/simple_flops.c
new file mode 100644
index 0000000000000..9434cb506f602
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/simple_flops.c
@@ -0,0 +1,49 @@
+// Test basic FLOP counting functionality
+//
+// This test verifies that the FLOP counter correctly counts floating-point
+// operations in a simple program.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// TODO: Track fmuladd calls too; until then the expected values undercount.
+// Expected totals for the two 100-iteration loops in main():
+// CHECK: Total FLOPs: 400
+// CHECK: Single (float): 100
+// CHECK: Double (double): 300
+
+#include <stdio.h>
+
+// Simple function with known FLOP count
+float compute_float(float a, float b, float c) {
+  // 2 FLOPs: mul, add
+  return a + b * c;
+}
+
+double compute_double(double a, double b) {
+  // 4 FLOPs: mul, mul, add, div
+  return (a * a + b * b) / 2.0;
+}
+
+int main(void) {
+  float f1 = 1.0f, f2 = 2.0f, f3 = 3.0f;
+  double d1 = 4.0, d2 = 5.0;
+
+  // Call functions multiple times to get meaningful counts
+  float result_f = 0.0f;
+  for (int i = 0; i < 100; i++) {
+    result_f += compute_float(f1, f2, f3); // Plus one float add per call.
+  }
+
+  double result_d = 0.0;
+  for (int i = 0; i < 100; i++) {
+    result_d += compute_double(d1, d2); // Plus one double add per call.
+  }
+
+  // Prevent optimization from removing the computations
+  if (result_f > 0.0f && result_d > 0.0) {
+    printf("Computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/vector_flops.cpp b/compiler-rt/test/instrumentor-tools/vector_flops.cpp
new file mode 100644
index 0000000000000..db729e5886849
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/vector_flops.cpp
@@ -0,0 +1,46 @@
+// Test FLOP counting with vector operations
+//
+// This test verifies that the FLOP counter correctly counts vector
+// floating-point operations.
+//
+// RUN: %clangxx -O2 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Total FLOPs:
+// CHECK: Vector FLOPs:
+
+#include <cmath>
+#include <stdio.h>
+
+// Function using vector operations (if vectorized by the compiler)
+void vector_compute(float *a, float *b, float *c, int n) {
+  for (int i = 0; i < n; i++) {
+    c[i] = std::sqrt(a[i] * a[i] + b[i] * b[i]); // 2 mul, 1 add, 1 sqrt.
+  }
+}
+
+int main(void) {
+  const int N = 1000;
+  float a[N], b[N], c[N];
+
+  // Initialize arrays: a[i] = i, b[i] = i + 1
+  for (int i = 0; i < N; i++) {
+    a[i] = (float)i;
+    b[i] = (float)(i + 1);
+  }
+
+  // Compute
+  vector_compute(a, b, c, N);
+
+  // Consume every result so the computation is not optimized away
+  float sum = 0.0f;
+  for (int i = 0; i < N; i++) {
+    sum += c[i];
+  }
+
+  if (sum > 0.0f) {
+    printf("Vector computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/libcxx/cmake/caches/Generic-llvm-libc.cmake b/libcxx/cmake/caches/Generic-llvm-libc.cmake
index 8e8a2699858b3..84f17224ebc22 100644
--- a/libcxx/cmake/caches/Generic-llvm-libc.cmake
+++ b/libcxx/cmake/caches/Generic-llvm-libc.cmake
@@ -18,6 +18,7 @@ set(COMPILER_RT_BUILD_SANITIZERS ON CACHE BOOL "")
 set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_INSTRUMENTOR_TOOLS OFF CACHE BOOL "")
 set(COMPILER_RT_DEBUG OFF CACHE BOOL "")
 set(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY ON CACHE BOOL "")
 
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
