Add a new tracking mechanism that captures function arguments and return values at instrumented function boundaries via two new SanitizerCoverage callbacks (submitted as an LLVM RFC):
__sanitizer_cov_trace_args __sanitizer_cov_trace_ret This requires a custom LLVM/Clang build with the trace-args/ret passes: LLVM RFC: https://discourse.llvm.org/t/rfc-sanitizercoverage-add-fsanitize-coverage-trace-args-trace-ret/91026 LLVM PR: https://github.com/llvm/llvm-project/pull/201410 Clone and build toolchain: git clone --recursive --depth 1 --shallow-submodules \ --jobs `nproc` https://github.com/yskzalloc/kcov-dataflow.git cd kcov-dataflow cd llvm-project cmake -S llvm -B build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ -DLLVM_ENABLE_LLD=ON \ -DLLVM_ENABLE_PROJECTS="clang;lld" \ -DLLVM_TARGETS_TO_BUILD="X86;AArch64" ninja -C build cd .. Build and boot kernel (using virtme-ng): export PATH=$PWD/llvm-project/build/bin:$PATH cd linux vng --build \ --configitem CONFIG_KCOV=y \ --configitem CONFIG_KCOV_DATAFLOW_ARGS=y \ --configitem CONFIG_KCOV_DATAFLOW_RET=y \ --configitem CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y \ --configitem CONFIG_DEBUG_INFO=y \ --configitem CONFIG_RUST=y # for rust module kselftest LLVM=1 CC=clang Core implementation in kernel/kcov_dataflow.c (kept separate from kcov.c at Alexander's request): - Per-task lock-free append-only buffer (records are dropped once it is full) exposed through the debugfs kcov_dataflow file - READ_ONCE/WRITE_ONCE atomic pattern (tested on arm64) - copy_from_kernel_nofault() for safe struct field reads - in_task() guard rejects interrupt context - Bit-31 recursion guard prevents INSTRUMENT_ALL re-entry Build system (scripts/Makefile.kcov, scripts/Makefile.lib): - CFLAGS_KCOV_DATAFLOW: -fsanitize-coverage=trace-args,trace-ret - RUSTFLAGS_KCOV_DATAFLOW: -Cllvm-args=-sanitizer-coverage-trace-args/ret - Per-file opt-in: KCOV_DATAFLOW_file.o := y - Respects KCOV_INSTRUMENT := n for noinstr exclusion - CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL for whole-kernel instrumentation Kconfig (lib/Kconfig.debug): - CONFIG_KCOV_DATAFLOW_ARGS / CONFIG_KCOV_DATAFLOW_RET - Depends on CONFIG_KCOV and CONFIG_DEBUG_INFO - CONFIG_KCOV_DATAFLOW_NO_INLINE (default n) 
- CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL Also fix rust/kernel/str.rs unused import (flags::* -> flags::GFP_KERNEL) which newer rustc (1.98-nightly) rejects as a hard error. Rust support requires rustc built against the custom LLVM with trace-args/ret passes compiled in: https://github.com/yskzalloc/rust Link: https://github.com/yskzalloc/kcov-dataflow/ Cc: Alexander Potapenko <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Nicolas Schier <[email protected]> Signed-off-by: Yunseong Kim <[email protected]> --- include/linux/sched.h | 10 ++ kernel/Makefile | 3 + kernel/kcov.c | 2 + kernel/kcov_dataflow.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 43 +++++++ rust/kernel/str.rs | 2 +- scripts/Makefile.kcov | 12 ++ scripts/Makefile.lib | 9 ++ 8 files changed, 404 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 373bcc0598d1..4b8aa73b3b67 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1541,6 +1541,16 @@ struct task_struct { /* KCOV sequence number: */ int kcov_sequence; +#if defined(CONFIG_KCOV_DATAFLOW_ARGS) || defined(CONFIG_KCOV_DATAFLOW_RET) + /* KCOV dataflow per-task sequence counter for TLV records: */ + u32 kcov_df_seq; + + /* KCOV dataflow: separate buffer for trace-args/trace-ret */ + unsigned int kcov_df_size; + void *kcov_df_area; + bool kcov_df_enabled; +#endif + /* Collect coverage from softirq context: */ unsigned int kcov_softirq; #endif diff --git a/kernel/Makefile b/kernel/Makefile index 1e1a31673577..b70e524c4074 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -98,6 +98,9 @@ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o audit_watch.o audit_fsnotify.o audit_tree.o obj-$(CONFIG_GCOV_KERNEL) += gcov/ obj-$(CONFIG_KCOV) += kcov.o +ifneq ($(CONFIG_KCOV_DATAFLOW_ARGS)$(CONFIG_KCOV_DATAFLOW_RET),) +obj-y += kcov_dataflow.o +endif obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FAIL_FUNCTION) += 
fail_function.o obj-$(CONFIG_KGDB) += debug/ diff --git a/kernel/kcov.c b/kernel/kcov.c index 1df373fb562b..0a859ee8334f 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -353,6 +353,8 @@ void notrace __sanitizer_cov_trace_switch(kcov_u64 val, void *arg) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ + + static void kcov_start(struct task_struct *t, struct kcov *kcov, unsigned int size, void *area, enum kcov_mode mode, int sequence) diff --git a/kernel/kcov_dataflow.c b/kernel/kcov_dataflow.c new file mode 100644 index 000000000000..721f742cbfe5 --- /dev/null +++ b/kernel/kcov_dataflow.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KCOV Dataflow: per-task function argument/return value capture. + * + * Exposes /sys/kernel/debug/kcov_dataflow, completely independent from + * /sys/kernel/debug/kcov. Own buffer, own ioctl, own mmap. + * + * TLV record layout (all u64): + * area[0]: total u64 words written (counter) + * [pos+0]: type_and_seq (0xE=entry, 0xF=return in upper 4 bits) + * [pos+1]: PC + * [pos+2]: meta (arg_idx | arg_size | ptr) + * [pos+3..N]: field values read via copy_from_kernel_nofault() + */ +#define pr_fmt(fmt) "kcov_dataflow: " fmt + +#define DISABLE_BRANCH_PROFILING +#include <linux/atomic.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/preempt.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/vmalloc.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/refcount.h> + +#define KCOV_DF_TYPE_ENTRY 0xE0000000ULL +#define KCOV_DF_TYPE_RET 0xF0000000ULL +#define KCOV_DF_MAGIC_BAD 0xBADADD85ULL +#define KCOV_DF_IS_ERR(p) ((unsigned long)(p) >= (unsigned long)-4095UL) + +/* Ioctl commands for /sys/kernel/debug/kcov_dataflow */ +#define 
KCOV_DF_INIT_TRACK _IOR('d', 1, unsigned long) +#define KCOV_DF_ENABLE _IO('d', 100) +#define KCOV_DF_DISABLE _IO('d', 101) + +struct kcov_dataflow { + refcount_t refcount; + spinlock_t lock; + unsigned int size; /* in u64 words */ + void *area; + struct task_struct *t; +}; + +static void kcov_df_put(struct kcov_dataflow *df) +{ + if (refcount_dec_and_test(&df->refcount)) { + vfree(df->area); + kfree(df); + } +} + +/* + * Core write function for dataflow records. + * Uses the same READ_ONCE/WRITE_ONCE pattern as write_comp_data() in kcov.c. + */ +static noinline notrace __no_sanitize_coverage void +kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr, + u64 *offsets, u32 num_fields) +{ + struct task_struct *t = current; + u64 *area; + unsigned long count, start_index, end_pos, max_pos; + u32 record_len, seq, i; + + if (!t->kcov_df_enabled) + return; + + if (!in_task()) + return; + + /* + * Prevent recursion: functions called by this callback + * (copy_from_kernel_nofault) may be instrumented. Use the + * sequence counter's high bit as a per-task guard. + */ + if (t->kcov_df_seq & (1U << 31)) + return; + t->kcov_df_seq |= (1U << 31); + + area = (u64 *)t->kcov_df_area; + if (!area) + goto out; + + max_pos = t->kcov_df_size * sizeof(u64); + + /* Record: header(1) + pc(1) + meta(1) + fields or scalar(max 1) */ + record_len = 3 + (num_fields > 0 ? 
num_fields : 1); + + count = READ_ONCE(area[0]); + + start_index = 1 + count; + end_pos = (start_index + record_len) * sizeof(u64); + if (unlikely(end_pos > max_pos)) + goto out; + + WRITE_ONCE(area[0], count + record_len); + barrier(); + + seq = ++t->kcov_df_seq; + area[start_index] = type_marker | + ((u64)(record_len - 3) << 24) | + (seq & 0x00FFFFFFULL); + area[start_index + 1] = pc; + area[start_index + 2] = meta; + + if (num_fields == 0) { + u64 val = 0; + u32 sz = (meta >> 48) & 0xFF; + + if (sz > sizeof(val)) + sz = sizeof(val); + if (ptr && !KCOV_DF_IS_ERR(ptr)) + copy_from_kernel_nofault(&val, ptr, sz); + area[start_index + 3] = val; + } else { + if (KCOV_DF_IS_ERR(ptr)) { + for (i = 0; i < num_fields; i++) + area[start_index + 3 + i] = KCOV_DF_MAGIC_BAD; + goto out; + } + for (i = 0; i < num_fields; i++) { + u64 off, sz, val = KCOV_DF_MAGIC_BAD; + void *fa; + + if (copy_from_kernel_nofault(&off, &offsets[i * 2], sizeof(off)) || + copy_from_kernel_nofault(&sz, &offsets[i * 2 + 1], sizeof(sz))) { + area[start_index + 3 + i] = KCOV_DF_MAGIC_BAD; + continue; + } + fa = (void *)((unsigned long)ptr + off); + val = 0; + if (sz <= sizeof(val)) + copy_from_kernel_nofault(&val, fa, sz); + else + copy_from_kernel_nofault(&val, fa, sizeof(val)); + area[start_index + 3 + i] = val; + } + } +out: + t->kcov_df_seq &= ~(1U << 31); +} + +#ifdef CONFIG_KCOV_DATAFLOW_ARGS +noinline void notrace __no_sanitize_coverage +__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr, + u64 *offsets, u32 num_fields); + +noinline void notrace __no_sanitize_coverage +__sanitizer_cov_trace_args(u64 pc, u32 arg_idx, u32 arg_size, void *arg_ptr, + u64 *offsets, u32 num_fields) +{ + u64 meta = ((u64)arg_idx << 56) | ((u64)arg_size << 48) | + ((u64)(unsigned long)arg_ptr & 0xFFFFFFFFFFFFULL); + kcov_df_write(KCOV_DF_TYPE_ENTRY, pc, meta, arg_ptr, + offsets, num_fields); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_args); +#endif + +#ifdef CONFIG_KCOV_DATAFLOW_RET +noinline void 
notrace __no_sanitize_coverage +__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val, + u64 *offsets, u32 num_fields); + +noinline void notrace __no_sanitize_coverage +__sanitizer_cov_trace_ret(u64 pc, u32 ret_size, void *ret_val, + u64 *offsets, u32 num_fields) +{ + u64 meta = ((u64)ret_size << 48) | + ((u64)(unsigned long)ret_val & 0xFFFFFFFFFFFFULL); + kcov_df_write(KCOV_DF_TYPE_RET, pc, meta, ret_val, + offsets, num_fields); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_ret); +#endif + +/* File operations for /sys/kernel/debug/kcov_dataflow */ + +static int kcov_df_open(struct inode *inode, struct file *filep) +{ + struct kcov_dataflow *df; + + df = kzalloc(sizeof(*df), GFP_KERNEL); + if (!df) + return -ENOMEM; + spin_lock_init(&df->lock); + refcount_set(&df->refcount, 1); + filep->private_data = df; + return nonseekable_open(inode, filep); +} + +static int kcov_df_close(struct inode *inode, struct file *filep) +{ + struct kcov_dataflow *df = filep->private_data; + unsigned long flags; + + spin_lock_irqsave(&df->lock, flags); + if (df->t == current) { + current->kcov_df_enabled = false; + current->kcov_df_area = NULL; + current->kcov_df_size = 0; + df->t = NULL; + } + spin_unlock_irqrestore(&df->lock, flags); + kcov_df_put(df); + return 0; +} + +static int kcov_df_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct kcov_dataflow *df = filep->private_data; + unsigned long size, off; + struct page *page; + unsigned long flags; + void *area; + int res = 0; + + spin_lock_irqsave(&df->lock, flags); + size = df->size * sizeof(u64); + if (!df->area || vma->vm_pgoff != 0 || + vma->vm_end - vma->vm_start != size) { + res = -EINVAL; + goto out; + } + area = df->area; + spin_unlock_irqrestore(&df->lock, flags); + + vm_flags_set(vma, VM_DONTEXPAND); + for (off = 0; off < size; off += PAGE_SIZE) { + page = vmalloc_to_page(area + off); + res = vm_insert_page(vma, vma->vm_start + off, page); + if (res) + return res; + } + return 0; +out: + 
spin_unlock_irqrestore(&df->lock, flags); + return res; +} + +static long kcov_df_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kcov_dataflow *df = filep->private_data; + unsigned long flags; + unsigned long size; + int res = 0; + + spin_lock_irqsave(&df->lock, flags); + switch (cmd) { + case KCOV_DF_INIT_TRACK: + if (df->area) { + res = -EBUSY; + break; + } + size = arg; + if (size < 2 || size > (128 << 20) / sizeof(u64)) { + res = -EINVAL; + break; + } + spin_unlock_irqrestore(&df->lock, flags); + df->area = vmalloc_user(size * sizeof(u64)); + if (!df->area) + return -ENOMEM; + spin_lock_irqsave(&df->lock, flags); + df->size = size; + break; + + case KCOV_DF_ENABLE: + if (!df->area || df->t) { + res = -EINVAL; + break; + } + df->t = current; + current->kcov_df_area = df->area; + current->kcov_df_size = df->size; + current->kcov_df_seq = 0; + barrier(); + current->kcov_df_enabled = true; + break; + + case KCOV_DF_DISABLE: + if (df->t != current) { + res = -EINVAL; + break; + } + current->kcov_df_enabled = false; + barrier(); + current->kcov_df_area = NULL; + current->kcov_df_size = 0; + df->t = NULL; + break; + + default: + res = -ENOTTY; + } + spin_unlock_irqrestore(&df->lock, flags); + return res; +} + +static const struct file_operations kcov_df_fops = { + .open = kcov_df_open, + .unlocked_ioctl = kcov_df_ioctl, + .compat_ioctl = kcov_df_ioctl, + .mmap = kcov_df_mmap, + .release = kcov_df_close, +}; + +static int __init kcov_dataflow_init(void) +{ + debugfs_create_file_unsafe("kcov_dataflow", 0600, NULL, NULL, + &kcov_df_fops); + return 0; +} +device_initcall(kcov_dataflow_init); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e2f976c3301b..a402f829f9f9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2261,6 +2261,49 @@ config KCOV_SELFTEST On test failure, causes the kernel to panic. Recommended to be enabled, ensuring critical functionality works as intended. 
+config KCOV_DATAFLOW_ARGS
+	bool "Enable KCOV dataflow: function argument capture"
+	depends on KCOV
+	depends on DEBUG_INFO
+	depends on $(cc-option,-fsanitize-coverage=trace-args)
+	help
+	  Captures function arguments at entry via /sys/kernel/debug/kcov_dataflow.
+	  Struct pointer arguments are auto-expanded using compiler DebugInfo
+	  metadata, recording individual field values at runtime.
+	  Enable per-file with: KCOV_DATAFLOW_file.o := y in the Makefile.
+	  Requires clang with -fsanitize-coverage=trace-args support.
+
+config KCOV_DATAFLOW_RET
+	bool "Enable KCOV dataflow: return value capture"
+	depends on KCOV
+	depends on DEBUG_INFO
+	depends on $(cc-option,-fsanitize-coverage=trace-ret)
+	help
+	  Captures function return values via /sys/kernel/debug/kcov_dataflow.
+	  Struct pointer returns are auto-expanded using compiler DebugInfo
+	  metadata, recording individual field values at runtime.
+	  Enable per-file with: KCOV_DATAFLOW_file.o := y in the Makefile.
+	  Requires clang with -fsanitize-coverage=trace-ret support.
+
+config KCOV_DATAFLOW_NO_INLINE
+	bool "Disable inlining for dataflow-instrumented files"
+	default n
+	depends on KCOV_DATAFLOW_ARGS || KCOV_DATAFLOW_RET
+	help
+	  Adds -fno-inline to files instrumented with KCOV_DATAFLOW.
+	  This ensures every function boundary is preserved, giving
+	  complete argument visibility. Disable for lower overhead at the
+	  cost of losing argument records for inlined functions.
+
+config KCOV_DATAFLOW_INSTRUMENT_ALL
+	bool "Instrument all kernel code with dataflow coverage"
+	depends on KCOV_DATAFLOW_ARGS || KCOV_DATAFLOW_RET
+	help
+	  Instrument all kernel objects with trace-args/trace-ret
+	  automatically. Individual files or directories can opt out
+	  with KCOV_DATAFLOW_file.o := n or KCOV_DATAFLOW := n.
+	  Warning: significantly increases code size and boot time.
+
 config DEBUG_AID_FOR_SYZBOT
 	bool "Additional debug code for syzbot"
 	default n
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index a435674f05ea..f447a25c67c9 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -3,7 +3,7 @@
 //! String representations.
 
 use crate::{
-    alloc::{flags::*, AllocError, KVec},
+    alloc::{flags::GFP_KERNEL, AllocError, KVec},
     error::{to_result, Result},
     fmt::{self, Write},
     prelude::*,
diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov
index 78305a84ba9d..a459c119795f 100644
--- a/scripts/Makefile.kcov
+++ b/scripts/Makefile.kcov
@@ -9,3 +9,15 @@ kcov-rflags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -Cllvm-args=-sanitizer-coverage
 
 export CFLAGS_KCOV := $(kcov-flags-y)
 export RUSTFLAGS_KCOV := $(kcov-rflags-y)
+
+# KCOV dataflow: only request the callbacks that kernel/kcov_dataflow.c defines
+kcov-dataflow-flags-$(CONFIG_KCOV_DATAFLOW_ARGS)	+= -fsanitize-coverage=trace-args
+kcov-dataflow-flags-$(CONFIG_KCOV_DATAFLOW_RET)		+= -fsanitize-coverage=trace-ret
+kcov-dataflow-flags-$(CONFIG_KCOV_DATAFLOW_NO_INLINE)	+= -fno-inline
+
+# Rust: sancov-module and level=3 already come from RUSTFLAGS_KCOV (KCOV dep)
+kcov-dataflow-rflags-$(CONFIG_KCOV_DATAFLOW_ARGS)	+= -Cllvm-args=-sanitizer-coverage-trace-args
+kcov-dataflow-rflags-$(CONFIG_KCOV_DATAFLOW_RET)	+= -Cllvm-args=-sanitizer-coverage-trace-ret
+
+export CFLAGS_KCOV_DATAFLOW := $(kcov-dataflow-flags-y)
+export RUSTFLAGS_KCOV_DATAFLOW := $(kcov-dataflow-rflags-y)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0a4fdd8bd975..b64fabb88ab9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -88,6 +88,15 @@ _c_flags += $(if $(patsubst n%,, \
 _rust_flags += $(if $(patsubst n%,, \
 	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),$(CONFIG_KCOV_INSTRUMENT_ALL))), \
 	$(RUSTFLAGS_KCOV))
+# KCOV dataflow respects KCOV_INSTRUMENT := n (noinstr exclusion)
+_c_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),y)),$(if $(patsubst n%,, \
+	$(KCOV_DATAFLOW_$(target-stem).o)$(KCOV_DATAFLOW)$(if $(is-kernel-object),$(CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL))), \
+	$(CFLAGS_KCOV_DATAFLOW)))
+_rust_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(target-stem).o)$(KCOV_INSTRUMENT)$(if $(is-kernel-object),y)),$(if $(patsubst n%,, \
+	$(KCOV_DATAFLOW_$(target-stem).o)$(KCOV_DATAFLOW)$(if $(is-kernel-object),$(CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL))), \
+	$(RUSTFLAGS_KCOV_DATAFLOW)))
 endif
 
 #
-- 
2.43.0

