Add a per-task recursion guard to kcov_df_write() using the high bit of
kcov_dataflow_seq. This prevents infinite recursion when
CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL is enabled: functions called by the
callback itself (copy_from_kernel_nofault, xadd helpers) are also
instrumented and would re-enter kcov_df_write() without this guard.

The guard uses the sequence counter's bit 31 as a re-entrancy flag.
The low 24 bits (used for TLV record sequence numbers) are unaffected.

Also:
- Exclude kcov.o, extable.o, softirq.o from dataflow instrumentation
  (same pattern as KCOV_INSTRUMENT exclusions)
- Add Documentation/dev-tools/kcov-dataflow.rst with:
  - Prerequisites and Kconfig options
  - Per-module instrumentation instructions
  - Complete C example for data collection
  - Ring buffer format specification
  - Ioctl interface reference
  - Fork interception example for child process tracing
  - Rust module support via post-compilation pipeline

Signed-off-by: Yunseong Kim <[email protected]>
---
 Documentation/dev-tools/kcov-dataflow.rst | 282 ++++++++++++++++++++++++++++++
 kernel/Makefile                           |   3 +
 kernel/kcov.c                             |  14 +-
 3 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/Documentation/dev-tools/kcov-dataflow.rst b/Documentation/dev-tools/kcov-dataflow.rst
new file mode 100644
index 000000000000..5941df9f29e6
--- /dev/null
+++ b/Documentation/dev-tools/kcov-dataflow.rst
@@ -0,0 +1,282 @@
+KCOV-Dataflow: function argument and return value extraction
+=============================================================
+
+KCOV-Dataflow captures function arguments and return values — including
+automatic struct field decomposition — at instrumented kernel function
+boundaries. It provides per-task, lock-free ring buffers accessible via
+``mmap()``, enabling data-flow-aware fuzzing and post-mortem contract
+verification.
+
+Unlike KCOV's ``trace-pc`` which reports *which* code executed,
+KCOV-Dataflow reports *what values* were passed and returned. This is
+a completely separate device from ``/sys/kernel/debug/kcov``.
+
+Prerequisites
+-------------
+
+KCOV-Dataflow requires Clang/LLVM with the ``dataflow-args`` and
+``dataflow-ret`` SanitizerCoverage extensions. With a standard
+(unpatched) compiler these Kconfig options are not available.
+
+To enable KCOV-Dataflow, configure the kernel with::
+
+        CONFIG_KCOV=y
+        CONFIG_KCOV_DATAFLOW_ARGS=y
+        CONFIG_KCOV_DATAFLOW_RET=y
+
+Optional: instrument the entire kernel (significant overhead)::
+
+        CONFIG_KCOV_DATAFLOW_INSTRUMENT_ALL=y
+
+Coverage data becomes accessible once debugfs is mounted::
+
+        mount -t debugfs none /sys/kernel/debug
+
+Per-module instrumentation
+--------------------------
+
+To instrument a specific module, add to its Makefile::
+
+        KCOV_DATAFLOW_my_module.o := y
+
+For example, to instrument the Android binder driver::
+
+        # drivers/android/Makefile
+        KCOV_DATAFLOW_binder.o := y
+        KCOV_DATAFLOW_binder_alloc.o := y
+
+For Rust modules, add to the crate's Makefile::
+
+        # drivers/android/binder/Makefile
+        KCOV_DATAFLOW := y
+
+To instrument an entire directory, set the variable without a filename::
+
+        # fs/Makefile
+        KCOV_DATAFLOW := y
+
+The build system automatically adds the required compiler flags
+(``-fsanitize-coverage=dataflow-args,dataflow-ret -g``).
+
+Data collection
+---------------
+
+The following program demonstrates how to collect function argument and
+return value data for a single syscall:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <sys/types.h>
+    #include <sys/ioctl.h>
+    #include <sys/mman.h>
+    #include <unistd.h>
+    #include <fcntl.h>
+
+    #define KCOV_DF_INIT_TRACE  _IOR('d', 1, unsigned long)
+    #define KCOV_DF_ENABLE      _IO('d', 100)
+    #define KCOV_DF_DISABLE     _IO('d', 101)
+    #define BUF_SIZE            (64 << 10)  /* 64K words = 512KB */
+
+    int main(void)
+    {
+        int fd;
+        uint64_t *buf, n, i;
+
+        fd = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+        if (fd == -1)
+            perror("open"), exit(1);
+
+        /* Allocate buffer (size in u64 words). */
+        if (ioctl(fd, KCOV_DF_INIT_TRACE, BUF_SIZE))
+            perror("ioctl(INIT)"), exit(1);
+
+        /* Map the buffer into user space. */
+        buf = (uint64_t *)mmap(NULL, BUF_SIZE * sizeof(uint64_t),
+                               PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+        if (buf == MAP_FAILED)
+            perror("mmap"), exit(1);
+
+        /* Enable data-flow collection for this task. */
+        if (ioctl(fd, KCOV_DF_ENABLE, 0))
+            perror("ioctl(ENABLE)"), exit(1);
+
+        /* Reset counter. */
+        __atomic_store_n(&buf[0], 0, __ATOMIC_RELAXED);
+
+        /* === Trigger syscall(s) here === */
+        read(-1, NULL, 0);
+
+        /* Read how many words were written. */
+        n = __atomic_load_n(&buf[0], __ATOMIC_RELAXED);
+
+        /* Parse TLV records. Assumes one value word per record (3 header
+         * words + 1 value); a truncated trailing record is not printed.
+         */
+        for (i = 1; i + 3 <= n; i += 4) {
+            uint64_t type_seq = buf[i];
+            unsigned long long pc  = buf[i + 1];
+            uint64_t meta     = buf[i + 2];
+            unsigned long long val = buf[i + 3];
+            uint32_t type     = (type_seq >> 28) & 0xF;
+            uint32_t seq      = type_seq & 0x00FFFFFF;
+            uint32_t arg_idx  = (meta >> 56) & 0xFF;
+            uint32_t size     = (meta >> 48) & 0xFF;
+            printf("[%s] seq=%u pc=0x%llx arg_idx=%u size=%u val=0x%llx\n",
+                   type == 0xE ? "ENTRY" : "RET",
+                   seq, pc, arg_idx, size, val);
+        }
+
+        if (ioctl(fd, KCOV_DF_DISABLE, 0))
+            perror("ioctl(DISABLE)"), exit(1);
+
+        munmap(buf, BUF_SIZE * sizeof(uint64_t));
+        close(fd);
+        return 0;
+    }
+
+Ring buffer format
+------------------
+
+The buffer is an array of ``u64`` words::
+
+        buf[0]: atomic counter — total words written
+
+Each record occupies 3 + N words:
+
++--------+------------------+---------------------------------------------+
+| Offset | Field            | Description                                 |
++========+==================+=============================================+
+| 0      | type_and_seq     | bits[31:28] = 0xE (entry) or 0xF (return),  |
+|        |                  | bits[23:0] = per-task sequence number       |
++--------+------------------+---------------------------------------------+
+| 1      | pc               | Instrumented function address               |
++--------+------------------+---------------------------------------------+
+| 2      | meta             | bits[63:56] = arg_idx (0 for return),       |
+|        |                  | bits[55:48] = size in bytes,                |
+|        |                  | bits[47:0] = raw pointer value              |
++--------+------------------+---------------------------------------------+
+| 3..N+2 | field_val[N]     | Struct field values or single scalar        |
++--------+------------------+---------------------------------------------+
+
+Magic values:
+
+- ``0xBADADD85``: field read failed (pointer was invalid/freed/poisoned)
+
+Safety
+------
+
+- Callbacks are ``notrace``, ``__no_sanitize_coverage``, ``noinline``
+  to prevent recursion.
+- All pointer reads use ``copy_from_kernel_nofault()`` — survives
+  freed, poisoned, or unmapped memory.
+- An ``in_task()`` guard rejects calls from hardirq/softirq/NMI context,
+  preventing reentrant buffer corruption.
+- No ``printk`` or allocation in the data path.
+- When not enabled for a task, overhead is a single boolean check.
+
+Ioctl interface
+---------------
+
++---------------------+----------------------------+---------------------------+
+| Command             | Value                      | Description               |
++=====================+============================+===========================+
+| KCOV_DF_INIT_TRACE  | ``_IOR('d', 1, unsigned    | Allocate buffer           |
+|                     | long)``                    | (size in u64 words)       |
++---------------------+----------------------------+---------------------------+
+| KCOV_DF_ENABLE      | ``_IO('d', 100)``          | Start collection for      |
+|                     |                            | current task              |
++---------------------+----------------------------+---------------------------+
+| KCOV_DF_DISABLE     | ``_IO('d', 101)``          | Stop collection           |
++---------------------+----------------------------+---------------------------+
+
+Compatibility
+-------------
+
+KCOV-Dataflow is completely independent from legacy KCOV:
+
+- Separate device: ``/sys/kernel/debug/kcov_dataflow``
+- Separate ioctl namespace (``'d'`` vs ``'c'``)
+- Separate per-task buffer
+- Both can be used simultaneously without interference
+- syzkaller and other KCOV users are unaffected
+
+Rust module support
+-------------------
+
+Rust kernel modules are supported via a post-compilation pipeline::
+
+        rustc --emit=llvm-ir -g module.rs
+        opt -passes=sancov-module \
+            -sanitizer-coverage-dataflow-args \
+            -sanitizer-coverage-dataflow-ret module.ll -S -o module_inst.ll
+        llc -filetype=obj module_inst.ll -o module.o
+
+This is the recommended method for capturing Rust function arguments at runtime.
+
+
+Tracing child processes (fork interception)
+-------------------------------------------
+
+KCOV-Dataflow is per-task: after ``fork()``, the child does not inherit
+the enabled state. To trace child processes, re-enable on the inherited
+file descriptor in the child before ``exec()``. The ``mmap``'d buffer is
+shared (``MAP_SHARED``), so records from the parent and the child land in
+the same ring buffer.
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <sys/ioctl.h>
+    #include <sys/mman.h>
+    #include <sys/wait.h>
+    #include <unistd.h>
+    #include <fcntl.h>
+
+    #define KCOV_DF_INIT_TRACE  _IOR('d', 1, unsigned long)
+    #define KCOV_DF_ENABLE      _IO('d', 100)
+    #define KCOV_DF_DISABLE     _IO('d', 101)
+    #define BUF_SIZE            (64 << 10)
+
+    int main(int argc, char **argv)
+    {
+        int fd = open("/sys/kernel/debug/kcov_dataflow", O_RDWR);
+        if (argc < 2 || fd == -1 || ioctl(fd, KCOV_DF_INIT_TRACE, BUF_SIZE))
+            return 1;  /* usage: prog cmd [args...], or setup failed */
+        uint64_t *buf = mmap(NULL, BUF_SIZE * 8,
+                             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+        if (buf == MAP_FAILED)
+            return 1;
+
+        /* Parent: enable, reset counter, (trace own work,) then detach. */
+        ioctl(fd, KCOV_DF_ENABLE, 0);
+        __atomic_store_n(&buf[0], 0, __ATOMIC_RELAXED);
+        ioctl(fd, KCOV_DF_DISABLE, 0);
+
+        pid_t pid = fork();
+        if (pid == 0) {
+            /* Child: enable on the inherited fd; give up if it is busy. */
+            if (ioctl(fd, KCOV_DF_ENABLE, 0))
+                _exit(1);
+            execvp(argv[1], &argv[1]);
+            _exit(1);
+        }
+        waitpid(pid, NULL, 0);
+
+        unsigned long long n = __atomic_load_n(&buf[0], __ATOMIC_RELAXED);
+        printf("Captured %llu words from parent + child\n", n);
+        munmap(buf, BUF_SIZE * 8);
+        close(fd);
+        return 0;
+    }
+
+Note: the child's ``ioctl(fd, KCOV_DF_ENABLE)`` will fail if the parent
+has not yet called ``KCOV_DF_DISABLE``, because only one task can be
+associated with a descriptor at a time. For true multi-process tracing,
+open a separate ``kcov_dataflow`` fd per child, or disable in the parent
+before the child enables (as shown above — the parent is blocked in
+``waitpid`` so it generates no records during that time anyway).
diff --git a/kernel/Makefile b/kernel/Makefile
index 1e1a31673577..9c56421c5390 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -37,6 +37,7 @@ KCOV_INSTRUMENT_extable.o := n
 KCOV_INSTRUMENT_stacktrace.o := n
 # Don't self-instrument.
 KCOV_INSTRUMENT_kcov.o := n
+KCOV_DATAFLOW_kcov.o := n
 # If sanitizers detect any issues in kcov, it may lead to recursion
 # via printk, etc.
 KASAN_SANITIZE_kcov.o := n
@@ -207,3 +208,5 @@ $(obj)/kheaders.md5: $(obj)/kheaders-srclist FORCE
        $(call filechk,kheaders_md5sum)
 
 clean-files := kheaders.md5 kheaders-srclist kheaders-objlist
+KCOV_DATAFLOW_extable.o := n
+KCOV_DATAFLOW_softirq.o := n
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 373b8034ca5c..8d9d5e33549f 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -413,6 +413,16 @@ kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
        if (!in_task())
                return;
 
+       /*
+        * Recursion guard for INSTRUMENT_ALL: helpers called below
+        * (copy_from_kernel_nofault, xadd helpers) may be instrumented.
+        * Bit 31 of the sequence counter is set only once a buffer is
+        * known to exist; every later exit must clear it via "out".
+        */
+       if (!t->kcov_df_area || (t->kcov_dataflow_seq & (1U << 31)))
+               return;
+       t->kcov_dataflow_seq |= (1U << 31);
+
        area = (u64 *)t->kcov_df_area;
        if (!area)
                return;
@@ -449,7 +459,7 @@ kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
                if (KCOV_DF_IS_ERR(ptr)) {
                        for (i = 0; i < num_fields; i++)
                                area[pos + 3 + i] = KCOV_DF_MAGIC_BAD;
-                       return;
+                       goto out;
                }
                for (i = 0; i < num_fields; i++) {
                        u64 off, sz, val = KCOV_DF_MAGIC_BAD;
@@ -469,6 +479,8 @@ kcov_df_write(u64 type_marker, u64 pc, u64 meta, void *ptr,
                        area[pos + 3 + i] = val;
                }
        }
+out:
+       t->kcov_dataflow_seq &= ~(1U << 31);
 }
 
 #ifdef CONFIG_KCOV_DATAFLOW_ARGS

-- 
2.43.0


Reply via email to