A kernel panic was observed in the timerlat tracer with the following
reproducer:

   #!/bin/bash
   while true; do
      rtla timerlat hist -u -d 5s & PID=$!
      sleep 2
      echo OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options
      rtla timerlat hist -k -d 1s
   done

The kernel first displays several WARN traces with the following pattern:

   WARNING: CPU: 1 PID: 1822 at kernel/trace/trace_osnoise.c:1959 stop_kthread+0xb7/0xc0
   ...
   CPU: 1 UID: 0 PID: 1822 Comm: bash
   ..
   Call Trace:
     ...
     ? stop_kthread+0xb7/0xc0
     stop_per_cpu_kthreads+0xf/0x40
     osnoise_options_write+0xda/0x1b0
     vfs_write+0xf5/0x450
     ...
     ksys_write+0x6d/0xf0
     do_syscall_64+0x7d/0x160
     ...
     entry_SYSCALL_64_after_hwframe+0x76/0x7e

Then, it displays a similar pattern, but in start_per_cpu_kthreads():

   WARNING: CPU: 1 PID: 2120 at kernel/trace/trace_osnoise.c:2068 start_per_cpu_kthreads+0xfe/0x110
   ...
   CPU: 1 UID: 0 PID: 2120 Comm: rtla
   ...
   Call Trace:
     ...
     ? start_per_cpu_kthreads+0xfe/0x110
     ...
     osnoise_workload_start+0xb1/0x2d0
     timerlat_tracer_start+0x50/0x70
     rb_simple_write+0x13a/0x160
     vfs_write+0xf5/0x450
     ...
     ksys_write+0x6d/0xf0
     do_syscall_64+0x7d/0x160
     ...
     entry_SYSCALL_64_after_hwframe+0x76/0x7e

and finally a NULL pointer dereference BUG:

   BUG: kernel NULL pointer dereference, address: 0000000000000030
   ...
   CPU: 1 UID: 0 PID: 2155 Comm: timerlatu/1
   ...
   Call Trace:
     ...
     ? timerlat_fd_read+0xf2/0x370
     ? timerlat_fd_read+0xee/0x370
     vfs_read+0xe8/0x370
     ksys_read+0x6d/0xf0
     do_syscall_64+0x7d/0x160
     ...
     entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fix the bug by refusing to set OSNOISE_WORKLOAD while timerlat is
running with NO_OSNOISE_WORKLOAD and a user workload is attached to at
least one CPU.

To implement this, a new function osnoise_validate_option() is added.
The function validates any osnoise option change and returns an error
value if the change is not valid. -EBUSY is returned in this particular
case.

Fixes: 30838fcd8107 ("tracing/osnoise: Add OSNOISE_WORKLOAD option")
Signed-off-by: Tomas Glozar <[email protected]>
---
 kernel/trace/trace_osnoise.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 827104d00bc0..6a6d4f8bc19f 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2186,6 +2186,31 @@ static const struct seq_operations osnoise_options_seq_ops = {
        .stop           = s_options_stop
 };
 
+/**
+ * osnoise_validate_option - Check if set option is valid
+ * @option: The index of the option
+ * @enabled: The requested state of the option
+ *
+ * Verify if the requested osnoise option is valid with regards to the current
+ * state of the tracer.
+ *
+ * Return: 0 if the change is valid, a negative error number otherwise.
+ */
+static int osnoise_validate_option(int option, int enabled)
+{
+       int cpu;
+
+       if (option == OSN_WORKLOAD && enabled &&
+           !test_bit(OSN_WORKLOAD, &osnoise_options)) {
+               /* Trying to enable kernel threads while user workload is running? */
+               for_each_online_cpu(cpu)
+                       if (per_cpu(per_cpu_osnoise_var, cpu).pid)
+                               return -EBUSY;
+       }
+
+       return 0;
+}
+
 static int osnoise_options_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &osnoise_options_seq_ops);
@@ -2229,6 +2254,10 @@ static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
        if (option < 0)
                return -EINVAL;
 
+       retval = osnoise_validate_option(option, enable);
+       if (retval != 0)
+               return retval;
+
        /*
         * trace_types_lock is taken to avoid concurrency on start/stop.
         */
-- 
2.52.0


Reply via email to