From: "Steven Rostedt (Google)" <rost...@goodmis.org>

The "shortest_full" variable is used to keep track of the waiter that is
waiting for the smallest amount on the ring buffer before being woken up.
When a task waits on the ring buffer, it passes in a "full" value that is
a percentage. 0 means wake up on any data. 1-100 means wake up from 1% to
100% full buffer.

As all waiters are on the same wait queue, the wake up happens for the
waiter with the smallest percentage.

The problem is that the shortest_full on the cpu_buffer that stores the
smallest amount doesn't get reset when all the waiters are woken up. It
does get reset when the ring buffer is reset (echo > /sys/kernel/tracing/trace).

This means that tasks may be woken up more often than they want to
be. Instead, have the shortest_full field get reset just before waking up
all the tasks. If the tasks wait again, they will update the shortest_full
before sleeping.

Also add locking around setting of shortest_full in the poll logic, and
change "work" to "rbwork" to match the variable name for rb_irq_work
structures that are used in other places.

Cc: sta...@vger.kernel.org
Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader")
Signed-off-by: Steven Rostedt (Google) <rost...@goodmis.org>
---
 kernel/trace/ring_buffer.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3400f11286e3..aa332ace108b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -755,8 +755,19 @@ static void rb_wake_up_waiters(struct irq_work *work)
 
        wake_up_all(&rbwork->waiters);
        if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+               /* Only cpu_buffer sets the above flags */
+               struct ring_buffer_per_cpu *cpu_buffer =
+                       container_of(rbwork, struct ring_buffer_per_cpu, 
irq_work);
+
+               /* Called from interrupt context */
+               raw_spin_lock(&cpu_buffer->reader_lock);
                rbwork->wakeup_full = false;
                rbwork->full_waiters_pending = false;
+
+               /* Waking up all waiters, they will reset the shortest full */
+               cpu_buffer->shortest_full = 0;
+               raw_spin_unlock(&cpu_buffer->reader_lock);
+
                wake_up_all(&rbwork->full_waiters);
        }
 }
@@ -934,28 +945,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer 
*buffer, int cpu,
                          struct file *filp, poll_table *poll_table, int full)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
-       struct rb_irq_work *work;
+       struct rb_irq_work *rbwork;
 
        if (cpu == RING_BUFFER_ALL_CPUS) {
-               work = &buffer->irq_work;
+               rbwork = &buffer->irq_work;
                full = 0;
        } else {
                if (!cpumask_test_cpu(cpu, buffer->cpumask))
                        return EPOLLERR;
 
                cpu_buffer = buffer->buffers[cpu];
-               work = &cpu_buffer->irq_work;
+               rbwork = &cpu_buffer->irq_work;
        }
 
        if (full) {
-               poll_wait(filp, &work->full_waiters, poll_table);
-               work->full_waiters_pending = true;
+               unsigned long flags;
+
+               poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+               raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+               rbwork->full_waiters_pending = true;
                if (!cpu_buffer->shortest_full ||
                    cpu_buffer->shortest_full > full)
                        cpu_buffer->shortest_full = full;
+               raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
        } else {
-               poll_wait(filp, &work->waiters, poll_table);
-               work->waiters_pending = true;
+               poll_wait(filp, &rbwork->waiters, poll_table);
+               rbwork->waiters_pending = true;
        }
 
        /*
-- 
2.43.0



Reply via email to