On Sat Jun 13, 2026 at 9:48 PM EDT, Tamir Duberstein wrote:
> BPF_F_RB_OVERWRITE can advance overwrite_pos past consumer_pos.
> Callback traversal does not read overwrite_pos, so after the producer
> laps the consumer it can treat overwritten data as a record header.
>
> An earlier proposal[0] copied the readable window before invoking
> callbacks. Review concluded that callbacks are a poor fit because
> copying penalizes zero-copy users and the API cannot report skipped
> records.
>
> Record the map flag and reject callback consumption with -EOPNOTSUPP.
>
> Link: https://lore.kernel.org/bpf/CAEf4Bzaq5drHWChXoRBnrmkb6reAsSVj8r=ubyfsup31fma...@mail.gmail.com/ [0]
> Fixes: feeaf1346f80 ("bpf: Add overwrite mode for BPF ring buffer")
> Assisted-by: Codex:gpt-5.5
> Signed-off-by: Tamir Duberstein <[email protected]>
> ---
> tools/lib/bpf/libbpf.h | 1 +
> tools/lib/bpf/ringbuf.c | 4 +++
> tools/testing/selftests/bpf/prog_tests/ringbuf.c | 39 ++++++++++++++++++++++++
> 3 files changed, 44 insertions(+)
>
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index a3b8f606a91d..899457d5d536 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1439,6 +1439,7 @@ struct ring_buffer;
> struct ring;
> struct user_ring_buffer;
>
> +/* Callback-based consumption is unsupported for BPF_F_RB_OVERWRITE maps. */
> /* A negative return stops consumption; non-negative values continue. Stopping
> * can leave records queued without a new readiness notification. Before
> * waiting for readiness again, consume until no records remain.
> diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
> index ea8909fec4e9..f7972eae05ba 100644
> --- a/tools/lib/bpf/ringbuf.c
> +++ b/tools/lib/bpf/ringbuf.c
> @@ -30,6 +30,7 @@ struct ring {
> unsigned long *producer_pos;
> unsigned long mask;
> int map_fd;
> + bool overwrite;
> };
>
> struct ring_buffer {
> @@ -118,6 +119,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
> r->sample_cb = sample_cb;
> r->ctx = ctx;
> r->mask = info.max_entries - 1;
> + r->overwrite = info.map_flags & BPF_F_RB_OVERWRITE;
>
> /* Map writable consumer page */
> tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
> map_fd, 0);
> @@ -233,6 +235,8 @@ static inline int roundup_len(__u32 len)
>
> static int ringbuf_validate(const struct ring *r)
> {
> + if (r->overwrite)
> + return -EOPNOTSUPP;
Nit: unlikely(r->overwrite)?
The check makes sense; we can add it whether or not we keep
ringbuf_validate().
> return r->sample_cb ? 0 : -EINVAL;
> }
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
> index 5f0c679bf9a6..a6c707af1134 100644
> --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
> +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
> @@ -684,6 +684,43 @@ static void ringbuf_map_key_subtest(void)
> test_ringbuf_map_key_lskel__destroy(skel_map_key);
> }
>
> +static void ringbuf_overwrite_callback_subtest(void)
> +{
> + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RB_OVERWRITE);
> + struct ring_buffer *ringbuf;
> + struct ring *ring;
> + int map_fd, err;
> +
> + map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, getpagesize(),
> + &opts);
> + if (!ASSERT_OK_FD(map_fd, "bpf_map_create"))
> + return;
> +
> + ringbuf = ring_buffer__new(map_fd, process_noop_sample, NULL, NULL);
> + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
> + goto cleanup_fd;
> +
> + ring = ring_buffer__ring(ringbuf, 0);
> + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring"))
> + goto cleanup_ringbuf;
> +
> + err = ring_buffer__consume_n(ringbuf, 0);
> + ASSERT_EQ(err, -EOPNOTSUPP, "ringbuf_consume_zero");
> + err = ring_buffer__consume(ringbuf);
> + ASSERT_EQ(err, -EOPNOTSUPP, "ringbuf_consume");
> + err = ring_buffer__poll(ringbuf, 0);
> + ASSERT_EQ(err, -EOPNOTSUPP, "ringbuf_poll");
> + err = ring__consume_n(ring, 0);
> + ASSERT_EQ(err, -EOPNOTSUPP, "ring_consume_zero");
> + err = ring__consume(ring);
> + ASSERT_EQ(err, -EOPNOTSUPP, "ring_consume");
> +
> +cleanup_ringbuf:
> + ring_buffer__free(ringbuf);
> +cleanup_fd:
> + close(map_fd);
> +}
> +
> static void ringbuf_overwrite_mode_subtest(void)
> {
> unsigned long size, len1, len2, len3, len4, len5;
> @@ -760,6 +797,8 @@ void test_ringbuf(void)
> ringbuf_map_key_subtest();
> if (test__start_subtest("ringbuf_write"))
> ringbuf_write_subtest();
> + if (test__start_subtest("ringbuf_overwrite_callback"))
> + ringbuf_overwrite_callback_subtest();
> if (test__start_subtest("ringbuf_overwrite_mode"))
> ringbuf_overwrite_mode_subtest();
> }