Introduce a BPF test program and user space code to test
bpf_probe_write_user_registered().

The test program also demonstrates 2 ways a BPF program may obtain the
addresses it can write to: either by tracing prctl() or simply accessing
current->bpf_user_writable directly.

Signed-off-by: Marco Elver <[email protected]>
---
 .../prog_tests/probe_write_user_registered.c  | 325 ++++++++++++++++++
 .../progs/test_probe_write_user_registered.c  | 219 ++++++++++++
 2 files changed, 544 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c

diff --git a/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c b/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
new file mode 100644
index 000000000000..78ac0756d365
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Google LLC. */
+
+#include <malloc.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/prctl.h>
+#include <time.h>
+
+#include <test_progs.h>
+#include "test_probe_write_user_registered.skel.h"
+
+#define TEST_TAG 0xf23c39ab /* Tag used for registrations; the BPF program defines the same value. */
+
+/*
+ * Encoding of the test access-type in the tv_nsec parameter: do_nanosleep()
+ * stores one of these values in the low 8 bits of tv_nsec, and the BPF program
+ * decodes it to choose which write to attempt. The enumerator order must stay
+ * in sync with the copy of this enum in the BPF program.
+ */
+enum test_access {
+       TEST_SUB_REGION,        /* write tv_nsec to the second u64 of the region */
+       TEST_EQ_REGION,         /* write a whole struct test_data at the region start */
+       TEST_ONE_BY_ONE,        /* write tv_nsec to three consecutive u64 slots */
+       TEST_ANY_TAG,           /* like TEST_SUB_REGION, but with a tag other than TEST_TAG */
+};
+
+/*
+ * This will be written to by the BPF program, which declares a struct with
+ * the same three-u64 layout. nanosleep_arg receives the tv_nsec value; the
+ * padding fields are only written by the whole-region and one-by-one access
+ * types. The fields are volatile, presumably so that the compiler re-reads
+ * values that are modified from kernel context behind its back.
+ */
+struct test_data {
+       volatile uint64_t padding_start;
+       volatile uint64_t nanosleep_arg;
+       volatile uint64_t padding_end;
+};
+
+static struct test_data test_data;
+
+/*
+ * Issue prctl(PR_BPF_REGISTER_WRITABLE) for @size bytes at @start with @tag
+ * and record a test failure (named after this function) unless it returns 0.
+ */
+static void prctl_register_writable(const volatile void *start, size_t size, uint32_t tag)
+{
+       const int ret = prctl(PR_BPF_REGISTER_WRITABLE, start, size, tag, 0);
+
+       ASSERT_OK(ret, __func__);
+}
+
+/*
+ * Issue prctl(PR_BPF_UNREGISTER_WRITABLE) for @size bytes at @start and record
+ * a test failure (named after this function) unless it returns 0.
+ */
+static void prctl_unregister_writable(const volatile void *start, size_t size)
+{
+       const int ret = prctl(PR_BPF_UNREGISTER_WRITABLE, start, size, 0, 0);
+
+       ASSERT_OK(ret, __func__);
+}
+
+/*
+ * Pack @base (shifted up by 8 bits) and @test_access (low 8 bits) into a
+ * tv_nsec value, issue a raw nanosleep() syscall with it so the BPF program
+ * can observe it, and return the packed value.
+ */
+static uint64_t do_nanosleep(uint64_t base, enum test_access test_access)
+{
+       const uint64_t encoded = (base << 8) | test_access;
+       struct timespec req = {
+               .tv_sec = 0,
+               .tv_nsec = encoded,
+       };
+
+       syscall(__NR_nanosleep, &req, NULL);
+
+       return encoded;
+}
+
+/*
+ * Basic life cycle: once test_data is registered, the writes performed by the
+ * BPF program become visible; once it is unregistered, they stop.
+ */
+static void test_register_and_unregister(struct test_probe_write_user_registered *skel)
+{
+       /* The whole struct is registered, so sub-region and whole-region writes both work. */
+       static const enum test_access visible[] = { TEST_SUB_REGION, TEST_EQ_REGION };
+       uint64_t base = 1234;
+
+       prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+       /* Check that we see the writes: three rounds per access type. */
+       for (size_t m = 0; m < sizeof(visible) / sizeof(visible[0]); ++m) {
+               for (int round = 0; round < 3; ++round) {
+                       uint64_t want;
+
+                       test_data.nanosleep_arg = 0;
+                       want = do_nanosleep(++base, visible[m]);
+                       ASSERT_EQ(test_data.nanosleep_arg, want, __func__);
+                       ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+               }
+       }
+
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+
+       /* No more writes after unregistration. */
+       test_data.nanosleep_arg = 0;
+       do_nanosleep(++base, TEST_SUB_REGION);
+       ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+}
+
+/*
+ * Test that accesses with mismatching tags fail.
+ *
+ * The region is registered with TEST_TAG. A first nanosleep proves that a
+ * write with the matching tag lands. The second nanosleep uses TEST_ANY_TAG,
+ * which makes the BPF program write with a tag different from TEST_TAG; that
+ * write is expected to be rejected, so nanosleep_arg must still hold the value
+ * from the first write.
+ */
+static void test_bad_tag(struct test_probe_write_user_registered *skel)
+{
+       uint64_t expect;
+
+       prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+       test_data.nanosleep_arg = 0;
+       expect = do_nanosleep(1234, TEST_SUB_REGION);
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+       do_nanosleep(9999, TEST_ANY_TAG); /* fails */
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__); /* unchanged: still the 1234-based value */
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__); /* the TEST_TAG entry is still registered */
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that the "any" (zero) tag works.
+ *
+ * The region is registered with tag 0, and the BPF program (in TEST_ANY_TAG
+ * mode) writes with an unrelated non-zero tag; the test expects those writes
+ * to land. found_user_registered is expected to be 0 throughout because the
+ * BPF program counts entries tagged TEST_TAG, and this region is tagged 0.
+ */
+static void test_any_tag(struct test_probe_write_user_registered *skel)
+{
+       uint64_t nsec = 1234;
+       uint64_t expect;
+
+       prctl_register_writable(&test_data, sizeof(test_data), 0);
+
+       for (int i = 0; i < 3; ++i) {
+               test_data.nanosleep_arg = 0;
+               expect = do_nanosleep(++nsec, TEST_ANY_TAG);
+               ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+               ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+       }
+
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+
+       test_data.nanosleep_arg = 0; /* after unregistration no write may land */
+       do_nanosleep(++nsec, TEST_ANY_TAG);
+       ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+}
+
+/*
+ * Test that invalid prctl() calls fail.
+ *
+ * Exercises the argument checks this test expects from register/unregister:
+ * NULL start, zero size, registering at an already registered start address,
+ * unregistering with a size that does not match the registration,
+ * unregistering an address that was never registered, and unregistering twice.
+ * The BPF program is not involved in the checks, so @skel is unused.
+ *
+ * NOTE(review): the 1-byte region at &test_data + 1 lies inside the region
+ * registered just before it, yet is expected to register successfully, while
+ * a 2-byte region at &test_data is expected to be rejected - confirm the
+ * intended overlap semantics against the kernel implementation.
+ */
+static void test_invalid_prctl(struct test_probe_write_user_registered *skel)
+{
+       ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, NULL, 1, 0, 0), __func__); /* NULL start */
+       ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, 0, 0, 0), 
__func__); /* zero size */
+       prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+       ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, 
sizeof(test_data), 0, 0), __func__); /* same region again, different tag */
+       ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, 2, 0, 0), 
__func__); /* same start, different size */
+       prctl_register_writable((void *)&test_data + 1, 1, TEST_TAG);
+       prctl_register_writable((void *)&test_data - 1, 1, TEST_TAG);
+
+       ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, 1, 0, 0), 
__func__); /* size differs from the registered size */
+       prctl_unregister_writable((void *)&test_data - 1, 1);
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+       prctl_unregister_writable((void *)&test_data + 1, 1);
+       ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, 0x123456, 1, 0, 0), 
__func__); /* address that was never registered */
+       ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, 
sizeof(test_data), 0, 0), __func__); /* already unregistered above */
+}
+
+/*
+ * Test that we can register multiple regions and they all work.
+ *
+ * Each u64 field of test_data is registered as its own region. The BPF
+ * program remembers only the start/size of the most recent registration it
+ * traced, hence padding_start (the lowest address) is registered last. A
+ * whole-struct write is then expected to fail, because it is larger than the
+ * single-u64 region at that start, while three separate u64 writes are
+ * expected to succeed.
+ */
+static void test_multiple_region(struct test_probe_write_user_registered *skel)
+{
+       uint64_t expect;
+
+       prctl_register_writable(&test_data.nanosleep_arg, sizeof(uint64_t), 
TEST_TAG);
+       prctl_register_writable(&test_data.padding_end, sizeof(uint64_t), 
TEST_TAG);
+       /* First one last, so the test program knows where to start. */
+       prctl_register_writable(&test_data.padding_start, sizeof(uint64_t), 
TEST_TAG);
+
+       memset(&test_data, 0, sizeof(test_data));
+       do_nanosleep(0xf00d, TEST_EQ_REGION); /* fails */
+       ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__); /* found first */
+
+       expect = do_nanosleep(0xf33d, TEST_ONE_BY_ONE);
+       ASSERT_EQ(test_data.padding_start, expect, __func__);
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+       ASSERT_EQ(test_data.padding_end, expect, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+       prctl_unregister_writable(&test_data.padding_start, sizeof(uint64_t));
+       prctl_unregister_writable(&test_data.nanosleep_arg, sizeof(uint64_t));
+       prctl_unregister_writable(&test_data.padding_end, sizeof(uint64_t));
+}
+
+static void *test_thread_func(void *arg) /* Thread body for test_thread(); @arg is the loaded skeleton. Always returns NULL. */
+{
+       struct test_probe_write_user_registered *skel = arg;
+
+       /*
+        * The creating thread registered test_data. Unregistering it from this
+        * thread is expected to fail, which shows that this thread did not
+        * inherit the region.
+        */
+       ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, 
sizeof(test_data), 0, 0), __func__);
+       /*
+        * So that the BPF user_writable task storage is filled: the BPF
+        * program creates the storage when it traces a registration and
+        * ignores nanosleep() calls of tasks that have none.
+        */
+       prctl_register_writable(&test_data, 1, TEST_TAG);
+       prctl_unregister_writable(&test_data, 1);
+
+       /* Test that there really is no way it'll write. */
+       test_data.nanosleep_arg = 0;
+       do_nanosleep(9999, TEST_SUB_REGION); /* fails */
+       ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+
+       return NULL;
+}
+
+/*
+ * Test that threads (CLONE_VM) do not inherit writable regions.
+ *
+ * The main thread registers test_data and verifies that a write lands. It then
+ * runs test_thread_func() in a new thread, which checks that it cannot
+ * unregister the region and that no write lands for it. The region is
+ * unregistered again on every path.
+ */
+static void test_thread(struct test_probe_write_user_registered *skel)
+{
+       uint64_t expect;
+       pthread_t tid;
+
+       prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+       test_data.nanosleep_arg = 0;
+       expect = do_nanosleep(1234, TEST_SUB_REGION);
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+       /* tid is only valid if pthread_create() succeeded: never join otherwise. */
+       if (!ASSERT_OK(pthread_create(&tid, NULL, test_thread_func, skel), "pthread_create"))
+               goto unregister;
+       ASSERT_OK(pthread_join(tid, NULL), "pthread_join");
+
+       /* The thread zeroed nanosleep_arg, and its nanosleep must not have written. */
+       ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+unregister:
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that fork() does inherit writable regions.
+ *
+ * The parent registers test_data and verifies that a write lands. The child
+ * then performs its own nanosleep and reports through its exit status whether
+ * both of its checks passed. The parent's copy of test_data must still hold
+ * the parent's value afterwards, since the child writes to its own copy.
+ */
+static void test_fork(struct test_probe_write_user_registered *skel)
+{
+       uint64_t expect;
+       int pid, status;
+
+       prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+       test_data.nanosleep_arg = 0;
+       expect = do_nanosleep(1234, TEST_SUB_REGION);
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+       ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+       pid = fork();
+       /* Without this check, a failed fork() would make waitpid(-1) reap any child. */
+       if (!ASSERT_NEQ(pid, -1, "fork"))
+               goto unregister;
+       if (!pid) {
+               int ok;
+
+               test_data.nanosleep_arg = 0; /* write prefault */
+               expect = do_nanosleep(3333, TEST_SUB_REGION);
+               /* Fold both checks into the exit status so the parent sees either failure. */
+               ok = ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+               ok &= ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+               exit(!ok);
+       }
+
+       status = -1;
+       if (ASSERT_EQ(waitpid(pid, &status, 0), pid, "waitpid"))
+               ASSERT_EQ(status, 0, __func__);
+
+       /* The parent's copy still holds the parent's own (1234-based) value. */
+       ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+unregister:
+       prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that the kernel can allocate lots of regions and find them.
+ *
+ * Registers STRESS_SIZE adjacent regions one at a time, checking after each
+ * registration that a write lands in the newest region, then unregisters them
+ * in the same order, checking that the unregistered region is no longer
+ * written to.
+ */
+static void test_stress_regions(struct test_probe_write_user_registered *skel)
+{
+       const int STRESS_SIZE = 200;
+       struct test_data *large = malloc(STRESS_SIZE * sizeof(*large));
+       uint64_t expect;
+
+       /* Bail out instead of passing NULL to memset() and prctl() below. */
+       if (!ASSERT_NEQ(large, NULL, __func__))
+               return;
+
+       /* Zero the buffer; presumably this also pre-faults it, like the write prefault in test_fork(). */
+       memset(large, 0, STRESS_SIZE * sizeof(*large));
+
+       for (int i = 0; i < STRESS_SIZE; ++i) {
+               prctl_register_writable(&large[i], sizeof(*large), TEST_TAG);
+               /* Registering the same region a second time must fail. */
+               ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+               expect = do_nanosleep(777, TEST_SUB_REGION);
+               ASSERT_EQ(large[i].nanosleep_arg, expect, __func__);
+               ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+       }
+
+       for (int i = 0; i < STRESS_SIZE; ++i) {
+               prctl_unregister_writable(&large[i], sizeof(*large));
+               /* Unregistering the same region a second time must fail. */
+               ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+               large[i].nanosleep_arg = 0;
+               do_nanosleep(1992, TEST_SUB_REGION); /* no more writes */
+               ASSERT_EQ(large[i].nanosleep_arg, 0, __func__);
+               /*
+                * The BPF program looks up the start address of the last traced
+                * registration, i.e. the final element, which stays registered
+                * until the last iteration of this loop.
+                */
+               ASSERT_EQ(skel->data->found_user_registered, i < STRESS_SIZE - 1 ? 1 : 0, __func__);
+       }
+
+       for (int i = 0; i < STRESS_SIZE; ++i)
+               ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+
+       free(large);
+}
+
+/*
+ * Test entry point: load and attach the BPF skeleton, run every subtest the
+ * test runner selects (in the order listed), then destroy the skeleton.
+ */
+void test_probe_write_user_registered(void)
+{
+       static const struct {
+               const char *name;
+               void (*run)(struct test_probe_write_user_registered *skel);
+       } subtests[] = {
+               { "register_and_unregister", test_register_and_unregister },
+               { "bad_tag", test_bad_tag },
+               { "any_tag", test_any_tag },
+               { "invalid_prctl", test_invalid_prctl },
+               { "multiple_region", test_multiple_region },
+               { "thread", test_thread },
+               { "fork", test_fork },
+               { "stress_regions", test_stress_regions },
+       };
+       struct test_probe_write_user_registered *skel;
+
+       skel = test_probe_write_user_registered__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open and load"))
+               return;
+
+       /* Subtests only run when attaching succeeded; the skeleton is destroyed either way. */
+       if (ASSERT_OK(test_probe_write_user_registered__attach(skel), "attach")) {
+               for (size_t i = 0; i < sizeof(subtests) / sizeof(subtests[0]); ++i) {
+                       if (test__start_subtest(subtests[i].name))
+                               subtests[i].run(skel);
+               }
+       }
+
+       test_probe_write_user_registered__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c b/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c
new file mode 100644
index 000000000000..9174ff2e36f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Google LLC. */
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/*
+ * We just need the CLONE_VM definition. Without __ASSEMBLY__ sched.h would
+ * redefine clone_args, which is already defined by vmlinux.h
+ */
+#define __ASSEMBLY__
+#include <linux/sched.h>
+#undef __ASSEMBLY__
+
+#define TEST_TAG 0xf23c39ab /* Must match TEST_TAG in the user space test. */
+
+/*
+ * Encoding of the test access-type in the tv_nsec parameter. The enumerator
+ * order must stay in sync with the copy of this enum in the user space test.
+ */
+enum test_access {
+       TEST_SUB_REGION,        /* write tv_nsec to the second u64 of the region */
+       TEST_EQ_REGION,         /* write a whole struct test_data at the region start */
+       TEST_ONE_BY_ONE,        /* write tv_nsec to three consecutive u64 slots */
+       TEST_ANY_TAG,           /* like TEST_SUB_REGION, but with a tag other than TEST_TAG */
+};
+#define TEST_ACCESS(nsec) ((enum test_access)((nsec) & 0xff)) /* access type = low 8 bits of tv_nsec */
+
+struct test_data { /* Same three-u64 layout as struct test_data in the user space test. */
+       __u64 padding_start;
+       __u64 nanosleep_arg;
+       __u64 padding_end;
+};
+
+struct user_writable { /* Start and size of the most recent registration traced in sys_prctl(). */
+       void *start;
+       size_t size;
+};
+
+struct { /* Task storage map holding one struct user_writable per task. */
+       __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, struct user_writable);
+} user_writable SEC(".maps");
+
+int found_user_registered = -1; /* find_user_registered() result for the last traced nanosleep(); -1 if it was not computed */
+
+/*
+ * This is used to test that the contents of per-task bpf_user_writable is
+ * sane.
+ *
+ * It also demonstrates another way (vs. prctl()) how the BPF program can
+ * obtain addresses associated with a tag. Beware, however, that this is
+ * O(#registered) and a production BPF program should cache its result in task
+ * local storage.
+ *
+ * Returns the number of entries of the current task whose tag equals @tag and
+ * whose start address equals @start, looking at no more than the first 1024
+ * entries; returns 0 if the task has no bpf_user_writable.
+ */
+static int find_user_registered(__u32 tag, void *start)
+{
+       const struct bpf_user_writable *uw = 
bpf_get_current_task_btf()->bpf_user_writable;
+       int count = 0;
+
+       if (!uw)
+               return count;
+
+      /*
+       * Ensure termination of the loop to make the verifier happy. Use
+       * bpf_loop() if you expect a very large number of registered regions.
+       */
+       for (__u32 idx = 0; idx < uw->size && idx < 1024; ++idx) {
+               if (uw->entries[idx].tag == tag && uw->entries[idx].start == 
start)
+                       count++;
+       }
+
+       return count;
+}
+
+static void sys_nanosleep(struct pt_regs *regs) /* nanosleep() entry: run the test accesses selected by the low byte of tv_nsec */
+{
+       struct __kernel_timespec *ts;
+       struct user_writable *w;
+       __u32 dummy = -99;
+       __u64 tv_nsec;
+       int err;
+
+       _Static_assert(sizeof(ts->tv_nsec) == sizeof(tv_nsec), "ABI");
+
+       found_user_registered = -1; /* stays -1 if we bail out before the lookup below */
+
+       w = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 0, 
0);
+       if (!w) /* no registration was traced for this task: nothing to test */
+               return;
+
+       ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
+       if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec))
+               return;
+
+       found_user_registered = find_user_registered(TEST_TAG, w->start);
+
+       bpf_printk("doing test accesses");
+
+       /*
+        * Test failing accesses before, so that if they actually succeed, we
+        * won't do the real write and the test will detect a missed write.
+        *
+        * In order: a 4-byte write starting at the last byte of the recorded
+        * region, a write starting one byte before it, and a write at offset
+        * 100. Unless the TEST_ANY_TAG access type is requested, writes at
+        * the region start with tag 123 and with tag 0 are tried as well.
+        */
+       if (!bpf_probe_write_user_registered(w->start + w->size - 1, &dummy, 
sizeof(dummy), TEST_TAG))
+               return;
+       if (!bpf_probe_write_user_registered(w->start - 1, &dummy, 
sizeof(dummy), TEST_TAG))
+               return;
+       if (!bpf_probe_write_user_registered(w->start + 100, &dummy, 
sizeof(dummy), TEST_TAG))
+               return;
+       if (TEST_ACCESS(tv_nsec) != TEST_ANY_TAG) {
+               if (!bpf_probe_write_user_registered(w->start, &dummy, 
sizeof(dummy), 123))
+                       return;
+               if (!bpf_probe_write_user_registered(w->start, &dummy, 
sizeof(dummy), 0))
+                       return;
+       }
+
+       switch (TEST_ACCESS(tv_nsec)) {
+       case TEST_SUB_REGION: /* tv_nsec into the second u64 of the region */
+               bpf_printk("sub region write");
+               err = bpf_probe_write_user_registered(w->start + sizeof(__u64), 
&tv_nsec, sizeof(tv_nsec), TEST_TAG);
+               break;
+       case TEST_EQ_REGION: { /* one write of a whole struct test_data; padding fields are zero */
+               struct test_data out = {};
+
+               bpf_printk("whole region write");
+               out.nanosleep_arg = tv_nsec;
+               err = bpf_probe_write_user_registered(w->start, &out, 
sizeof(out), TEST_TAG);
+               break;
+       }
+       case TEST_ONE_BY_ONE: /* three u64 writes, stopping at the first error */
+               bpf_printk("write one by one");
+               for (int i = 0; i < 3; ++i) {
+                       err = bpf_probe_write_user_registered(w->start + i * 
sizeof(__u64), &tv_nsec,
+                                                             sizeof(tv_nsec), 
TEST_TAG);
+                       if (err)
+                               break;
+               }
+               break;
+       case TEST_ANY_TAG: /* same target as TEST_SUB_REGION, but with a tag other than TEST_TAG */
+               bpf_printk("any tag write");
+               err = bpf_probe_write_user_registered(w->start + sizeof(__u64), 
&tv_nsec, sizeof(tv_nsec), 93845);
+               break;
+       default:
+               bpf_printk("unknown access method");
+               return;
+       }
+
+       if (err)
+               bpf_printk("write failed: %d", err);
+       else
+               bpf_printk("write success");
+}
+
+static void sys_prctl(struct pt_regs *regs) /* prctl() entry: remember the region of a PR_BPF_REGISTER_WRITABLE call in task storage */
+{
+       struct user_writable *w;
+       __u32 tag;
+
+       if (PT_REGS_PARM1_CORE_SYSCALL(regs) != /*PR_BPF_REGISTER_WRITABLE*/71)
+               return;
+
+       tag = (__u32)PT_REGS_PARM4_CORE_SYSCALL(regs);
+       if (tag && tag != TEST_TAG) /* only track registrations with tag 0 or TEST_TAG */
+               return;
+
+       w = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 0,
+                                BPF_LOCAL_STORAGE_GET_F_CREATE);
+       if (!w)
+               return;
+
+       bpf_printk("registered user writable region with tag %x", tag);
+       w->start = (void *)PT_REGS_PARM2_CORE_SYSCALL(regs); /* runs at syscall entry, so this is recorded even if the prctl() fails */
+       w->size = PT_REGS_PARM3_CORE_SYSCALL(regs);
+}
+
+SEC("tp_btf/sys_enter") /* Dispatch prctl() and nanosleep() entries to their handlers; ignore all other syscalls. */
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+       switch (id) {
+       case __NR_prctl:
+               sys_prctl(regs);
+               break;
+       case __NR_nanosleep:
+               sys_nanosleep(regs);
+               break;
+       default:
+               break;
+       }
+       return 0; /* always 0, whatever the handlers did */
+}
+
+/*
+ * The user writable region is copied on fork(). Also copy the per-task map we
+ * use in this test.
+ *
+ * Tasks created with CLONE_VM (threads) are skipped, and so are parents that
+ * have no user_writable task storage. Otherwise the parent's entry is copied
+ * into newly created storage of the new task @t. Always returns 0.
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_newtask, struct task_struct *t, unsigned long clone_flags)
+{
+       const struct user_writable *src;
+       struct user_writable *dst;
+
+       if (clone_flags & CLONE_VM) /* thread: nothing is copied */
+               return 0;
+
+       src = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 
0, 0);
+       if (!src) /* the parent never had a registration traced */
+               return 0;
+
+       dst = bpf_task_storage_get(&user_writable, t, 0, 
BPF_LOCAL_STORAGE_GET_F_CREATE);
+       if (!dst) {
+               bpf_printk("failed to copy user_writable on fork()");
+               return 0;
+       }
+       *dst = *src;
+       bpf_printk("fork copied user writable region");
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.44.0.478.gd926399ef9-goog


Reply via email to