Add test to verify cpuidle governor ext's load, attach, and kfuncs.

This patch also provides a simple demonstration of `cpuidle_gov_ext_ops` usage:
- In `ops.init()`, we set the "rating" value to 60 - significantly exceeding 
other governors' ratings - to activate `cpuidle_gov_ext`.
- For specific scenarios (e.g., screen-off music playback on mobile devices), 
we can enable "expect_deeper" to transition to deeper idle states.

This implementation serves as a foundation, not a final solution.
Further work can explore cpuidle strategies optimized for various
usage scenarios.

Test Results
-----------
:~/workplace/bpf/x86/submit/bpf_next/tools/testing/selftests/bpf$ make -j4

:$ sudo ./test_progs -t test_cpuidle_gov_ext      
#449     test_cpuidle_gov_ext: OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Additionally, the kernel log shows:
$sudo cat /dev/kmsg
6,911,10997439785,-; cpuidle: using governor ext
6,913,11010384887,-; cpuidle: using governor menu
After `cpuidle_gov_ext` exits, the system restores the previous governor (`menu` in the log above).

Signed-off-by: Lin Yikai <yikai....@vivo.com>
---
 .../bpf/prog_tests/test_cpuidle_gov_ext.c     |  28 +++
 .../selftests/bpf/progs/cpuidle_common.h      |  13 ++
 .../selftests/bpf/progs/cpuidle_gov_ext.c     | 200 ++++++++++++++++++
 3 files changed, 241 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
 create mode 100644 tools/testing/selftests/bpf/progs/cpuidle_common.h
 create mode 100644 tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c

diff --git a/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c b/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
new file mode 100644
index 000000000000..8b35771ada44
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test_cpuidle_gov_ext.c - test cpuidle governor ext's load, attach and kfuncs
+ *
+ * Copyright (C) Yikai Lin <yikai....@vivo.com>
+ */
+
+#include <test_progs.h>
+#include "cpuidle_gov_ext.skel.h"
+
+/*
+ * Smoke test: load the cpuidle_gov_ext skeleton, set expect_deeper and
+ * attach it, then tear everything down again.
+ */
+void test_test_cpuidle_gov_ext(void)
+{
+       struct cpuidle_gov_ext *skel = cpuidle_gov_ext__open_and_load();
+
+       if (!ASSERT_OK_PTR(skel, "cpuidle_gov_ext__open_and_load"))
+               return;
+
+       skel->bss->expect_deeper = 1;
+       /* A failed attach is reported by ASSERT_OK; cleanup is the same either way. */
+       ASSERT_OK(cpuidle_gov_ext__attach(skel), "cpuidle_gov_ext__attach");
+
+       cpuidle_gov_ext__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/cpuidle_common.h b/tools/testing/selftests/bpf/progs/cpuidle_common.h
new file mode 100644
index 000000000000..95402974c53e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpuidle_common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Yikai Lin <yikai....@vivo.com>
+ */
+
+#ifndef _CPUIDLE_COMMON_H
+#define _CPUIDLE_COMMON_H
+
+int bpf_cpuidle_ext_gov_update_rating(unsigned int rating) __ksym __weak;
+s64 bpf_cpuidle_ext_gov_latency_req(unsigned int cpu) __ksym __weak;
+s64 bpf_tick_nohz_get_sleep_length(void) __ksym __weak;
+
+#endif /* _CPUIDLE_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c b/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c
new file mode 100644
index 000000000000..66c437243270
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cpuidle_gov_ext.c - test to use cpuidle governor ext by bpf
+ *
+ * Copyright (C) Yikai Lin <yikai....@vivo.com>
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "cpuidle_common.h"
+
+char LICENSE[] SEC("license") = "GPL";
+
+#define ALPHA 10                /* EWMA weight of the newest sample, out of ALPHA_SCALE */
+#define ALPHA_SCALE 100         /* denominator for ALPHA */
+#define FIT_FACTOR 90           /* select() compares next_pred / FIT_FACTOR * ALPHA_SCALE */
+
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/*
+ * Low-power hint, written by the user-space loader through the skeleton's
+ * .bss before attaching (e.g. for the screen-off case on mobile devices).
+ * When non-zero, the select callback may pick one state deeper than its
+ * normal choice, accepting some cost in CPU performance.
+ * Zero (the default) keeps the normal choice.
+ */
+int expect_deeper = 0;
+
+struct cpuidle_gov_data {
+       int cpu;                /* CPU id copied from dev->cpu in the enable hook */
+       int last_idx;           /* state index passed to the reflect hook */
+       u64 last_pred;          /* value of next_pred before the latest update */
+       u64 last_duration;      /* dev->last_residency_ns, less exit latency */
+       u64 next_pred;          /* EWMA of last_duration, used by select */
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __uint(max_entries, 1);         /* single key 0; callers look it up per CPU */
+       __type(key, u32);
+       __type(value, struct cpuidle_gov_data);
+} cpuidle_gov_data_map SEC(".maps");
+
+static u64 calculate_ewma(u64 last, u64 new, u32 alpha, u32 alpha_scale)
+{
+       return ((alpha_scale - alpha) * last + alpha * new) / alpha_scale;
+}
+
+/* Fold the last idle residency, less that state's exit latency, into the EWMA (all in ns). */
+static void update_predict_duration(struct cpuidle_gov_data *data,
+                       struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+       int idx;
+       struct cpuidle_state target;
+
+       if (!data || !drv || !dev)
+               return;
+       idx = data->last_idx;
+       data->last_duration = dev->last_residency_ns;
+       /* Use exit_latency_ns: plain exit_latency is in us and would mix units. */
+       if (idx > 0 && idx < (int)ARRAY_SIZE(drv->states) &&
+           !bpf_core_read(&target, sizeof(target), &drv->states[idx]) &&
+           data->last_duration > (u64)target.exit_latency_ns)
+               data->last_duration -= target.exit_latency_ns;
+       data->last_pred = data->next_pred;
+       data->next_pred = calculate_ewma(data->next_pred, data->last_duration, ALPHA, ALPHA_SCALE);
+}
+
+/* Per-device enable hook: reset this CPU's prediction state and record its id. */
+SEC("struct_ops.s/enable")
+int BPF_PROG(bpf_cpuidle_enable, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+       u32 key = 0;
+       struct cpuidle_gov_data *data;
+
+       bpf_printk("cpuidle_gov_ext: enabled");
+       data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+       if (!data)
+               return 0;       /* no per-CPU slot found: nothing to reset */
+
+       __builtin_memset(data, 0, sizeof(*data));
+       data->cpu = dev->cpu;
+       return 0;
+}
+
+/* Per-device disable hook: only emits a trace message. */
+SEC("struct_ops.s/disable")
+void BPF_PROG(bpf_cpuidle_disable, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+       bpf_printk("cpuidle_gov_ext: disabled");
+}
+
+/* Choose the deepest enabled state allowed by latency limit, sleep length and prediction. */
+SEC("struct_ops.s/select")
+int BPF_PROG(bpf_cpuidle_select, struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+       u32 key = 0;
+       s64 delta, latency_req, residency_ns;
+       u64 scaled_pred;
+       int i;
+       struct cpuidle_gov_data *data;
+       struct cpuidle_state *cs;
+
+       data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+       if (!data) {
+               bpf_printk("cpuidle_gov_ext: [%s] cpuidle_gov_data_map is NULL\n", __func__);
+               return 0;
+       }
+       latency_req = bpf_cpuidle_ext_gov_latency_req(dev->cpu);
+       delta = bpf_tick_nohz_get_sleep_length();
+
+       update_predict_duration(data, drv, dev);
+       /* The prediction is inflated by ALPHA_SCALE/FIT_FACTOR before being compared. */
+       scaled_pred = data->next_pred / FIT_FACTOR * ALPHA_SCALE;
+       /* Walk from the deepest slot down; falling out of the loop leaves i == 0. */
+       for (i = ARRAY_SIZE(drv->states)-1; i > 0; i--) {
+               if (i >= drv->state_count)
+                       continue;
+               cs = &drv->states[i];
+               if (dev->states_usage[i].disable)
+                       continue;
+               if (latency_req < cs->exit_latency_ns)
+                       continue;
+               if (delta < cs->target_residency_ns)
+                       continue;
+               if (scaled_pred < cs->target_residency_ns)
+                       continue;
+               break;
+       }
+       residency_ns = drv->states[i].target_residency_ns;
+       /* expect_deeper may go one state deeper, but never past the latency limit. */
+       if (expect_deeper &&
+           i <= drv->state_count-2 &&
+           !dev->states_usage[i+1].disable &&
+           latency_req >= drv->states[i+1].exit_latency_ns &&
+           data->last_pred >= residency_ns &&
+           data->next_pred < residency_ns &&
+           scaled_pred >= residency_ns &&
+           scaled_pred >= data->last_duration &&
+           delta > residency_ns)
+               i++;
+
+       return i;
+}
+
+/* Tick-stop hook, consulted after a cpuidle state is selected; this demo always returns false. */
+SEC("struct_ops.s/set_stop_tick")
+bool BPF_PROG(bpf_cpuidle_set_stop_tick)
+{
+       return false;
+}
+
+/* Reflect hook: store the state index in last_idx for the next update_predict_duration(). */
+SEC("struct_ops.s/reflect")
+void BPF_PROG(bpf_cpuidle_reflect, struct cpuidle_device *dev, int index)
+{
+       u32 key = 0;
+       struct cpuidle_gov_data *data;
+
+       data = bpf_map_lookup_percpu_elem(&cpuidle_gov_data_map, &key, dev->cpu);
+       if (!data) {
+               bpf_printk("cpuidle_gov_ext: [%s] cpuidle_gov_data_map is NULL\n", __func__);
+               return;
+       }
+       data->last_idx = index;
+}
+
+/* Init hook: set the ext governor's rating to 60 and return the kfunc's result. */
+SEC("struct_ops.s/init")
+int BPF_PROG(bpf_cpuidle_init)
+{
+       return bpf_cpuidle_ext_gov_update_rating(60);
+}
+
+/* Exit hook of the BPF cpuidle governor; a no-op in this demo. */
+SEC("struct_ops.s/exit")
+void BPF_PROG(bpf_cpuidle_exit) { }
+
+/* struct_ops map wiring this file's callbacks into cpuidle_gov_ext_ops as "BPF_cpuidle_gov" */
+SEC(".struct_ops.link")
+struct cpuidle_gov_ext_ops ops = {
+       .enable  = (void *)bpf_cpuidle_enable,
+       .disable = (void *)bpf_cpuidle_disable,
+       .select  = (void *)bpf_cpuidle_select,
+       .set_stop_tick = (void *)bpf_cpuidle_set_stop_tick,
+       .reflect = (void *)bpf_cpuidle_reflect,
+       .init   = (void *)bpf_cpuidle_init,
+       .exit   = (void *)bpf_cpuidle_exit,
+       .name   = "BPF_cpuidle_gov"
+};
-- 
2.43.0


Reply via email to