On 3/11/26 13:23, Juri Lelli wrote:
> On 11/03/26 09:31, Christian Loehle wrote:
>> On 3/6/26 16:10, Juri Lelli wrote:
>
> ...
>
>>> + /* Start one cpuhog per CPU at max bandwidth */
>>> + printf(" Starting %d cpuhog tasks at max bandwidth...\n", num_cpus);
>>> +
>>> + for (i = 0; i < num_cpus; i++) {
>>> + pids[i] = dl_create_cpuhog(runtime_ns, deadline_ns, period_ns,
>>> 0);
>>> + if (pids[i] < 0) {
>>> + printf(" Task %d failed to start: %s\n",
>>> + i + 1, strerror(errno));
>>> + goto cleanup;
>>> + }
>>> + started++;
>>> + }
>>
>> Would it be okay to just have one task per max-cap CPU to make this pass on
>> HMP?
>> Or something more sophisticated?
>>
>
> On HMP we should probably have max bandwidth hogs on big CPUs and then
> scale runtime (bandwidth) considering smaller CPUs' capacities. Cannot
> quickly check atm, but that info (max cap per-CPU) is available
> somewhere in sys or proc, isn't it?
Yes it's here:
/sys/devices/system/cpu/cpu0/cpu_capacity
FWIW I've attached the two patches to get a pass out of arm64 HMP.
From 49f24f00c6af2b7e856f7a7223a19fc87e2a4aaf Mon Sep 17 00:00:00 2001
From: Christian Loehle <[email protected]>
Date: Wed, 11 Mar 2026 13:31:05 +0000
Subject: [PATCH 1/2] selftests/sched: Account for asymmetric CPU bw
The bandwidth tests previously assumed a symmetric system, starting one
max-bandwidth task per online CPU. On HMP systems this overcounts
available bandwidth, since small CPUs have less capacity than 1024.
Fix this by reading cpu_capacity for each online CPU from sysfs and
computing the number of expected tasks as total root-domain bandwidth
divided by one max-capacity task's bandwidth. Also read DL server
overhead per CPU from debugfs so it is correctly subtracted before
computing available bandwidth.
Signed-off-by: Christian Loehle <[email protected]>
---
.../selftests/sched/deadline/bandwidth.c | 363 ++++++++++++------
1 file changed, 252 insertions(+), 111 deletions(-)
diff --git a/tools/testing/selftests/sched/deadline/bandwidth.c b/tools/testing/selftests/sched/deadline/bandwidth.c
index 72755a200db2..f931b6bddac6 100644
--- a/tools/testing/selftests/sched/deadline/bandwidth.c
+++ b/tools/testing/selftests/sched/deadline/bandwidth.c
@@ -3,8 +3,8 @@
* SCHED_DEADLINE bandwidth admission control tests
*
* Validates that the kernel correctly enforces bandwidth limits for
- * SCHED_DEADLINE tasks, including per-CPU bandwidth replication and
- * overflow rejection.
+ * SCHED_DEADLINE tasks, including capacity-scaled bandwidth replication
+ * and overflow rejection.
*/
#define _GNU_SOURCE
@@ -16,69 +16,248 @@
#include <signal.h>
#include <errno.h>
#include <string.h>
+#include <glob.h>
#include "dl_test.h"
#include "dl_util.h"
-/*
- * Test: Bandwidth admission control with max bandwidth per CPU
- *
- * Verifies that SCHED_DEADLINE bandwidth is replicated per CPU, allowing
- * one task per CPU to use the maximum available bandwidth (typically 95%).
- */
-static enum dl_test_status test_bandwidth_admission_run(void *ctx)
+#define DL_BW_SCALE 1000000000ULL
+#define DL_DEFAULT_CPU_CAPACITY 1024UL
+
+struct dl_bw_test_config {
+ uint64_t rt_runtime_us;
+ uint64_t rt_period_us;
+ uint64_t rt_bw_scaled;
+ uint64_t total_server_bw_scaled;
+ uint64_t max_cpu_server_bw_scaled;
+ uint64_t runtime_ns;
+ uint64_t deadline_ns;
+ uint64_t period_ns;
+ uint64_t task_bw_scaled;
+ unsigned long total_cpu_capacity;
+ unsigned long max_cpu_capacity;
+ int num_cpus;
+ int max_cpu;
+ int expected_tasks;
+};
+
+static int bw_read_uint64(const char *path, uint64_t *value)
{
- uint64_t rt_runtime_us, rt_period_us;
- int max_bw_percent;
- uint64_t runtime_ns, deadline_ns, period_ns;
- int num_cpus, i;
- pid_t *pids = NULL;
- int started = 0, running = 0;
- enum dl_test_status ret = DL_TEST_FAIL;
+ FILE *f;
+ int ret;
+
+ f = fopen(path, "r");
+ if (!f)
+ return -1;
+
+ ret = fscanf(f, "%lu", value);
+ fclose(f);
+
+ return ret == 1 ? 0 : -1;
+}
+
+static int bw_is_cpu_online(int cpu)
+{
+ char path[256];
+ int online;
+
+ online = dl_is_cpu_online(cpu);
+ if (online >= 0)
+ return online;
+
+ snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d", cpu);
+ return access(path, F_OK) == 0;
+}
+
+static int bw_get_cpu_capacity(int cpu, unsigned long *capacity)
+{
+ char path[256];
+ uint64_t value;
- /* Get RT bandwidth settings */
- DL_FAIL_IF(dl_get_rt_bandwidth(&rt_runtime_us, &rt_period_us) < 0,
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%d/cpu_capacity", cpu);
+
+ if (bw_read_uint64(path, &value) < 0)
+ value = DL_DEFAULT_CPU_CAPACITY;
+
+ if (!value)
+ value = DL_DEFAULT_CPU_CAPACITY;
+
+ *capacity = value;
+ return 0;
+}
+
+static uint64_t bw_get_server_bw_scaled(int cpu)
+{
+ glob_t globbuf;
+ char pattern[512];
+ size_t i;
+ uint64_t total_bw = 0;
+
+ snprintf(pattern, sizeof(pattern), "/sys/kernel/debug/sched/*_server");
+ if (glob(pattern, 0, NULL, &globbuf) != 0)
+ return 0;
+
+ for (i = 0; i < globbuf.gl_pathc; i++) {
+ char runtime_path[512];
+ char period_path[512];
+ uint64_t runtime_ns, period_ns;
+
+ snprintf(runtime_path, sizeof(runtime_path), "%s/cpu%d/runtime",
+ globbuf.gl_pathv[i], cpu);
+ snprintf(period_path, sizeof(period_path), "%s/cpu%d/period",
+ globbuf.gl_pathv[i], cpu);
+
+ if (bw_read_uint64(runtime_path, &runtime_ns) < 0)
+ continue;
+ if (bw_read_uint64(period_path, &period_ns) < 0)
+ continue;
+ if (!period_ns)
+ continue;
+
+ total_bw += ((__uint128_t)runtime_ns * DL_BW_SCALE) / period_ns;
+ }
+
+ globfree(&globbuf);
+ return total_bw;
+}
+
+static double bw_scaled_to_percent(uint64_t scaled)
+{
+ return (double)scaled * 100.0 / DL_BW_SCALE;
+}
+
+static enum dl_test_status bw_prepare_test(struct dl_bw_test_config *cfg)
+{
+ int cpu;
+ int max_cpus;
+ __uint128_t total_rt_bw;
+ __uint128_t total_server_bw;
+ __uint128_t total_available_bw;
+ __uint128_t task_bw_capacity;
+
+ memset(cfg, 0, sizeof(*cfg));
+ cfg->max_cpu = -1;
+
+ DL_FAIL_IF(dl_get_rt_bandwidth(&cfg->rt_runtime_us,
+ &cfg->rt_period_us) < 0,
"Failed to read RT bandwidth settings");
+ DL_FAIL_IF(!cfg->rt_period_us, "RT bandwidth period is zero");
- printf(" RT bandwidth: runtime=%luµs, period=%luµs (%.0f%%)\n",
- rt_runtime_us, rt_period_us,
- (double)rt_runtime_us * 100.0 / rt_period_us);
+ cfg->rt_bw_scaled = ((__uint128_t)cfg->rt_runtime_us * DL_BW_SCALE) /
+ cfg->rt_period_us;
+ cfg->period_ns = dl_ms_to_ns(100);
+ cfg->deadline_ns = cfg->period_ns;
- /* Show server overhead */
- int server_overhead = dl_get_server_bandwidth_overhead();
+ max_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+ DL_FAIL_IF(max_cpus <= 0, "Failed to read configured CPU count");
- if (server_overhead > 0)
- printf(" DL server overhead: %d%% per CPU\n", server_overhead);
+ for (cpu = 0; cpu < max_cpus; cpu++) {
+ unsigned long capacity;
+ uint64_t server_bw;
- /* Calculate maximum bandwidth percentage */
- max_bw_percent = dl_calc_max_bandwidth_percent();
- DL_FAIL_IF(max_bw_percent < 0, "Failed to calculate max bandwidth");
+ if (!bw_is_cpu_online(cpu))
+ continue;
- printf(" Available bandwidth per CPU: %d%%\n", max_bw_percent);
+ bw_get_cpu_capacity(cpu, &capacity);
+ server_bw = bw_get_server_bw_scaled(cpu);
- /* Calculate task parameters: 100ms period for easy calculation */
- period_ns = dl_ms_to_ns(100); /* 100ms */
- runtime_ns = (period_ns * max_bw_percent) / 100;
- deadline_ns = period_ns;
+ cfg->num_cpus++;
+ cfg->total_cpu_capacity += capacity;
+ cfg->total_server_bw_scaled += server_bw;
+
+ if (capacity > cfg->max_cpu_capacity) {
+ cfg->max_cpu_capacity = capacity;
+ cfg->max_cpu_server_bw_scaled = server_bw;
+ cfg->max_cpu = cpu;
+ }
+ }
+
+ DL_FAIL_IF(cfg->num_cpus <= 0, "Failed to find online CPUs");
+ DL_FAIL_IF(!cfg->max_cpu_capacity,
+ "Failed to determine maximum CPU capacity");
+ DL_FAIL_IF(cfg->rt_bw_scaled <= cfg->max_cpu_server_bw_scaled,
+ "DL servers leave no bandwidth for tasks on max-capacity CPU");
+
+ cfg->runtime_ns = ((__uint128_t)cfg->period_ns *
+ (cfg->rt_bw_scaled - cfg->max_cpu_server_bw_scaled)) /
+ DL_BW_SCALE;
+ DL_FAIL_IF(!cfg->runtime_ns, "Calculated task runtime is zero");
+
+ cfg->task_bw_scaled = ((__uint128_t)cfg->runtime_ns * DL_BW_SCALE) /
+ cfg->period_ns;
+ DL_FAIL_IF(!cfg->task_bw_scaled, "Calculated task bandwidth is zero");
+
+ total_rt_bw = (__uint128_t)cfg->total_cpu_capacity * cfg->rt_bw_scaled;
+ total_server_bw = (__uint128_t)cfg->max_cpu_capacity *
+ cfg->total_server_bw_scaled;
+ DL_FAIL_IF(total_rt_bw <= total_server_bw,
+ "DL servers consume all root-domain bandwidth");
+
+ total_available_bw = total_rt_bw - total_server_bw;
+ task_bw_capacity = (__uint128_t)cfg->max_cpu_capacity *
+ cfg->task_bw_scaled;
+
+ DL_FAIL_IF(!task_bw_capacity, "Calculated task capacity is zero");
+
+ cfg->expected_tasks = total_available_bw / task_bw_capacity;
+ DL_FAIL_IF(cfg->expected_tasks <= 0,
+ "Calculated task count is zero");
+
+ return DL_TEST_PASS;
+}
+
+static void bw_print_test_config(const struct dl_bw_test_config *cfg)
+{
+ printf(" RT bandwidth: runtime=%luµs, period=%luµs (%.0f%%)\n",
+ cfg->rt_runtime_us, cfg->rt_period_us,
+ (double)cfg->rt_runtime_us * 100.0 / cfg->rt_period_us);
+ printf(" Number of online CPUs: %d\n", cfg->num_cpus);
+ printf(" Equivalent max-capacity CPUs: %.2f\n",
+ (double)cfg->total_cpu_capacity / cfg->max_cpu_capacity);
+ printf(" Max-capacity CPU: %d (capacity=%lu)\n",
+ cfg->max_cpu, cfg->max_cpu_capacity);
+
+ if (cfg->max_cpu_server_bw_scaled) {
+ printf(" DL server overhead on max-capacity CPU: %.2f%%\n",
+ bw_scaled_to_percent(cfg->max_cpu_server_bw_scaled));
+ }
printf(" Task params: runtime=%lums, deadline=%lums, period=%lums\n",
- dl_ns_to_ms(runtime_ns), dl_ns_to_ms(deadline_ns),
- dl_ns_to_ms(period_ns));
+ dl_ns_to_ms(cfg->runtime_ns), dl_ns_to_ms(cfg->deadline_ns),
+ dl_ns_to_ms(cfg->period_ns));
+ printf(" Expected tasks at max-capacity bandwidth: %d\n",
+ cfg->expected_tasks);
+}
- /* Get number of CPUs */
- num_cpus = dl_get_online_cpus();
- DL_FAIL_IF(num_cpus <= 0, "Failed to get number of CPUs");
+/*
+ * Test: Bandwidth admission control with max-capacity task bandwidth
+ *
+ * Verifies that SCHED_DEADLINE bandwidth is replicated across the root
+ * domain capacity, allowing one max-capacity task per equivalent CPU.
+ */
+static enum dl_test_status test_bandwidth_admission_run(void *ctx)
+{
+ struct dl_bw_test_config cfg;
+ int i;
+ pid_t *pids = NULL;
+ int started = 0, running = 0;
+ enum dl_test_status ret = DL_TEST_FAIL;
- printf(" Number of online CPUs: %d\n", num_cpus);
+ DL_FAIL_IF(bw_prepare_test(&cfg) != DL_TEST_PASS,
+ "Failed to prepare bandwidth test parameters");
+ bw_print_test_config(&cfg);
/* Allocate PID array */
- pids = calloc(num_cpus, sizeof(pid_t));
+ pids = calloc(cfg.expected_tasks, sizeof(pid_t));
DL_FAIL_IF(!pids, "Failed to allocate PID array");
- /* Start one cpuhog per CPU at max bandwidth */
- printf(" Starting %d cpuhog tasks at max bandwidth...\n", num_cpus);
+ /* Start one cpuhog per equivalent max-capacity CPU */
+ printf(" Starting %d cpuhog tasks at max-capacity bandwidth...\n",
+ cfg.expected_tasks);
- for (i = 0; i < num_cpus; i++) {
- pids[i] = dl_create_cpuhog(runtime_ns, deadline_ns, period_ns, 0);
+ for (i = 0; i < cfg.expected_tasks; i++) {
+ pids[i] = dl_create_cpuhog(cfg.runtime_ns, cfg.deadline_ns,
+ cfg.period_ns, 0);
if (pids[i] < 0) {
printf(" Task %d failed to start: %s\n",
i + 1, strerror(errno));
@@ -105,15 +284,15 @@ static enum dl_test_status test_bandwidth_admission_run(void *ctx)
}
printf(" Started %d/%d tasks, %d running with SCHED_DEADLINE\n",
- started, num_cpus, running);
+ started, cfg.expected_tasks, running);
- /* Test passes if we started all N tasks and they're all running */
- if (started == num_cpus && running == num_cpus) {
- printf(" SUCCESS: All %d tasks running at max bandwidth\n",
- num_cpus);
+ /* Test passes if we started all expected tasks and they're all running */
+ if (started == cfg.expected_tasks && running == cfg.expected_tasks) {
+ printf(" SUCCESS: All %d tasks running at max-capacity bandwidth\n",
+ cfg.expected_tasks);
ret = DL_TEST_PASS;
- } else if (started != num_cpus) {
- DL_ERR("Only started %d/%d tasks", started, num_cpus);
+ } else if (started != cfg.expected_tasks) {
+ DL_ERR("Only started %d/%d tasks", started, cfg.expected_tasks);
ret = DL_TEST_FAIL;
} else {
DL_ERR("Started %d tasks but only %d using SCHED_DEADLINE",
@@ -134,7 +313,7 @@ static enum dl_test_status test_bandwidth_admission_run(void *ctx)
static struct dl_test test_bandwidth_admission = {
.name = "bandwidth_admission",
- .description = "Verify per-CPU bandwidth replication (N tasks at max bandwidth)",
+ .description = "Verify capacity-scaled bandwidth replication (N equivalent tasks at max bandwidth)",
.run = test_bandwidth_admission_run,
};
REGISTER_DL_TEST(&test_bandwidth_admission);
@@ -142,73 +321,34 @@ REGISTER_DL_TEST(&test_bandwidth_admission);
/*
* Test: Bandwidth admission control overflow rejection
*
- * Verifies that the kernel rejects tasks that would exceed available
- * bandwidth on a CPU. Creates N-1 tasks at max bandwidth, then attempts
- * to create one more at slightly higher bandwidth (should fail).
+ * Verifies that the kernel rejects tasks that would exceed the available
+ * root-domain bandwidth. Creates all equivalent max-capacity tasks, then
+ * attempts to create one more at slightly higher bandwidth (should fail).
*/
static enum dl_test_status test_bandwidth_overflow_run(void *ctx)
{
- uint64_t rt_runtime_us, rt_period_us;
- int max_bw_percent;
- uint64_t runtime_ns, deadline_ns, period_ns;
+ struct dl_bw_test_config cfg;
uint64_t overflow_runtime_ns;
- int num_cpus, i;
- int target_tasks;
+ int i;
pid_t *pids = NULL;
pid_t overflow_pid;
int started = 0;
enum dl_test_status ret = DL_TEST_FAIL;
- /* Get RT bandwidth settings */
- DL_FAIL_IF(dl_get_rt_bandwidth(&rt_runtime_us, &rt_period_us) < 0,
- "Failed to read RT bandwidth settings");
-
- printf(" RT bandwidth: runtime=%luµs, period=%luµs (%.0f%%)\n",
- rt_runtime_us, rt_period_us,
- (double)rt_runtime_us * 100.0 / rt_period_us);
-
- /* Show server overhead */
- int server_overhead = dl_get_server_bandwidth_overhead();
-
- if (server_overhead > 0)
- printf(" DL server overhead: %d%% per CPU\n", server_overhead);
-
- /* Calculate maximum bandwidth percentage */
- max_bw_percent = dl_calc_max_bandwidth_percent();
- DL_FAIL_IF(max_bw_percent < 0, "Failed to calculate max bandwidth");
-
- printf(" Available bandwidth per CPU: %d%%\n", max_bw_percent);
-
- /* Get number of CPUs */
- num_cpus = dl_get_online_cpus();
- DL_FAIL_IF(num_cpus <= 0, "Failed to get number of CPUs");
-
- if (num_cpus < 2) {
- printf(" Need at least 2 CPUs for this test (have %d)\n",
- num_cpus);
- return DL_TEST_SKIP;
- }
-
- printf(" Number of online CPUs: %d\n", num_cpus);
-
- /* Calculate task parameters */
- period_ns = dl_ms_to_ns(100); /* 100ms */
- runtime_ns = (period_ns * max_bw_percent) / 100;
- deadline_ns = period_ns;
-
- printf(" Task params: runtime=%lums, deadline=%lums, period=%lums\n",
- dl_ns_to_ms(runtime_ns), dl_ns_to_ms(deadline_ns),
- dl_ns_to_ms(period_ns));
+ DL_FAIL_IF(bw_prepare_test(&cfg) != DL_TEST_PASS,
+ "Failed to prepare bandwidth test parameters");
+ bw_print_test_config(&cfg);
- /* Start N-1 tasks at max bandwidth */
- target_tasks = num_cpus - 1;
- pids = calloc(target_tasks, sizeof(pid_t));
+ /* Start all expected tasks at max-capacity bandwidth */
+ pids = calloc(cfg.expected_tasks, sizeof(pid_t));
DL_FAIL_IF(!pids, "Failed to allocate PID array");
- printf(" Starting %d tasks at max bandwidth...\n", target_tasks);
+ printf(" Starting %d tasks at max-capacity bandwidth...\n",
+ cfg.expected_tasks);
- for (i = 0; i < target_tasks; i++) {
- pids[i] = dl_create_cpuhog(runtime_ns, deadline_ns, period_ns, 0);
+ for (i = 0; i < cfg.expected_tasks; i++) {
+ pids[i] = dl_create_cpuhog(cfg.runtime_ns, cfg.deadline_ns,
+ cfg.period_ns, 0);
if (pids[i] < 0) {
printf(" Task %d failed to start: %s\n",
i + 1, strerror(errno));
@@ -217,19 +357,20 @@ static enum dl_test_status test_bandwidth_overflow_run(void *ctx)
started++;
}
- printf(" Successfully started %d/%d tasks\n", started, target_tasks);
+ printf(" Successfully started %d/%d tasks\n",
+ started, cfg.expected_tasks);
/* Brief wait */
usleep(500000); /* 500ms */
/* Try to start one more task at max+1% bandwidth (should fail) */
- overflow_runtime_ns = (runtime_ns * 101) / 100; /* Add 1% */
+ overflow_runtime_ns = (cfg.runtime_ns * 101) / 100;
printf(" Attempting overflow task with runtime=%lums (+1%%)...\n",
dl_ns_to_ms(overflow_runtime_ns));
- overflow_pid = dl_create_cpuhog(overflow_runtime_ns, deadline_ns,
- period_ns, 0);
+ overflow_pid = dl_create_cpuhog(overflow_runtime_ns, cfg.deadline_ns,
+ cfg.period_ns, 0);
if (overflow_pid < 0) {
/* Expected: admission control rejected it */
@@ -264,7 +405,7 @@ static enum dl_test_status test_bandwidth_overflow_run(void *ctx)
static struct dl_test test_bandwidth_overflow = {
.name = "bandwidth_overflow",
- .description = "Verify bandwidth overflow rejection (N-1 + overflow fails)",
+ .description = "Verify bandwidth overflow rejection beyond equivalent capacity",
.run = test_bandwidth_overflow_run,
};
REGISTER_DL_TEST(&test_bandwidth_overflow);
--
2.34.1
From babc2081f598a59ba4bcebe9b4741f4cfa0f67ef Mon Sep 17 00:00:00 2001
From: Christian Loehle <[email protected]>
Date: Wed, 11 Mar 2026 13:32:35 +0000
Subject: [PATCH 2/2] selftests/sched: Remove cpuhog from test binaries
cpuhog is a helper binary invoked by the test runner, not a standalone
test. Listing it in TEST_GEN_PROGS causes the kselftest framework to
execute it directly as a test case. Move it to TEST_GEN_FILES so it
is built but not run independently.
Also drop the hand-written clean target and use EXTRA_CLEAN instead,
deferring to lib.mk.
Signed-off-by: Christian Loehle <[email protected]>
---
tools/testing/selftests/sched/deadline/Makefile | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/sched/deadline/Makefile b/tools/testing/selftests/sched/deadline/Makefile
index aa7752da1bdc..22cdffa4d08a 100644
--- a/tools/testing/selftests/sched/deadline/Makefile
+++ b/tools/testing/selftests/sched/deadline/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := runner cpuhog
+TEST_GEN_PROGS := runner
+TEST_GEN_FILES := cpuhog
# override lib.mk's default rules
OVERRIDE_TARGETS := 1
@@ -9,6 +10,7 @@ include ../../lib.mk
CFLAGS += -Wall -O2 -g -pthread
OUTPUT_DIR := $(OUTPUT)
+EXTRA_CLEAN += $(OUTPUT)/*.o *.o
# Utility object files
UTIL_OBJS := $(OUTPUT)/dl_util.o
@@ -44,11 +46,6 @@ $(OUTPUT)/replenish_bug.o: replenish_bug.c dl_test.h dl_util.h | $(OUTPUT_DIR)
$(OUTPUT_DIR):
mkdir -p $@
-.PHONY: all clean
+.PHONY: all
-all: $(TEST_GEN_PROGS)
-
-clean:
- rm -f $(OUTPUT)/runner $(OUTPUT)/cpuhog
- rm -f $(OUTPUT)/*.o
- rm -f *.o
+all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES)
--
2.34.1