The rtapp/sleep monitor's primary purpose is detecting common mistakes
with user-space real-time design. Monitoring real-time issues with
kernel threads is a bonus.
However, accommodating kernel threads complicates the monitor due to
edge cases which are seen by the monitor as a lower-priority task
waking a higher-priority task:
- kthread_stop() wakes up the task in order to stop it.
- The rcu thread and migration thread can be woken by any task.
- The ktimerd thread is woken near the end of irq_exit_rcu(), where
the preempt counter is "broken" and falsely says this is task
context. This requires the monitor to use the hardirq_context flag
instead of the preempt counter.
Besides complicating the monitor, the final case also requires enabling
CONFIG_TRACE_IRQFLAGS (so that "hardirq_context" can be used). This
adds overhead to the kernel even when the monitor is not active. This
may be an obstacle to enabling this monitor in distros' kernels.
Furthermore, kernel threads usually are started before the monitor is
enabled. Consequently, the threads' states (in other words, the monitor's
atomic propositions for the threads) are not fully known to the
monitor. As a result, the kernel threads mostly cannot be monitored.
Overall, the downsides of accommodating kernel threads outweigh the
benefits. Thus, exclude kernel threads to simplify the monitor.
Signed-off-by: Nam Cao <[email protected]>
---
Cc: Sebastian Andrzej Siewior <[email protected]>
---
Documentation/trace/rv/monitor_rtapp.rst | 22 ++---
kernel/trace/rv/monitors/sleep/Kconfig | 1 -
kernel/trace/rv/monitors/sleep/sleep.c | 39 +-------
kernel/trace/rv/monitors/sleep/sleep.h | 104 +++++++++-------------
tools/verification/models/rtapp/sleep.ltl | 7 +-
5 files changed, 54 insertions(+), 119 deletions(-)
diff --git a/Documentation/trace/rv/monitor_rtapp.rst
b/Documentation/trace/rv/monitor_rtapp.rst
index 570be67a8f3b..502d3ea412eb 100644
--- a/Documentation/trace/rv/monitor_rtapp.rst
+++ b/Documentation/trace/rv/monitor_rtapp.rst
@@ -93,9 +93,9 @@ assessment.
The monitor's specification is::
- RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
+ RULE = always ((RT and SLEEP and USER_THREAD) imply (RT_FRIENDLY_SLEEP or
ALLOWLIST))
- RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
+ RT_FRIENDLY_SLEEP = RT_VALID_SLEEP_REASON
and ((not SCHEDULE_IN) until RT_FRIENDLY_WAKE)
RT_VALID_SLEEP_REASON = FUTEX_WAIT
@@ -110,23 +110,13 @@ The monitor's specification is::
or WOKEN_BY_HARDIRQ
or WOKEN_BY_NMI
or ABORT_SLEEP
- or KTHREAD_SHOULD_STOP
ALLOWLIST = BLOCK_ON_RT_MUTEX
or FUTEX_LOCK_PI
- or TASK_IS_RCU
- or TASK_IS_MIGRATION
-
-Beside the scenarios described above, this specification also handle some
-special cases:
-
- - `KERNEL_THREAD`: kernel tasks do not have any pattern that can be
recognized
- as valid real-time sleeping reasons. Therefore sleeping reason is not
- checked for kernel tasks.
- - `KTHREAD_SHOULD_STOP`: a non-real-time thread may stop a real-time kernel
- thread by waking it and waiting for it to exit (`kthread_stop()`). This
- wakeup is safe for real-time.
- - `ALLOWLIST`: to handle known false positives with the kernel.
+
+Beside the scenarios described above, this specification also defines an allow
list
+to handle some special cases:
+
- `BLOCK_ON_RT_MUTEX` is included in the allowlist due to its implementation.
In the release path of rt_mutex, a boosted task is de-boosted before waking
the rt_mutex's waiter. Consequently, the monitor may see a real-time-unsafe
diff --git a/kernel/trace/rv/monitors/sleep/Kconfig
b/kernel/trace/rv/monitors/sleep/Kconfig
index 6b7a122e7b47..d6ec3e9a91b6 100644
--- a/kernel/trace/rv/monitors/sleep/Kconfig
+++ b/kernel/trace/rv/monitors/sleep/Kconfig
@@ -5,7 +5,6 @@ config RV_MON_SLEEP
select RV_LTL_MONITOR
depends on HAVE_SYSCALL_TRACEPOINTS
depends on RV_MON_RTAPP
- select TRACE_IRQFLAGS
default y
select LTL_MON_EVENTS_ID
bool "sleep monitor"
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c
b/kernel/trace/rv/monitors/sleep/sleep.c
index 638be7d8747f..aa5a984853b5 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.c
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -43,7 +43,6 @@ static void ltl_atoms_init(struct task_struct *task, struct
ltl_monitor *mon, bo
ltl_atom_set(mon, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, false);
if (task_creation) {
- ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_REALTIME, false);
ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
@@ -53,33 +52,7 @@ static void ltl_atoms_init(struct task_struct *task, struct
ltl_monitor *mon, bo
ltl_atom_set(mon, LTL_BLOCK_ON_RT_MUTEX, false);
}
- if (task->flags & PF_KTHREAD) {
- ltl_atom_set(mon, LTL_KERNEL_THREAD, true);
-
- /* kernel tasks do not do syscall */
- ltl_atom_set(mon, LTL_FUTEX_WAIT, false);
- ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false);
- ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_REALTIME, false);
- ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
- ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
- ltl_atom_set(mon, LTL_EPOLL_WAIT, false);
-
- if (strstarts(task->comm, "migration/"))
- ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, true);
- else
- ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
-
- if (strstarts(task->comm, "rcu"))
- ltl_atom_set(mon, LTL_TASK_IS_RCU, true);
- else
- ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
- } else {
- ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
- ltl_atom_set(mon, LTL_KERNEL_THREAD, false);
- ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
- ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
- }
-
+ ltl_atom_set(mon, LTL_USER_THREAD, !(task->flags & PF_KTHREAD));
}
static void handle_sched_set_state(void *data, struct task_struct *task, int
state)
@@ -97,7 +70,7 @@ static void handle_sched_exit(void *data, bool is_switch)
static void handle_sched_waking(void *data, struct task_struct *task)
{
- if (this_cpu_read(hardirq_context)) {
+ if (in_hardirq()) {
ltl_atom_pulse(task, LTL_WOKEN_BY_HARDIRQ, true);
} else if (in_task()) {
if (current->prio <= task->prio)
@@ -181,12 +154,6 @@ static void handle_sys_exit(void *data, struct pt_regs
*regs, long ret)
ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, false);
}
-static void handle_kthread_stop(void *data, struct task_struct *task)
-{
- /* FIXME: this could race with other tracepoint handlers */
- ltl_atom_update(task, LTL_KTHREAD_SHOULD_STOP, true);
-}
-
static int enable_sleep(void)
{
int retval;
@@ -200,7 +167,6 @@ static int enable_sleep(void)
rv_attach_trace_probe("rtapp_sleep", sched_set_state_tp,
handle_sched_set_state);
rv_attach_trace_probe("rtapp_sleep", contention_begin,
handle_contention_begin);
rv_attach_trace_probe("rtapp_sleep", contention_end,
handle_contention_end);
- rv_attach_trace_probe("rtapp_sleep", sched_kthread_stop,
handle_kthread_stop);
rv_attach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
rv_attach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
return 0;
@@ -213,7 +179,6 @@ static void disable_sleep(void)
rv_detach_trace_probe("rtapp_sleep", sched_set_state_tp,
handle_sched_set_state);
rv_detach_trace_probe("rtapp_sleep", contention_begin,
handle_contention_begin);
rv_detach_trace_probe("rtapp_sleep", contention_end,
handle_contention_end);
- rv_detach_trace_probe("rtapp_sleep", sched_kthread_stop,
handle_kthread_stop);
rv_detach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
rv_detach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
diff --git a/kernel/trace/rv/monitors/sleep/sleep.h
b/kernel/trace/rv/monitors/sleep/sleep.h
index 2fe2ec7edae8..44e593f41e6a 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.h
+++ b/kernel/trace/rv/monitors/sleep/sleep.h
@@ -18,15 +18,12 @@ enum ltl_atom {
LTL_EPOLL_WAIT,
LTL_FUTEX_LOCK_PI,
LTL_FUTEX_WAIT,
- LTL_KERNEL_THREAD,
- LTL_KTHREAD_SHOULD_STOP,
LTL_NANOSLEEP_CLOCK_REALTIME,
LTL_NANOSLEEP_TIMER_ABSTIME,
LTL_RT,
LTL_SCHEDULE_IN,
LTL_SLEEP,
- LTL_TASK_IS_MIGRATION,
- LTL_TASK_IS_RCU,
+ LTL_USER_THREAD,
LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
LTL_WOKEN_BY_HARDIRQ,
LTL_WOKEN_BY_NMI,
@@ -43,15 +40,12 @@ static const char *ltl_atom_str(enum ltl_atom atom)
"ep_wa",
"fu_lo_pi",
"fu_wa",
- "ker_th",
- "kth_sh_st",
"na_cl_re",
"na_ti_ab",
"rt",
"sch_in",
"sle",
- "ta_mi",
- "ta_rc",
+ "us_th",
"wo_eq_hi_pr",
"wo_ha",
"wo_nm",
@@ -79,46 +73,41 @@ static void ltl_start(struct task_struct *task, struct
ltl_monitor *mon)
bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
bool woken_by_equal_or_higher_prio =
test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
mon->atoms);
- bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
- bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool user_thread = test_bit(LTL_USER_THREAD, mon->atoms);
bool sleep = test_bit(LTL_SLEEP, mon->atoms);
bool schedule_in = test_bit(LTL_SCHEDULE_IN, mon->atoms);
bool rt = test_bit(LTL_RT, mon->atoms);
bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME,
mon->atoms);
bool nanosleep_clock_realtime = test_bit(LTL_NANOSLEEP_CLOCK_REALTIME,
mon->atoms);
- bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP,
mon->atoms);
- bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms);
bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
- bool val41 = task_is_rcu || task_is_migration;
- bool val42 = futex_lock_pi || val41;
- bool val5 = block_on_rt_mutex || val42;
- bool val33 = abort_sleep || kthread_should_stop;
- bool val34 = woken_by_nmi || val33;
- bool val35 = woken_by_hardirq || val34;
- bool val14 = woken_by_equal_or_higher_prio || val35;
+ bool val7 = block_on_rt_mutex || futex_lock_pi;
+ bool val32 = woken_by_nmi || abort_sleep;
+ bool val33 = woken_by_hardirq || val32;
+ bool val14 = woken_by_equal_or_higher_prio || val33;
bool val13 = !schedule_in;
bool val25 = !nanosleep_clock_realtime;
bool val26 = nanosleep_timer_abstime && val25;
bool val18 = clock_nanosleep && val26;
bool val20 = val18 || epoll_wait;
- bool val9 = futex_wait || val20;
- bool val11 = val9 || kernel_thread;
+ bool val11 = futex_wait || val20;
+ bool val3 = !user_thread;
bool val2 = !sleep;
+ bool val4 = val2 || val3;
bool val1 = !rt;
- bool val3 = val1 || val2;
+ bool val5 = val1 || val4;
- if (val3)
+ if (val5)
__set_bit(S0, mon->states);
if (val11 && val13)
__set_bit(S1, mon->states);
if (val11 && val14)
__set_bit(S4, mon->states);
- if (val5)
+ if (val7)
__set_bit(S5, mon->states);
}
@@ -129,130 +118,125 @@ ltl_possible_next_states(struct ltl_monitor *mon,
unsigned int state, unsigned l
bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
bool woken_by_equal_or_higher_prio =
test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
mon->atoms);
- bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
- bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool user_thread = test_bit(LTL_USER_THREAD, mon->atoms);
bool sleep = test_bit(LTL_SLEEP, mon->atoms);
bool schedule_in = test_bit(LTL_SCHEDULE_IN, mon->atoms);
bool rt = test_bit(LTL_RT, mon->atoms);
bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME,
mon->atoms);
bool nanosleep_clock_realtime = test_bit(LTL_NANOSLEEP_CLOCK_REALTIME,
mon->atoms);
- bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP,
mon->atoms);
- bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
bool epoll_wait = test_bit(LTL_EPOLL_WAIT, mon->atoms);
bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
- bool val41 = task_is_rcu || task_is_migration;
- bool val42 = futex_lock_pi || val41;
- bool val5 = block_on_rt_mutex || val42;
- bool val33 = abort_sleep || kthread_should_stop;
- bool val34 = woken_by_nmi || val33;
- bool val35 = woken_by_hardirq || val34;
- bool val14 = woken_by_equal_or_higher_prio || val35;
+ bool val7 = block_on_rt_mutex || futex_lock_pi;
+ bool val32 = woken_by_nmi || abort_sleep;
+ bool val33 = woken_by_hardirq || val32;
+ bool val14 = woken_by_equal_or_higher_prio || val33;
bool val13 = !schedule_in;
bool val25 = !nanosleep_clock_realtime;
bool val26 = nanosleep_timer_abstime && val25;
bool val18 = clock_nanosleep && val26;
bool val20 = val18 || epoll_wait;
- bool val9 = futex_wait || val20;
- bool val11 = val9 || kernel_thread;
+ bool val11 = futex_wait || val20;
+ bool val3 = !user_thread;
bool val2 = !sleep;
+ bool val4 = val2 || val3;
bool val1 = !rt;
- bool val3 = val1 || val2;
+ bool val5 = val1 || val4;
switch (state) {
case S0:
- if (val3)
+ if (val5)
__set_bit(S0, next);
if (val11 && val13)
__set_bit(S1, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val5)
+ if (val7)
__set_bit(S5, next);
break;
case S1:
if (val11 && val13)
__set_bit(S1, next);
- if (val13 && val3)
+ if (val13 && val5)
__set_bit(S2, next);
- if (val14 && val3)
+ if (val14 && val5)
__set_bit(S3, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val13 && val5)
+ if (val13 && val7)
__set_bit(S6, next);
- if (val14 && val5)
+ if (val14 && val7)
__set_bit(S7, next);
break;
case S2:
if (val11 && val13)
__set_bit(S1, next);
- if (val13 && val3)
+ if (val13 && val5)
__set_bit(S2, next);
- if (val14 && val3)
+ if (val14 && val5)
__set_bit(S3, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val13 && val5)
+ if (val13 && val7)
__set_bit(S6, next);
- if (val14 && val5)
+ if (val14 && val7)
__set_bit(S7, next);
break;
case S3:
- if (val3)
+ if (val5)
__set_bit(S0, next);
if (val11 && val13)
__set_bit(S1, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val5)
+ if (val7)
__set_bit(S5, next);
break;
case S4:
- if (val3)
+ if (val5)
__set_bit(S0, next);
if (val11 && val13)
__set_bit(S1, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val5)
+ if (val7)
__set_bit(S5, next);
break;
case S5:
- if (val3)
+ if (val5)
__set_bit(S0, next);
if (val11 && val13)
__set_bit(S1, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val5)
+ if (val7)
__set_bit(S5, next);
break;
case S6:
if (val11 && val13)
__set_bit(S1, next);
- if (val13 && val3)
+ if (val13 && val5)
__set_bit(S2, next);
- if (val14 && val3)
+ if (val14 && val5)
__set_bit(S3, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val13 && val5)
+ if (val13 && val7)
__set_bit(S6, next);
- if (val14 && val5)
+ if (val14 && val7)
__set_bit(S7, next);
break;
case S7:
- if (val3)
+ if (val5)
__set_bit(S0, next);
if (val11 && val13)
__set_bit(S1, next);
if (val11 && val14)
__set_bit(S4, next);
- if (val5)
+ if (val7)
__set_bit(S5, next);
break;
}
diff --git a/tools/verification/models/rtapp/sleep.ltl
b/tools/verification/models/rtapp/sleep.ltl
index 5923e58d7810..4d78fdd204c0 100644
--- a/tools/verification/models/rtapp/sleep.ltl
+++ b/tools/verification/models/rtapp/sleep.ltl
@@ -1,6 +1,6 @@
-RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
+RULE = always ((RT and SLEEP and USER_THREAD) imply (RT_FRIENDLY_SLEEP or
ALLOWLIST))
-RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
+RT_FRIENDLY_SLEEP = RT_VALID_SLEEP_REASON
and ((not SCHEDULE_IN) until RT_FRIENDLY_WAKE)
RT_VALID_SLEEP_REASON = FUTEX_WAIT
@@ -15,9 +15,6 @@ RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO
or WOKEN_BY_HARDIRQ
or WOKEN_BY_NMI
or ABORT_SLEEP
- or KTHREAD_SHOULD_STOP
ALLOWLIST = BLOCK_ON_RT_MUTEX
or FUTEX_LOCK_PI
- or TASK_IS_RCU
- or TASK_IS_MIGRATION
--
2.47.3