Am 04.11.2010 10:26, Jan Kiszka wrote:
> Am 04.11.2010 10:16, Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> Take a step back and look at the root cause for this issue again. Unlocked
>>>
>>> if need-resched
>>> __xnpod_schedule
>>>
>>> is inherently racy and will always be (not only for the remote
>>> reschedule case BTW).
>>
>> Ok, let us examine what may happen with this code if we only set the
>> XNRESCHED bit on the local cpu. First, other bits than XNRESCHED do not
>> matter, because they can not change under our feet. So, we have two
>> cases for this race:
>> 1- we see the XNRESCHED bit, but it has been cleared once nklock is
>> locked in __xnpod_schedule.
>> 2- we do not see the XNRESCHED bit, but it gets set right after we test it.
>>
>> 1 is not a problem.
>
> Yes, as long as we remove the debug check from the scheduler code (or
> fix it somehow). The scheduling code already catches this race.
>
>> 2 is not a problem, because anything which sets the XNRESCHED bit (in
>> fact, it may only be an interrupt) will cause xnpod_schedule to be
>> called right after that.
>>
>> So no, no race here provided that we only set the XNRESCHED bit on the
>> local cpu.
>>
>> So we either have to accept this and remove the
>>> debugging check from the scheduler or push the check back to
>>> __xnpod_schedule where it once came from. When this is cleaned up, we
>>> can look into the remote resched protocol again.
>>
>> The problem of the debug check is that it checks whether the scheduler
>> state is modified without the XNRESCHED bit being set. And this is the
>> problem, because yes, in that case, we have a race: the scheduler state
>> may be modified before the XNRESCHED bit is set by an IPI.
>>
>> If we want to fix the debug check, we have to have a special bit, one in
>> the sched->status flag, only for the purpose of debugging. Or remove the
>> debug check.
>
> Exactly my point. Is there any benefit in keeping the debug check? The
> code to make it work may end up as "complex" as the logic it verifies,
> at least that's my current feeling.
>
This would be the radical approach of removing the check (and cleaning
up some bits). If it's acceptable, I would split it up properly.
diff --git a/include/nucleus/pod.h b/include/nucleus/pod.h
index 01ff0a7..71f8311 100644
--- a/include/nucleus/pod.h
+++ b/include/nucleus/pod.h
@@ -277,14 +277,9 @@ static inline void xnpod_schedule(void)
* context is active, or if we are caught in the middle of a
* unlocked context switch.
*/
-#if XENO_DEBUG(NUCLEUS)
- if (testbits(sched->status, XNKCOUT|XNINIRQ|XNSWLOCK))
- return;
-#else /* !XENO_DEBUG(NUCLEUS) */
if (testbits(sched->status,
XNKCOUT|XNINIRQ|XNSWLOCK|XNRESCHED) != XNRESCHED)
return;
-#endif /* !XENO_DEBUG(NUCLEUS) */
__xnpod_schedule(sched);
}
diff --git a/include/nucleus/sched.h b/include/nucleus/sched.h
index df56417..c832b91 100644
--- a/include/nucleus/sched.h
+++ b/include/nucleus/sched.h
@@ -177,17 +177,16 @@ static inline int xnsched_self_resched_p(struct xnsched
*sched)
/* Set self resched flag for the given scheduler. */
#define xnsched_set_self_resched(__sched__) do { \
- setbits((__sched__)->status, XNRESCHED); \
+ __setbits((__sched__)->status, XNRESCHED); \
} while (0)
/* Set specific resched flag into the local scheduler mask. */
#define xnsched_set_resched(__sched__) do { \
- xnsched_t *current_sched = xnpod_current_sched(); \
- setbits(current_sched->status, XNRESCHED); \
- if (current_sched != (__sched__)) { \
- xnarch_cpu_set(xnsched_cpu(__sched__), current_sched->resched); \
- setbits((__sched__)->status, XNRESCHED); \
- } \
+ xnsched_t *current_sched = xnpod_current_sched(); \
+ __setbits(current_sched->status, XNRESCHED); \
+ if (current_sched != (__sched__)) \
+ xnarch_cpu_set(xnsched_cpu(__sched__), \
+ current_sched->resched); \
} while (0)
void xnsched_zombie_hooks(struct xnthread *thread);
diff --git a/ksrc/nucleus/pod.c b/ksrc/nucleus/pod.c
index 9e135f3..87dc136 100644
--- a/ksrc/nucleus/pod.c
+++ b/ksrc/nucleus/pod.c
@@ -284,10 +284,11 @@ void xnpod_schedule_handler(void) /* Called with hw
interrupts off. */
trace_xn_nucleus_sched_remote(sched);
#if defined(CONFIG_SMP) && defined(CONFIG_XENO_OPT_PRIOCPL)
if (testbits(sched->status, XNRPICK)) {
- clrbits(sched->status, XNRPICK);
+ __clrbits(sched->status, XNRPICK);
xnshadow_rpi_check();
}
#endif /* CONFIG_SMP && CONFIG_XENO_OPT_PRIOCPL */
+ xnsched_set_resched(sched);
xnpod_schedule();
}
@@ -2162,21 +2163,21 @@ static inline void xnpod_switch_to(xnsched_t *sched,
static inline int __xnpod_test_resched(struct xnsched *sched)
{
- int resched = testbits(sched->status, XNRESCHED);
+ int resched = xnsched_resched_p(sched);
#ifdef CONFIG_SMP
/* Send resched IPI to remote CPU(s). */
- if (unlikely(xnsched_resched_p(sched))) {
+ if (unlikely(resched)) {
xnarch_send_ipi(sched->resched);
xnarch_cpus_clear(sched->resched);
}
#endif
- clrbits(sched->status, XNRESCHED);
+ __clrbits(sched->status, XNRESCHED);
return resched;
}
void __xnpod_schedule(struct xnsched *sched)
{
- int zombie, switched, need_resched, shadow;
+ int zombie, switched, shadow;
struct xnthread *prev, *next, *curr;
spl_t s;
@@ -2194,11 +2195,10 @@ void __xnpod_schedule(struct xnsched *sched)
xnthread_current_priority(curr));
reschedule:
switched = 0;
- need_resched = __xnpod_test_resched(sched);
-#if !XENO_DEBUG(NUCLEUS)
- if (!need_resched)
+
+ if (!__xnpod_test_resched(sched))
goto signal_unlock_and_exit;
-#endif /* !XENO_DEBUG(NUCLEUS) */
+
zombie = xnthread_test_state(curr, XNZOMBIE);
next = xnsched_pick_next(sched);
@@ -2213,8 +2213,6 @@ reschedule:
goto signal_unlock_and_exit;
}
- XENO_BUGON(NUCLEUS, need_resched == 0);
-
prev = curr;
trace_xn_nucleus_sched_switch(prev, next);
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
_______________________________________________
Xenomai-core mailing list
[email protected]
https://mail.gna.org/listinfo/xenomai-core