Am 04.11.2010 10:26, Jan Kiszka wrote:
> Am 04.11.2010 10:16, Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> Take a step back and look at the root cause for this issue again. Unlocked
>>>
>>> if need-resched
>>> __xnpod_schedule
>>>
>>> is inherently racy and will always be (not only for the remote
>>> reschedule case BTW).
>>
>> Ok, let us examine what may happen with this code if we only set the
>> XNRESCHED bit on the local cpu. First, other bits than XNRESCHED do not
>> matter, because they can not change under our feet. So, we have two
>> cases for this race:
>> 1- we see the XNRESCHED bit, but it has been cleared once nklock is
>> locked in __xnpod_schedule.
>> 2- we do not see the XNRESCHED bit, but it gets set right after we test it.
>>
>> 1 is not a problem.
>
> Yes, as long as we remove the debug check from the scheduler code (or
> fix it somehow). The scheduling code already catches this race.
>
>> 2 is not a problem, because anything which sets the XNRESCHED bit (in
>> fact, it may only be an interrupt) will cause xnpod_schedule to be
>> called right after that.
>>
>> So no, no race here provided that we only set the XNRESCHED bit on the
>> local cpu.
>>
>> So we either have to accept this and remove the
>>> debugging check from the scheduler or push the check back to
>>> __xnpod_schedule where it once came from. When this is cleaned up, we
>>> can look into the remote resched protocol again.
>>
>> The problem of the debug check is that it checks whether the scheduler
>> state is modified without the XNRESCHED bit being set. And this is the
>> problem, because yes, in that case, we have a race: the scheduler state
>> may be modified before the XNRESCHED bit is set by an IPI.
>>
>> If we want to fix the debug check, we have to have a special bit, one in
>> the sched->status flag, only for the purpose of debugging. Or remove the
>> debug check.
>
> Exactly my point. Is there any benefit in keeping the debug check? The
> code to make it work may end up as "complex" as the logic it verifies,
> at least that's my current feeling.
>
This would be the radical approach of removing the check (and cleaning
up some bits). If it's acceptable, I would split it up properly.
diff --git a/include/nucleus/pod.h b/include/nucleus/pod.h
index 01ff0a7..71f8311 100644
--- a/include/nucleus/pod.h
+++ b/include/nucleus/pod.h
@@ -277,14 +277,9 @@ static inline void xnpod_schedule(void)
* context is active, or if we are caught in the middle of a
* unlocked context switch.
*/
-#if XENO_DEBUG(NUCLEUS)
- if (testbits(sched->status, XNKCOUT|XNINIRQ|XNSWLOCK))
- return;
-#else /* !XENO_DEBUG(NUCLEUS) */
if (testbits(sched->status,
XNKCOUT|XNINIRQ|XNSWLOCK|XNRESCHED) != XNRESCHED)
return;
-#endif /* !XENO_DEBUG(NUCLEUS) */
__xnpod_schedule(sched);
}
diff --git a/include/nucleus/sched.h b/include/nucleus/sched.h
index df56417..c832b91 100644
--- a/include/nucleus/sched.h
+++ b/include/nucleus/sched.h
@@ -177,17 +177,16 @@ static inline int xnsched_self_resched_p(struct xnsched
*sched)
/* Set self resched flag for the given scheduler. */
#define xnsched_set_self_resched(__sched__) do { \
- setbits((__sched__)->status, XNRESCHED); \
+ __setbits((__sched__)->status, XNRESCHED); \
} while (0)
/* Set specific resched flag into the local scheduler mask. */
#define xnsched_set_resched(__sched__) do { \
- xnsched_t *current_sched = xnpod_current_sched(); \
- setbits(current_sched->status, XNRESCHED); \
- if (current_sched != (__sched__)) { \
- xnarch_cpu_set(xnsched_cpu(__sched__), current_sched->resched); \
- setbits((__sched__)->status, XNRESCHED); \
- } \
+ xnsched_t *current_sched = xnpod_current_sched(); \
+ __setbits(current_sched->status, XNRESCHED); \
+ if (current_sched != (__sched__)) \
+ xnarch_cpu_set(xnsched_cpu(__sched__), \
+ current_sched->resched); \
} while (0)
void xnsched_zombie_hooks(struct xnthread *thread);
diff --git a/ksrc/nucleus/pod.c b/ksrc/nucleus/pod.c
index 9e135f3..87dc136 100644
--- a/ksrc/nucleus/pod.c
+++ b/ksrc/nucleus/pod.c
@@ -284,10 +284,11 @@ void xnpod_schedule_handler(void) /* Called with hw
interrupts off. */
trace_xn_nucleus_sched_remote(sched);
#if defined(CONFIG_SMP) && defined(CONFIG_XENO_OPT_PRIOCPL)
if (testbits(sched->status, XNRPICK)) {
- clrbits(sched->status, XNRPICK);
+ __clrbits(sched->status, XNRPICK);
xnshadow_rpi_check();
}
#endif /* CONFIG_SMP && CONFIG_XENO_OPT_PRIOCPL */
+ xnsched_set_resched(sched);
xnpod_schedule();
}
@@ -2162,21 +2163,21 @@ static inline void xnpod_switch_to(xnsched_t *sched,
static inline int __xnpod_test_resched(struct xnsched *sched)
{
- int resched = testbits(sched->status, XNRESCHED);
+ int resched = xnsched_resched_p(sched);
#ifdef CONFIG_SMP
/* Send resched IPI to remote CPU(s). */
- if (unlikely(xnsched_resched_p(sched))) {
+ if (unlikely(resched)) {
xnarch_send_ipi(sched->resched);
xnarch_cpus_clear(sched->resched);
}
#endif
- clrbits(sched->status, XNRESCHED);
+ __clrbits(sched->status, XNRESCHED);
return resched;
}
void __xnpod_schedule(struct xnsched *sched)
{
- int zombie, switched, need_resched, shadow;
+ int zombie, switched, shadow;
struct xnthread *prev, *next, *curr;
spl_t s;
@@ -2194,11 +2195,10 @@ void __xnpod_schedule(struct xnsched *sched)
xnthread_current_priority(curr));
reschedule:
switched = 0;
- need_resched = __xnpod_test_resched(sched);
-#if !XENO_DEBUG(NUCLEUS)
- if (!need_resched)
+
+ if (!__xnpod_test_resched(sched))
goto signal_unlock_and_exit;
-#endif /* !XENO_DEBUG(NUCLEUS) */
+
zombie = xnthread_test_state(curr, XNZOMBIE);
next = xnsched_pick_next(sched);
@@ -2213,8 +2213,6 @@ reschedule:
goto signal_unlock_and_exit;
}
- XENO_BUGON(NUCLEUS, need_resched == 0);
-
prev = curr;
trace_xn_nucleus_sched_switch(prev, next);
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
_______________________________________________
Xenomai-core mailing list
[email protected]
https://mail.gna.org/listinfo/xenomai-core