Jan Kiszka wrote:
Am 04.11.2010 10:26, Jan Kiszka wrote:
Am 04.11.2010 10:16, Gilles Chanteperdrix wrote:
Jan Kiszka wrote:
Take a step back and look at the root cause for this issue again. Unlocked

        if need-resched
                __xnpod_schedule

is inherently racy and will always be (not only for the remote
reschedule case BTW).
Ok, let us examine what may happen with this code if we only set the
XNRESCHED bit on the local cpu. First, other bits than XNRESCHED do not
matter, because they can not change under our feet. So, we have two
cases for this race:
1- we see the XNRESCHED bit, but it has been cleared once nklock is
locked in __xnpod_schedule.
2- we do not see the XNRESCHED bit, but it gets set right after we test it.

1 is not a problem.
Yes, as long as we remove the debug check from the scheduler code (or
fix it somehow). The scheduling code already catches this race.

2 is not a problem, because anything which sets the XNRESCHED bit (in
fact, it may only be an interrupt) will cause xnpod_schedule to be
called right after that.

So no, no race here provided that we only set the XNRESCHED bit on the
local cpu.

 So we either have to accept this and remove the
debugging check from the scheduler or push the check back to
__xnpod_schedule where it once came from. When this is cleaned up, we
can look into the remote resched protocol again.
The problem of the debug check is that it checks whether the scheduler
state is modified without the XNRESCHED bit being set. And this is the
problem, because yes, in that case, we have a race: the scheduler state
may be modified before the XNRESCHED bit is set by an IPI.

If we want to fix the debug check, we have to have a special bit in
the sched->status flag, only for the purpose of debugging. Or remove the
debug check.
Exactly my point. Is there any benefit in keeping the debug check? The
code to make it work may end up as "complex" as the logic it verifies,
at least that's my current feeling.


This would be the radical approach of removing the check (and cleaning
up some bits). If it's acceptable, I would split it up properly.

diff --git a/include/nucleus/pod.h b/include/nucleus/pod.h
index 01ff0a7..71f8311 100644
--- a/include/nucleus/pod.h
+++ b/include/nucleus/pod.h
@@ -277,14 +277,9 @@ static inline void xnpod_schedule(void)
         * context is active, or if we are caught in the middle of a
         * unlocked context switch.
         */
-#if XENO_DEBUG(NUCLEUS)
-       if (testbits(sched->status, XNKCOUT|XNINIRQ|XNSWLOCK))
-               return;
-#else /* !XENO_DEBUG(NUCLEUS) */
        if (testbits(sched->status,
                     XNKCOUT|XNINIRQ|XNSWLOCK|XNRESCHED) != XNRESCHED)
                return;
-#endif /* !XENO_DEBUG(NUCLEUS) */
__xnpod_schedule(sched);
 }
diff --git a/include/nucleus/sched.h b/include/nucleus/sched.h
index df56417..c832b91 100644
--- a/include/nucleus/sched.h
+++ b/include/nucleus/sched.h
@@ -177,17 +177,16 @@ static inline int xnsched_self_resched_p(struct xnsched *sched)
/* Set self resched flag for the given scheduler. */
 #define xnsched_set_self_resched(__sched__) do {               \
-  setbits((__sched__)->status, XNRESCHED);                  \
+       __setbits((__sched__)->status, XNRESCHED);           \
 } while (0)
/* Set specific resched flag into the local scheduler mask. */
 #define xnsched_set_resched(__sched__) do {                            \
-  xnsched_t *current_sched = xnpod_current_sched();                    \
-  setbits(current_sched->status, XNRESCHED);                                \
-  if (current_sched != (__sched__))    {                               \
-      xnarch_cpu_set(xnsched_cpu(__sched__), current_sched->resched);       \
-      setbits((__sched__)->status, XNRESCHED);                              \
-  }                                                                    \
+       xnsched_t *current_sched = xnpod_current_sched();               \
+       __setbits(current_sched->status, XNRESCHED);                 \
+       if (current_sched != (__sched__))                               \
+               xnarch_cpu_set(xnsched_cpu(__sched__),                  \
+                              current_sched->resched);                      \
 } while (0)
void xnsched_zombie_hooks(struct xnthread *thread);
diff --git a/ksrc/nucleus/pod.c b/ksrc/nucleus/pod.c
index 9e135f3..87dc136 100644
--- a/ksrc/nucleus/pod.c
+++ b/ksrc/nucleus/pod.c
@@ -284,10 +284,11 @@ void xnpod_schedule_handler(void) /* Called with hw interrupts off. */
        trace_xn_nucleus_sched_remote(sched);
 #if defined(CONFIG_SMP) && defined(CONFIG_XENO_OPT_PRIOCPL)
        if (testbits(sched->status, XNRPICK)) {
-               clrbits(sched->status, XNRPICK);
+               __clrbits(sched->status, XNRPICK);
                xnshadow_rpi_check();
        }
 #endif /* CONFIG_SMP && CONFIG_XENO_OPT_PRIOCPL */
+       xnsched_set_resched(sched);
        xnpod_schedule();
 }
@@ -2162,21 +2163,21 @@ static inline void xnpod_switch_to(xnsched_t *sched,
 static inline int __xnpod_test_resched(struct xnsched *sched)
 {
-       int resched = testbits(sched->status, XNRESCHED);
+       int resched = xnsched_resched_p(sched);
 #ifdef CONFIG_SMP
        /* Send resched IPI to remote CPU(s). */
-       if (unlikely(xnsched_resched_p(sched))) {
+       if (unlikely(resched)) {
                xnarch_send_ipi(sched->resched);
                xnarch_cpus_clear(sched->resched);
        }
 #endif
-       clrbits(sched->status, XNRESCHED);
+       __clrbits(sched->status, XNRESCHED);
        return resched;
 }
void __xnpod_schedule(struct xnsched *sched)
 {
-       int zombie, switched, need_resched, shadow;
+       int zombie, switched, shadow;
        struct xnthread *prev, *next, *curr;
        spl_t s;
@@ -2194,11 +2195,10 @@ void __xnpod_schedule(struct xnsched *sched)
                         xnthread_current_priority(curr));
 reschedule:
        switched = 0;
-       need_resched = __xnpod_test_resched(sched);
-#if !XENO_DEBUG(NUCLEUS)
-       if (!need_resched)
+
+       if (!__xnpod_test_resched(sched))
                goto signal_unlock_and_exit;
-#endif /* !XENO_DEBUG(NUCLEUS) */
+
        zombie = xnthread_test_state(curr, XNZOMBIE);
next = xnsched_pick_next(sched);
@@ -2213,8 +2213,6 @@ reschedule:
                goto signal_unlock_and_exit;
        }
- XENO_BUGON(NUCLEUS, need_resched == 0);
-
        prev = curr;
trace_xn_nucleus_sched_switch(prev, next);

And remember to make the diff to head and not ftrace branch :-)

/Anders

_______________________________________________
Xenomai-core mailing list
[email protected]
https://mail.gna.org/listinfo/xenomai-core

Reply via email to