Haven't finished (hardly started) the arch code for this yet (and probably
broke ppc64), so it is just an RFC at this stage.

The large CC list is because it is a reasonably big change, so I hope one
or two of you smart guys can see if it looks sound before I try to tackle
the rest of the arch code and start asking for testers.

This actually improves performance noticably (ie. a % or so) on schedule /
wakeup happy benchmarks (tbench, on a dual Xeon with HT using mwait idle).

--
SUSE Labs, Novell Inc.
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Also have preempt explicitly
disabled in idle routines. Improves efficiency of resched_task and some
cpu_idle routines.

* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
  and as we hold it during resched_task, then there is no need for an
  atomic test and set there. (The only time this may prevent an IPI is
  when the task's quantum expires in the timer interrupt - this is a
  very rare race to bother with in comparison with the cost).

- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
  won't get unset until the task get's schedule()d off.

- If we are running on the same CPU as the task we resched, then set
  TIF_NEED_RESCHED and no further action is required.

- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
  after TIF_NEED_RESCHED has been set, then we need to send an IPI.

Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of POLLING_NRFLAG.

* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
  becomes true, explicitly call schedule(). This makes things a bit clearer
  (IMO), but haven't updated all architectures yet.

- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
  to the resched_task rules, this isn't needed (and actually breaks the
  assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
  held). So remove that. Generally one less locked memory op when switching
  to the idle thread.

- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
  most polling idle loops. The above resched_task semantics allow it to be
  set until before the last time need_resched() is checked before going into
  a halt requiring interrupt wakeup.

  Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
  can be always left set, completely eliminating resched IPIs when rescheduling
  the idle task.

  POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c       2005-03-27 00:25:49.000000000 +1100
+++ linux-2.6/kernel/sched.c    2005-03-27 00:27:30.000000000 +1100
@@ -796,21 +796,28 @@ static void deactivate_task(struct task_
 #ifdef CONFIG_SMP
 static void resched_task(task_t *p)
 {
-       int need_resched, nrpolling;
+       int cpu;
 
        assert_spin_locked(&task_rq(p)->lock);
 
-       /* minimise the chance of sending an interrupt to poll_idle() */
-       nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
-       need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
-       nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+       if (test_tsk_thread_flag(p, TIF_NEED_RESCHED))
+               return;
+       
+       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
-       if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-               smp_send_reschedule(task_cpu(p));
+       cpu = task_cpu(p);
+       if (cpu == smp_processor_id())
+               return;
+
+       /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+       smp_mb();
+       if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+               smp_send_reschedule(cpu);
 }
 #else
 static inline void resched_task(task_t *p)
 {
+       assert_spin_locked(&task_rq(p)->lock);
        set_tsk_need_resched(p);
 }
 #endif
Index: linux-2.6/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/i386/kernel/process.c   2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/i386/kernel/process.c        2005-03-27 00:27:30.000000000 
+1100
@@ -95,14 +95,19 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
+       local_irq_enable();
+
        if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-               local_irq_disable();
-               if (!need_resched())
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
+               while (!need_resched()) {
+                       local_irq_disable();
                        safe_halt();
-               else
-                       local_irq_enable();
+               }
+               set_thread_flag(TIF_POLLING_NRFLAG);
        } else {
-               cpu_relax();
+               while (!need_resched())
+                       cpu_relax();
        }
 }
 
@@ -113,29 +118,14 @@ void default_idle(void)
  */
 static void poll_idle (void)
 {
-       int oldval;
-
        local_irq_enable();
 
-       /*
-        * Deal with another CPU just having chosen a thread to
-        * run here:
-        */
-       oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-       if (!oldval) {
-               set_thread_flag(TIF_POLLING_NRFLAG);
-               asm volatile(
-                       "2:"
-                       "testl %0, %1;"
-                       "rep; nop;"
-                       "je 2b;"
-                       : : "i"(_TIF_NEED_RESCHED), "m" 
(current_thread_info()->flags));
-
-               clear_thread_flag(TIF_POLLING_NRFLAG);
-       } else {
-               set_need_resched();
-       }
+       asm volatile(
+               "2:"
+               "testl %0, %1;"
+               "rep; nop;"
+               "je 2b;"
+               : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
 }
 
 /*
@@ -146,25 +136,27 @@ static void poll_idle (void)
  */
 void cpu_idle (void)
 {
-       int cpu = _smp_processor_id();
+       int cpu = smp_processor_id();
+       set_thread_flag(TIF_POLLING_NRFLAG);
 
        /* endless idle loop with no priority at all */
        while (1) {
-               while (!need_resched()) {
-                       void (*idle)(void);
+               void (*idle)(void);
 
-                       if (cpu_isset(cpu, cpu_idle_map))
-                               cpu_clear(cpu, cpu_idle_map);
-                       rmb();
-                       idle = pm_idle;
+               if (cpu_isset(cpu, cpu_idle_map))
+                       cpu_clear(cpu, cpu_idle_map);
+               rmb();
+               idle = pm_idle;
 
-                       if (!idle)
-                               idle = default_idle;
+               if (!idle)
+                       idle = default_idle;
 
-                       irq_stat[cpu].idle_timestamp = jiffies;
-                       idle();
-               }
+               irq_stat[cpu].idle_timestamp = jiffies;
+               idle();
+
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
        }
 }
 
@@ -195,15 +187,12 @@ static void mwait_idle(void)
 {
        local_irq_enable();
 
-       if (!need_resched()) {
-               set_thread_flag(TIF_POLLING_NRFLAG);
-               do {
-                       __monitor((void *)&current_thread_info()->flags, 0, 0);
-                       if (need_resched())
-                               break;
-                       __mwait(0, 0);
-               } while (!need_resched());
-               clear_thread_flag(TIF_POLLING_NRFLAG);
+       while (!need_resched()) {
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               smp_mb();
+               if (need_resched())
+                       break;
+               __mwait(0, 0);
        }
 }
 
Index: linux-2.6/init/main.c
===================================================================
--- linux-2.6.orig/init/main.c  2005-03-27 00:25:49.000000000 +1100
+++ linux-2.6/init/main.c       2005-03-27 00:27:30.000000000 +1100
@@ -382,7 +382,7 @@ static void noinline rest_init(void)
        kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
        numa_default_policy();
        unlock_kernel();
-       preempt_enable_no_resched();
+       /* Don't re-enable preemption */
        cpu_idle();
 } 
 
Index: linux-2.6/arch/i386/kernel/apm.c
===================================================================
--- linux-2.6.orig/arch/i386/kernel/apm.c       2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/i386/kernel/apm.c    2005-03-27 00:27:30.000000000 +1100
@@ -767,8 +767,20 @@ static int set_system_power_state(u_shor
 static int apm_do_idle(void)
 {
        u32     eax;
+       u8      ret;
+       int     idled = 0;
 
-       if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
+       clear_thread_flag(TIF_POLLING_NRFLAG);
+       smp_mb__after_clear_bit();
+       if (!need_resched()) {
+               idled = 1;
+               ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
+       }
+       set_thread_flag(TIF_POLLING_NRFLAG);
+       if (!idled)
+               return 0;
+
+       if (ret) {
                static unsigned long t;
 
                /* This always fails on some SMP boards running UP kernels.
Index: linux-2.6/drivers/acpi/processor_idle.c
===================================================================
--- linux-2.6.orig/drivers/acpi/processor_idle.c        2005-03-27 
00:25:49.000000000 +1100
+++ linux-2.6/drivers/acpi/processor_idle.c     2005-03-27 00:27:30.000000000 
+1100
@@ -162,6 +162,14 @@ acpi_processor_power_activate (
        return;
 }
 
+static void acpi_safe_halt (void)
+{
+       clear_thread_flag(TIF_POLLING_NRFLAG);
+       smp_mb__after_clear_bit();
+       while (!need_resched())
+               safe_halt();
+       set_thread_flag(TIF_POLLING_NRFLAG);
+}
 
 static void acpi_processor_idle (void)
 {
@@ -171,7 +179,7 @@ static void acpi_processor_idle (void)
        int                     sleep_ticks = 0;
        u32                     t1, t2 = 0;
 
-       pr = processors[_smp_processor_id()];
+       pr = processors[smp_processor_id()];
        if (!pr)
                return;
 
@@ -191,8 +199,13 @@ static void acpi_processor_idle (void)
        }
 
        cx = pr->power.state;
-       if (!cx)
-               goto easy_out;
+       if (!cx) {
+               if (pm_idle_save)
+                       pm_idle_save();
+               else
+                       acpi_safe_halt();
+               return;
+       }
 
        /*
         * Check BM Activity
@@ -272,7 +285,8 @@ static void acpi_processor_idle (void)
                if (pm_idle_save)
                        pm_idle_save();
                else
-                       safe_halt();
+                       acpi_safe_halt();
+
                /*
                  * TBD: Can't get time duration while in C1, as resumes
                 *      go to an ISR rather than here.  Need to instrument
@@ -384,16 +398,6 @@ end:
         */
        if (next_state != pr->power.state)
                acpi_processor_power_activate(pr, next_state);
-
-       return;
-
- easy_out:
-       /* do C1 instead of busy loop */
-       if (pm_idle_save)
-               pm_idle_save();
-       else
-               safe_halt();
-       return;
 }
 
 
Index: linux-2.6/arch/i386/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/i386/kernel/smpboot.c   2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/i386/kernel/smpboot.c        2005-03-27 00:27:30.000000000 
+1100
@@ -414,6 +414,8 @@ static int cpucount;
  */
 static void __init start_secondary(void *unused)
 {
+       preempt_disable();
+
        /*
         * Dont put anything before smp_callin(), SMP
         * booting is too fragile that we want to limit the
Index: linux-2.6/arch/x86_64/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/process.c 2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/x86_64/kernel/process.c      2005-03-27 00:27:30.000000000 
+1100
@@ -84,12 +84,19 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
+       local_irq_enable();
+
        if (!atomic_read(&hlt_counter)) {
-               local_irq_disable();
-               if (!need_resched())
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
+               while (!need_resched()) {
+                       local_irq_disable();
                        safe_halt();
-               else
-                       local_irq_enable();
+               }
+               set_thread_flag(TIF_POLLING_NRFLAG);
+       } else {
+               while (!need_resched())
+                       cpu_relax();
        }
 }
 
@@ -100,29 +107,16 @@ void default_idle(void)
  */
 static void poll_idle (void)
 {
-       int oldval;
-
        local_irq_enable();
 
-       /*
-        * Deal with another CPU just having chosen a thread to
-        * run here:
-        */
-       oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-       if (!oldval) {
-               set_thread_flag(TIF_POLLING_NRFLAG); 
-               asm volatile(
-                       "2:"
-                       "testl %0,%1;"
-                       "rep; nop;"
-                       "je 2b;"
-                       : :
-                       "i" (_TIF_NEED_RESCHED), 
-                       "m" (current_thread_info()->flags));
-       } else {
-               set_need_resched();
-       }
+       asm volatile(
+               "2:"
+               "testl %0,%1;"
+               "rep; nop;"
+               "je 2b;"
+               : :
+               "i" (_TIF_NEED_RESCHED), 
+               "m" (current_thread_info()->flags));
 }
 
 
@@ -151,21 +145,23 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void cpu_idle (void)
 {
        int cpu = smp_processor_id();
+       set_thread_flag(TIF_POLLING_NRFLAG);
 
        /* endless idle loop with no priority at all */
        while (1) {
-               while (!need_resched()) {
-                       void (*idle)(void);
+               void (*idle)(void);
 
-                       if (cpu_isset(cpu, cpu_idle_map))
-                               cpu_clear(cpu, cpu_idle_map);
-                       rmb();
-                       idle = pm_idle;
-                       if (!idle)
-                               idle = default_idle;
-                       idle();
-               }
+               if (cpu_isset(cpu, cpu_idle_map))
+                       cpu_clear(cpu, cpu_idle_map);
+               rmb();
+               idle = pm_idle;
+               if (!idle)
+                       idle = default_idle;
+               idle();
+
+               preempt_enable_noresched();
                schedule();
+               preempt_disable();
        }
 }
 
@@ -180,15 +176,12 @@ static void mwait_idle(void)
 {
        local_irq_enable();
 
-       if (!need_resched()) {
-               set_thread_flag(TIF_POLLING_NRFLAG);
-               do {
-                       __monitor((void *)&current_thread_info()->flags, 0, 0);
-                       if (need_resched())
-                               break;
-                       __mwait(0, 0);
-               } while (!need_resched());
-               clear_thread_flag(TIF_POLLING_NRFLAG);
+       while (!need_resched()) {
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               smp_mb();
+               if (need_resched())
+                       break;
+               __mwait(0, 0);
        }
 }
 
Index: linux-2.6/arch/ppc64/kernel/idle.c
===================================================================
--- linux-2.6.orig/arch/ppc64/kernel/idle.c     2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/ppc64/kernel/idle.c  2005-03-27 00:27:30.000000000 +1100
@@ -77,6 +77,8 @@ static int iSeries_idle(void)
        long oldval;
        unsigned long CTRL;
 
+       set_thread_flag(TIF_POLLING_NRFLAG);
+
        /* ensure iSeries run light will be out when idle */
        clear_thread_flag(TIF_RUN_LIGHT);
        CTRL = mfspr(CTRLF);
@@ -86,32 +88,21 @@ static int iSeries_idle(void)
        lpaca = get_paca();
 
        while (1) {
-               if (lpaca->lppaca.shared_proc) {
-                       if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
-                               process_iSeries_events();
-                       if (!need_resched())
-                               yield_shared_processor();
-               } else {
-                       oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-                       if (!oldval) {
-                               set_thread_flag(TIF_POLLING_NRFLAG);
-
-                               while (!need_resched()) {
-                                       HMT_medium();
-                                       if 
(ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
-                                               process_iSeries_events();
-                                       HMT_low();
-                               }
-
+               while (!need_resched()) {
+                       HMT_low();
+                       if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) {
                                HMT_medium();
-                               clear_thread_flag(TIF_POLLING_NRFLAG);
-                       } else {
-                               set_need_resched();
+                               process_iSeries_events();
+                               HMT_low();
                        }
+                       if (lpaca->lppaca.shared_proc)
+                               yield_shared_processor();
                }
+               HMT_medium();
 
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
        }
 
        return 0;
@@ -123,30 +114,23 @@ static int default_idle(void)
 {
        long oldval;
        unsigned int cpu = smp_processor_id();
-
+       set_thread_flag(TIF_POLLING_NRFLAG);
+       
        while (1) {
-               oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-               if (!oldval) {
-                       set_thread_flag(TIF_POLLING_NRFLAG);
-
-                       while (!need_resched() && !cpu_is_offline(cpu)) {
-                               barrier();
-                               /*
-                                * Go into low thread priority and possibly
-                                * low power mode.
-                                */
-                               HMT_low();
-                               HMT_very_low();
-                       }
-
-                       HMT_medium();
-                       clear_thread_flag(TIF_POLLING_NRFLAG);
-               } else {
-                       set_need_resched();
+               while (!need_resched() && !cpu_is_offline(cpu)) {
+                       barrier();
+                       /*
+                        * Go into low thread priority and possibly
+                        * low power mode.
+                        */
+                       HMT_low();
+                       HMT_very_low();
                }
+               HMT_medium();
 
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
                if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
                        cpu_die();
        }
@@ -166,6 +150,7 @@ int dedicated_idle(void)
        unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
        unsigned int cpu = smp_processor_id();
 
+       set_thread_flag(TIF_POLLING_NRFLAG);
        ppaca = &paca[cpu ^ 1];
 
        while (1) {
@@ -175,66 +160,67 @@ int dedicated_idle(void)
                 */
                lpaca->lppaca.idle = 1;
 
-               oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-               if (!oldval) {
-                       set_thread_flag(TIF_POLLING_NRFLAG);
-                       start_snooze = __get_tb() +
+               start_snooze = __get_tb() +
                                *smt_snooze_delay * tb_ticks_per_usec;
-                       while (!need_resched() && !cpu_is_offline(cpu)) {
-                               /*
-                                * Go into low thread priority and possibly
-                                * low power mode.
-                                */
-                               HMT_low();
-                               HMT_very_low();
 
-                               if (*smt_snooze_delay == 0 ||
-                                   __get_tb() < start_snooze)
-                                       continue;
+               while (!need_resched() && !cpu_is_offline(cpu)) {
+                       /*
+                        * Go into low thread priority and possibly
+                        * low power mode.
+                        */
+                       HMT_low();
+                       HMT_very_low();
 
-                               HMT_medium();
+                       if (*smt_snooze_delay == 0 || __get_tb() < start_snooze)
+                               continue;
 
-                               if (!(ppaca->lppaca.idle)) {
-                                       local_irq_disable();
+                       HMT_medium();
 
-                                       /*
-                                        * We are about to sleep the thread
-                                        * and so wont be polling any
-                                        * more.
-                                        */
-                                       clear_thread_flag(TIF_POLLING_NRFLAG);
-
-                                       /*
-                                        * SMT dynamic mode. Cede will result
-                                        * in this thread going dormant, if the
-                                        * partner thread is still doing work.
-                                        * Thread wakes up if partner goes idle,
-                                        * an interrupt is presented, or a prod
-                                        * occurs.  Returning from the cede
-                                        * enables external interrupts.
-                                        */
-                                       if (!need_resched())
-                                               cede_processor();
-                                       else
-                                               local_irq_enable();
-                               } else {
-                                       /*
-                                        * Give the HV an opportunity at the
-                                        * processor, since we are not doing
-                                        * any work.
-                                        */
-                                       poll_pending();
-                               }
-                       }
+                       if (!(ppaca->lppaca.idle)) {
+                               local_irq_disable();
 
-                       clear_thread_flag(TIF_POLLING_NRFLAG);
-               } else {
-                       set_need_resched();
+                               /*
+                                * We are about to sleep the thread
+                                * and so wont be polling any
+                                * more.
+                                */
+                               clear_thread_flag(TIF_POLLING_NRFLAG);
+
+                               /* 
+                                * Must have TIF_POLLING_NRFLAG clear visible
+                                * before checking need_resched
+                                */
+                               smp_mb__after_clear_bit();
+
+                               /*
+                                * SMT dynamic mode. Cede will result
+                                * in this thread going dormant, if the
+                                * partner thread is still doing work.
+                                * Thread wakes up if partner goes idle,
+                                * an interrupt is presented, or a prod
+                                * occurs.  Returning from the cede
+                                * enables external interrupts.
+                                */
+                               if (!need_resched())
+                                       cede_processor();
+                               else
+                                       local_irq_enable();
+                               set_thread_flag(TIF_POLLING_NRFLAG);
+                       } else {
+                               /*
+                                * Give the HV an opportunity at the
+                                * processor, since we are not doing
+                                * any work.
+                                */
+                               poll_pending();
+                       }
                }
 
                HMT_medium();
                lpaca->lppaca.idle = 0;
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
                if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
                        cpu_die();
        }
@@ -245,6 +231,7 @@ static int shared_idle(void)
 {
        struct paca_struct *lpaca = get_paca();
        unsigned int cpu = smp_processor_id();
+       set_thread_flag(TIF_POLLING_NRFLAG);
 
        while (1) {
                /*
@@ -256,6 +243,9 @@ static int shared_idle(void)
                while (!need_resched() && !cpu_is_offline(cpu)) {
                        local_irq_disable();
 
+                       clear_thread_flag(TIF_POLLING_NRFLAG);
+                       smp_mb__after_clear_bit();
+
                        /*
                         * Yield the processor to the hypervisor.  We return if
                         * an external interrupt occurs (which are driven prior
@@ -270,11 +260,14 @@ static int shared_idle(void)
                                cede_processor();
                        else
                                local_irq_enable();
+                       set_thread_flag(TIF_POLLING_NRFLAG);
                }
 
                HMT_medium();
                lpaca->lppaca.idle = 0;
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
                if (cpu_is_offline(smp_processor_id()) &&
                    system_state == SYSTEM_RUNNING)
                        cpu_die();
@@ -289,10 +282,12 @@ static int native_idle(void)
 {
        while(1) {
                /* check CPU type here */
-               if (!need_resched())
+               while (!need_resched())
                        power4_idle();
-               if (need_resched())
-                       schedule();
+
+               preempt_enable_no_resched();
+               schedule();
+               preempt_disable();
 
                if (cpu_is_offline(_smp_processor_id()) &&
                    system_state == SYSTEM_RUNNING)
Index: linux-2.6/arch/ia64/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/process.c   2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/ia64/kernel/process.c        2005-03-27 00:27:30.000000000 
+1100
@@ -188,11 +188,16 @@ default_idle (void)
 {
        unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | 
IA64_PSR_UP);
 
-       while (!need_resched())
-               if (pal_halt && !pmu_active)
+       if (pal_halt && !pmu_active) {
+               clear_thread_flag(TIF_POLLING_NRFLAG);
+               smp_mb__after_clear_bit();
+               while (!need_resched())
                        safe_halt();
-               else
+               set_thread_flag(TIF_POLLING_NRFLAG);
+       } else {
+               while (!need_resched())
                        cpu_relax();
+       }
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -251,35 +256,36 @@ cpu_idle (void)
 {
        void (*mark_idle)(int) = ia64_mark_idle;
        int cpu = smp_processor_id();
+       set_thread_flag(TIF_POLLING_NRFLAG);
 
        /* endless idle loop with no priority at all */
        while (1) {
+               void (*idle)(void);
+               if (cpu_isset(cpu, cpu_idle_map))
+                       cpu_clear(cpu, cpu_idle_map);
+               rmb();
+               idle = pm_idle;
+               if (!idle)
+                       idle = default_idle;
+
+               if (!need_resched()) {
 #ifdef CONFIG_SMP
-               if (!need_resched())
                        min_xtp();
 #endif
-               while (!need_resched()) {
-                       void (*idle)(void);
-
                        if (mark_idle)
                                (*mark_idle)(1);
-
-                       if (cpu_isset(cpu, cpu_idle_map))
-                               cpu_clear(cpu, cpu_idle_map);
-                       rmb();
-                       idle = pm_idle;
-                       if (!idle)
-                               idle = default_idle;
                        (*idle)();
-               }
-
-               if (mark_idle)
-                       (*mark_idle)(0);
-
+                       if (mark_idle)
+                               (*mark_idle)(0);
 #ifdef CONFIG_SMP
-               normal_xtp();
+                       normal_xtp();
 #endif
+               }
+
+               preempt_enable_no_resched();
                schedule();
+               preempt_disable();
+               
                check_pgt_cache();
                if (cpu_is_offline(smp_processor_id()))
                        play_dead();
Index: linux-2.6/arch/ia64/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/smpboot.c   2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/ia64/kernel/smpboot.c        2005-03-27 00:27:30.000000000 
+1100
@@ -343,6 +343,8 @@ smp_callin (void)
 int __devinit
 start_secondary (void *unused)
 {
+       preempt_disable();
+
        /* Early console may use I/O ports */
        ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
 
Index: linux-2.6/arch/ppc64/kernel/smp.c
===================================================================
--- linux-2.6.orig/arch/ppc64/kernel/smp.c      2005-03-27 00:25:49.000000000 
+1100
+++ linux-2.6/arch/ppc64/kernel/smp.c   2005-03-27 00:27:30.000000000 +1100
@@ -561,7 +561,10 @@ int __devinit __cpu_up(unsigned int cpu)
 /* Activate a secondary processor. */
 int __devinit start_secondary(void *unused)
 {
-       unsigned int cpu = smp_processor_id();
+       unsigned int cpu;
+       
+       preempt_disable();
+       cpu = smp_processor_id();
 
        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;

Reply via email to