Re: [v10 PATCH 8/9]: pSeries: implement pSeries processor idle module

2009-12-15 Thread Arun R Bharadwaj
* Benjamin Herrenschmidt b...@kernel.crashing.org [2009-12-04 21:00:52]:

 On Fri, 2009-12-04 at 13:45 +0530, Arun R Bharadwaj wrote:
 
  
  Hi Ben,
  
  I forgot to attach the patch which enables cpuidle for the rest of the
  POWER platforms. Attaching it below.
  
  So for these platforms, ppc_md.power_save will be called from the
  cpuidle_idle_call idle loop itself. Also, this cpuidle_idle_call is
  not a pseries specific idle loop. It is a common loop for Intel and
  PPC which use cpuidle infrastructure.
 
 Ok, so there was a missing piece in the puzzle ;-)
 
 I'll review asap.
 

Hi Ben,

Did you get time to review this?

thanks
arun

 Cheers,
 Ben.
 
  arun
  
  
  
  This patch enables cpuidle for the rest of the POWER platforms like
  44x, Cell, Pasemi etc.
  
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   arch/powerpc/include/asm/system.h   |2 ++
   arch/powerpc/kernel/idle.c  |   28 
   arch/powerpc/kernel/setup_32.c  |8 ++--
   arch/powerpc/platforms/44x/idle.c   |2 ++
   arch/powerpc/platforms/cell/pervasive.c |2 ++
   arch/powerpc/platforms/pasemi/idle.c|2 ++
   arch/powerpc/platforms/ps3/setup.c  |2 ++
   7 files changed, 44 insertions(+), 2 deletions(-)
  
  Index: linux.trees.git/arch/powerpc/include/asm/system.h
  ===
  --- linux.trees.git.orig/arch/powerpc/include/asm/system.h
  +++ linux.trees.git/arch/powerpc/include/asm/system.h
  @@ -551,8 +551,10 @@ void cpu_idle_wait(void);
   
   #ifdef CONFIG_CPU_IDLE
   extern void update_smt_snooze_delay(int snooze);
  +extern void setup_cpuidle_ppc(void);
   #else
   static inline void update_smt_snooze_delay(int snooze) {}
  +static inline void setup_cpuidle_ppc(void) {}
   #endif
   
   #endif /* __KERNEL__ */
  Index: linux.trees.git/arch/powerpc/kernel/idle.c
  ===
  --- linux.trees.git.orig/arch/powerpc/kernel/idle.c
  +++ linux.trees.git/arch/powerpc/kernel/idle.c
  @@ -129,6 +129,34 @@ void default_idle(void)
  HMT_very_low();
   }
   
  +#ifdef CONFIG_CPU_IDLE
  +DEFINE_PER_CPU(struct cpuidle_device, ppc_idle_devices);
  +struct cpuidle_driver cpuidle_ppc_driver = {
  +   .name = cpuidle_ppc,
  +};
  +
  +static void ppc_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
  *st)
  +{
  +   ppc_md.power_save();
  +}
  +
  +void setup_cpuidle_ppc(void)
  +{
  +   struct cpuidle_device *dev;
  +   int cpu;
  +
  +   cpuidle_register_driver(cpuidle_ppc_driver);
  +
  +   for_each_online_cpu(cpu) {
  +   dev = per_cpu(ppc_idle_devices, cpu);
  +   dev-cpu = cpu;
  +   dev-states[0].enter = ppc_idle_loop;
  +   dev-state_count = 1;
  +   cpuidle_register_device(dev);
  +   }
  +}
  +#endif
  +
   int powersave_nap;
   
   #ifdef CONFIG_SYSCTL
  Index: linux.trees.git/arch/powerpc/kernel/setup_32.c
  ===
  --- linux.trees.git.orig/arch/powerpc/kernel/setup_32.c
  +++ linux.trees.git/arch/powerpc/kernel/setup_32.c
  @@ -133,14 +133,18 @@ notrace void __init machine_init(unsigne
   
   #ifdef CONFIG_6xx
  if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
  -   cpu_has_feature(CPU_FTR_CAN_NAP))
  +   cpu_has_feature(CPU_FTR_CAN_NAP)) {
  ppc_md.power_save = ppc6xx_idle;
  +   setup_cpuidle_ppc();
  +   }
   #endif
   
   #ifdef CONFIG_E500
  if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
  -   cpu_has_feature(CPU_FTR_CAN_NAP))
  +   cpu_has_feature(CPU_FTR_CAN_NAP)) {
  ppc_md.power_save = e500_idle;
  +   setup_cpuidle_ppc();
  +   }
   #endif
  if (ppc_md.progress)
  ppc_md.progress(id mach(): done, 0x200);
  Index: linux.trees.git/arch/powerpc/platforms/44x/idle.c
  ===
  --- linux.trees.git.orig/arch/powerpc/platforms/44x/idle.c
  +++ linux.trees.git/arch/powerpc/platforms/44x/idle.c
  @@ -24,6 +24,7 @@
   #include linux/of.h
   #include linux/kernel.h
   #include asm/machdep.h
  +#include asm/system.h
   
   static int mode_spin;
   
  @@ -46,6 +47,7 @@ int __init ppc44x_idle_init(void)
  /* If we are not setting spin mode 
  then we set to wait mode */
  ppc_md.power_save = ppc44x_idle;
  +   setup_cpuidle_ppc();
  }
   
  return 0;
  Index: linux.trees.git/arch/powerpc/platforms/cell/pervasive.c
  ===
  --- linux.trees.git.orig/arch/powerpc/platforms/cell/pervasive.c
  +++ linux.trees.git/arch/powerpc/platforms/cell/pervasive.c
  @@ -35,6 +35,7 @@
   #include asm/pgtable.h
   #include asm/reg.h
   #include asm/cell-regs.h
  +#include asm/system.h
   
   #include pervasive.h
   
  @@ -128,5 +129,6 @@ void __init

Re: [v10 PATCH 2/9]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-12-07 Thread Arun R Bharadwaj
* Torsten Duwe d...@lst.de [2009-12-07 11:17:57]:

 On Sunday 06 December 2009, Arun R Bharadwaj wrote:
 
  Peter objected to the idea of integrating this with the old pm_idle
  because it has already caused a lot of problems on x86 and we wouldn't
  want to be doing the same mistake on POWER. The discussion related to
  that could be found here http://lkml.org/lkml/2009/8/26/233
 
 And BenH has sketched how it should be done on ppc, in that thread:
 http://lkml.org/lkml/2009/8/26/624 AFAIS this comment is still valid for v10.
 
 Not only I would like to understand what is the conceptual idea behind the 
 other changes. Nothing wrong with cleanups, but there's got to be a purpose 
 and benefits.
 
   Torsten

The reason for the cleanups is that we should have just one idle
function manager instead of having one for each arch, which needs to
be exported and is hence really ugly. So that's why we
decided to do away with pm_idle and make cpuidle as _the_ idle
function manager. So in case of POWER, we have the ppc_md.power_save
which is the pm_idle equivalent. We discussed that in this thread
http://lkml.org/lkml/2009/9/2/20

thanks
arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v10 PATCH 2/9]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-12-05 Thread Arun R Bharadwaj
* Torsten Duwe d...@lst.de [2009-12-04 23:20:00]:

 On Wednesday 02 December 2009, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:
 
  This patch cleans up drivers/cpuidle/cpuidle.c
  Earlier cpuidle assumed pm_idle as the default idle loop. Break that
  assumption and make it more generic.
 
 Is there a problem with the old pm_idle? Couldn't it be integrated more 
 transparently, instead of replacing it this intrusively?
 

Hi Torsten,

Peter objected to the idea of integrating this with the old pm_idle
because it has already caused a lot of problems on x86 and we wouldn't
want to be doing the same mistake on POWER. The discussion related to
that could be found here http://lkml.org/lkml/2009/8/26/233

  --- linux.trees.git.orig/include/linux/cpuidle.h
  +++ linux.trees.git/include/linux/cpuidle.h
  @@ -41,7 +41,7 @@ struct cpuidle_state {
  unsigned long long  usage;
  unsigned long long  time; /* in US */
 
  -   int (*enter)(struct cpuidle_device *dev,
  +   void (*enter)   (struct cpuidle_device *dev,
   struct cpuidle_state *state);
   };
 
 While it may be a good idea to move the residency calculation to one central 
 place, at least in theory a cpuidle_state-enter() function could have a 
 better method to determine its value.


This would mean a lot of code replication, which Pavel pointed out in
the previous iteration. So I moved the residency calculation to a
central place.

 Either way you're implicitly introducing an API change here, and you're at 
 least missing two functions on ARM and SuperH, respectively. Could you 
 separate this API change out, and not take it for granted in the other 
 patches?

   Torsten
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v10 PATCH 8/9]: pSeries: implement pSeries processor idle module

2009-12-04 Thread Arun R Bharadwaj
* Benjamin Herrenschmidt b...@kernel.crashing.org [2009-12-04 13:47:38]:

 On Wed, 2009-12-02 at 15:32 +0530, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:
  
  This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
  which implements the cpuidle infrastructure for pseries.
  It implements a pseries_cpuidle_loop() which would be the main idle loop
  called from cpu_idle(). It makes decision of entering either
  dedicated_snooze_loop or dedicated_cede_loop for dedicated lpar and
  shared_cede_loop for shared lpar processor based on the
  decision taken by the cpuidle governor.
 
 So unless I'm mistaken, you removed our powerpc generic idle loop that
 calls into ppc_md.power_save(), and replaced it by some pseries specific
 idle loops... Now what about all the other powerpc platforms ? native
 970 (aka G5) ? 6xx ? Cell ? Or are you still calling ppc_md.power_save
 somewhere that I missed ?
 
 Cheers,
 Ben.

Hi Ben,

I forgot to attach the patch which enables cpuidle for the rest of the
POWER platforms. Attaching it below.

So for these platforms, ppc_md.power_save will be called from the
cpuidle_idle_call idle loop itself. Also, this cpuidle_idle_call is
not a pseries specific idle loop. It is a common loop for Intel and
PPC which use cpuidle infrastructure.

arun



This patch enables cpuidle for the rest of the POWER platforms like
44x, Cell, Pasemi etc.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |2 ++
 arch/powerpc/kernel/idle.c  |   28 
 arch/powerpc/kernel/setup_32.c  |8 ++--
 arch/powerpc/platforms/44x/idle.c   |2 ++
 arch/powerpc/platforms/cell/pervasive.c |2 ++
 arch/powerpc/platforms/pasemi/idle.c|2 ++
 arch/powerpc/platforms/ps3/setup.c  |2 ++
 7 files changed, 44 insertions(+), 2 deletions(-)

Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -551,8 +551,10 @@ void cpu_idle_wait(void);
 
 #ifdef CONFIG_CPU_IDLE
 extern void update_smt_snooze_delay(int snooze);
+extern void setup_cpuidle_ppc(void);
 #else
 static inline void update_smt_snooze_delay(int snooze) {}
+static inline void setup_cpuidle_ppc(void) {}
 #endif
 
 #endif /* __KERNEL__ */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -129,6 +129,34 @@ void default_idle(void)
HMT_very_low();
 }
 
+#ifdef CONFIG_CPU_IDLE
+DEFINE_PER_CPU(struct cpuidle_device, ppc_idle_devices);
+struct cpuidle_driver cpuidle_ppc_driver = {
+   .name = cpuidle_ppc,
+};
+
+static void ppc_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+   ppc_md.power_save();
+}
+
+void setup_cpuidle_ppc(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   cpuidle_register_driver(cpuidle_ppc_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(ppc_idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = ppc_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+}
+#endif
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
Index: linux.trees.git/arch/powerpc/kernel/setup_32.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/setup_32.c
+++ linux.trees.git/arch/powerpc/kernel/setup_32.c
@@ -133,14 +133,18 @@ notrace void __init machine_init(unsigne
 
 #ifdef CONFIG_6xx
if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
-   cpu_has_feature(CPU_FTR_CAN_NAP))
+   cpu_has_feature(CPU_FTR_CAN_NAP)) {
ppc_md.power_save = ppc6xx_idle;
+   setup_cpuidle_ppc();
+   }
 #endif
 
 #ifdef CONFIG_E500
if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
-   cpu_has_feature(CPU_FTR_CAN_NAP))
+   cpu_has_feature(CPU_FTR_CAN_NAP)) {
ppc_md.power_save = e500_idle;
+   setup_cpuidle_ppc();
+   }
 #endif
if (ppc_md.progress)
ppc_md.progress(id mach(): done, 0x200);
Index: linux.trees.git/arch/powerpc/platforms/44x/idle.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/44x/idle.c
+++ linux.trees.git/arch/powerpc/platforms/44x/idle.c
@@ -24,6 +24,7 @@
 #include linux/of.h
 #include linux/kernel.h
 #include asm/machdep.h
+#include asm/system.h
 
 static int mode_spin;
 
@@ -46,6 +47,7 @@ int __init ppc44x_idle_init(void)
/* If we are not setting spin mode 
then we set to wait mode

[v10 PATCH 0/9] cpuidle: cleanup cpuidle/ introduce cpuidle to POWER

2009-12-02 Thread Arun R Bharadwaj
Hi,

This patchset introduces cpuidle infrastructure to POWER, prototyping
for pSeries, and also does a major refactoring of current x86 idle
power management and a cleanup of cpuidle infrastructure.

This patch series has been in discussion for quite a while now and
below are the links to the previous discussions.

Please consider this for inclusion into the -tip tree.

v9 -- http://lkml.org/lkml/2009/10/16/63
v8 -- http://lkml.org/lkml/2009/10/8/82
v7 -- http://lkml.org/lkml/2009/10/6/278
v6 -- http://lkml.org/lkml/2009/9/22/180
v5 -- http://lkml.org/lkml/2009/9/22/26
v4 -- http://lkml.org/lkml/2009/9/1/133
v3 -- http://lkml.org/lkml/2009/8/27/124
v2 -- http://lkml.org/lkml/2009/8/26/233
v1 -- http://lkml.org/lkml/2009/8/19/150


Change in this version:

Pavel noticed that the code which calls the cpuidle's idle
loop was repeated at many places. So this set optimizes it so
that we don't have repetition of code. The rest of the patches
are same as the earlier iteration.


arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v10 PATCH 1/9]: cpuidle: Design documentation patch

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch adds a little information about the redesigned cpuidle
infrastructure in Documentation/cpuidle/core.txt

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 Documentation/cpuidle/core.txt |   35 +++
 1 file changed, 35 insertions(+)

Index: linux.trees.git/Documentation/cpuidle/core.txt
===
--- linux.trees.git.orig/Documentation/cpuidle/core.txt
+++ linux.trees.git/Documentation/cpuidle/core.txt
@@ -21,3 +21,38 @@ which can be used to switch governors at
 is meant for developer testing only. In normal usage, kernel picks the
 best governor based on governor ratings.
 SEE ALSO: sysfs.txt in this directory.
+
+Design:
+
+Cpuidle allows for registration of multiple sets of idle routines.
+The latest registered set is used by cpuidle governors as the current
+active set to choose the right idle state. This set is managed as a
+list and each time the newly registered set is added to the head of the
+list and made the current active set.
+
+An example of how this would work on x86 is shown below.
+
+-  -
+|  |   |   |
+| choose b/w   |   mwait is chosen |mwait  |
+| mwait, poll, |- |(current active|
+| default, c1e |   register to cpuidle |set)   |
+|  |   with mwait as the idle routine  |   |
+-  -
+
+
+-  -
+|  |   |  c1, c2, c3   |
+| ACPI |   register to cpuidle |   (current)   |
+|   discovery  |- |---|
+|  |   with c1, c2, c3 | mwait |
+|  |   as set of idle routines |   |
+-  -
+
+With this mechanism, a module can register and unregister its set of
+idle routines at run time in a clean manner.
+
+The main idle routine called inside cpu_idle() of every arch is defined in
+drivers/cpuidle/cpuidle.c which would in turn call the idle routine selected
+by the governor. If the CONFIG_CPU_IDLE is disabled, the arch needs to
+provide an alternate definition for cpuidle_idle_call().
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v10 PATCH 4/9]: x86: refactor x86 idle power management code, remove all instances of pm_idle

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch cleans up x86 of all instances of pm_idle.

pm_idle which was earlier called from cpu_idle() idle loop
is replaced by cpuidle_idle_call.

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c  |   46 +++-
 arch/x86/kernel/process.c |   78 +++---
 arch/x86/kernel/process_32.c  |3 +
 arch/x86/kernel/process_64.c  |3 +
 arch/x86/xen/setup.c  |   30 +++-
 drivers/acpi/processor_core.c |9 +++-
 drivers/acpi/processor_idle.c |   44 ++-
 7 files changed, 160 insertions(+), 53 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include linux/clockchips.h
 #include linux/random.h
 #include linux/user-return-notifier.h
+#include linux/cpuidle.h
 #include trace/events/power.h
 #include linux/hw_breakpoint.h
 #include asm/system.h
@@ -241,12 +242,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -326,17 +321,15 @@ static void do_nothing(void *unused)
 }
 
 /*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
  *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
  */
 void cpu_idle_wait(void)
 {
smp_mb();
-   /* kick all the CPUs so that they exit out of pm_idle */
+   /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -515,15 +508,58 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+   if (local_idle)
+   local_idle();
+   else
+   default_idle();
+}
+#endif
+
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static void local_idle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   local_idle();
+}
+
+static int setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -531,18 +567,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO using mwait in idle threads.\n);
-   pm_idle = mwait_idle;
+   local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO using C1E aware idle routine\n);
-   pm_idle = c1e_idle;
+   local_idle = c1e_idle;
} else
-   pm_idle = default_idle;
+   local_idle = default_idle;
+
+   return;
 }
 
 void __init init_c1e_mask(void)
 {
/* If we're using c1e_idle, we need to allocate c1e_mask. */
-   if (pm_idle == c1e_idle)
+   if (local_idle == c1e_idle

[v10 PATCH 5/9]: POWER: enable cpuidle for POWER.

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |9 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 30 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -91,6 +91,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -247,6 +250,12 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of idle is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of old idle routine */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v10 PATCH 6/9]: pSeries/cpuidle: refactor pseries idle loops

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch removes the routines, pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since this is implemented as a part
of arch/powerpc/platforms/pseries/processor_idle.c

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   58 ++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 30 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -297,18 +294,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -496,80 +483,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -46,6 +47,14 @@ static int __init powersave_off(char *ar
 }
 __setup

[v10 PATCH 7/9]: POWER: add a default_idle idle loop for POWER

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

In arch/powerpc/kernel/idle.c create a default_idle() routine by moving
the failover condition of the cpu_idle() idle loop. This is needed by
cpuidle infrastructure to call default_idle when other idle routines
are not yet registered. Functionality remains the same, but the code is
slightly moved around.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -94,6 +94,9 @@ config ARCH_HAS_ILOG2_U64
 config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -121,6 +121,12 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v10 PATCH 8/9]: pSeries: implement pSeries processor idle module

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements a pseries_cpuidle_loop() which would be the main idle loop
called from cpu_idle(). It makes decision of entering either
dedicated_snooze_loop or dedicated_cede_loop for dedicated lpar and
shared_cede_loop for shared lpar processor based on the
decision taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |6 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  196 
 arch/powerpc/platforms/pseries/pseries.h|6 
 5 files changed, 211 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_CPU_IDLE) += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,8 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+extern struct cpuidle_driver pseries_idle_driver;
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,196 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+/* pSeries Idle state Flags */
+#definePSERIES_DEDICATED_SNOOZE(0x01)
+#definePSERIES_DEDICATED_CEDE  (0x02)
+#definePSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void dedicated_snooze_loop(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void dedicated_cede_loop(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor

[v10 PATCH 9/9]: POWER: Enable default_idle when power_save=off

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:24:27]:

This patch enables default_idle when the powersave=off kernel boot
option is specified.

Earlier, this was done by setting ppc_md.power_save = NULL and hence
HMT_low() and HMT_very_low() were called. Now this is defined under
default_idle() and hence by setting boot_option_idle_override = 1,
the cpuidle registration stuff does not happen and hence default_idle
is chosen in cpuidle_idle_call.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/processor.h|2 ++
 arch/powerpc/kernel/idle.c  |4 +++-
 arch/powerpc/platforms/pseries/processor_idle.c |5 +
 3 files changed, 10 insertions(+), 1 deletion(-)

Index: linux.trees.git/arch/powerpc/include/asm/processor.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/processor.h
+++ linux.trees.git/arch/powerpc/include/asm/processor.h
@@ -332,6 +332,8 @@ static inline unsigned long get_clean_sp
 }
 #endif
 
+extern int boot_option_idle_override;
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PROCESSOR_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -40,9 +40,11 @@
 #define cpu_should_die()   0
 #endif
 
+int boot_option_idle_override = 0;
+
 static int __init powersave_off(char *arg)
 {
-   ppc_md.power_save = NULL;
+   boot_option_idle_override = 1;
return 0;
 }
 __setup(powersave=off, powersave_off);
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/processor_idle.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -171,6 +171,11 @@ static int __init pseries_processor_idle
int cpu;
int result;
 
+   if (boot_option_idle_override) {
+   printk(KERN_DEBUG Using default idle\n);
+   return 0;
+   }
+
result = cpuidle_register_driver(pseries_idle_driver);
 
if (result  0)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v10 PATCH 9/9]: POWER: Enable default_idle when power_save=off

2009-12-02 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-12-02 15:33:46]:

Thanks for running checkpatch on the patch Daniel. Will fix this.

arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v9 PATCH 0/9]: cpuidle: Cleanup cpuidle/ Introduce cpuidle to POWER.

2009-11-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

 Hi,
 
 This patchset introduces cpuidle infrastructure to POWER, prototyping
 for pSeries, and also does a major refactoring of current x86 idle
 power management and a cleanup of cpuidle infrastructure.
 
 This patch series has been in discussion for quite a while now and
 below are the links to the previous discussions.
 
 Hopefully, this is ready to be included in the -tip tree.
 
 v8 -- http://lkml.org/lkml/2009/10/8/82
 v7 -- http://lkml.org/lkml/2009/10/6/278
 v6 -- http://lkml.org/lkml/2009/9/22/180
 v5 -- http://lkml.org/lkml/2009/9/22/26
 v4 -- http://lkml.org/lkml/2009/9/1/133
 v3 -- http://lkml.org/lkml/2009/8/27/124
 v2 -- http://lkml.org/lkml/2009/8/26/233
 v1 -- http://lkml.org/lkml/2009/8/19/150
 
 
 Changes in this version:
 
 *Added documentation for the new design regarding registration
 of idle routines in Documentation/cpuidle/core.txt
 
 *Platforms which do not want the code bloat of cpuidle can
 disable CONFIG_CPU_IDLE. Alternate definition of
 cpuidle_idle_call is provided which would call the appropriate
 idle routine provided by the arch without the overhead of
 registration and governors.
 (Thanks to Andi for raising this issue)
 
 *I had missed a cpuidle_kick_cpus() in
 cpuidle_pause_and_lock() which is called from the hotplug
 path. So added that and tested this patchset by subjecting it
 to cpuhotplug torture.
 (Thanks to Balbir for noticing this)
 
 

Hi Peter,

Did you get time to look at this series?
Please let me know if we need any changes to this version to get it
merged.

thanks
 --arun
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v9 PATCH 4/9]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-26 Thread Arun R Bharadwaj
* Pavel Machek pa...@ucw.cz [2009-10-23 18:07:11]:

 On Fri 2009-10-16 15:13:08, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:
  
  This patch cleans up x86 of all instances of pm_idle.
  
  pm_idle which was earlier called from cpu_idle() idle loop
  is replaced by cpuidle_idle_call.
  
  x86 also registers to cpuidle when the idle routine is selected,
  by populating the cpuidle_device data structure for each cpu.
  
  This is replicated for apm module and for xen, which also used pm_idle.
  
  
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   arch/x86/kernel/apm_32.c  |   55 -
   arch/x86/kernel/process.c |   90 
  --
   arch/x86/kernel/process_32.c  |3 -
   arch/x86/kernel/process_64.c  |3 -
   arch/x86/xen/setup.c  |   40 ++
   drivers/acpi/processor_core.c |9 ++--
   drivers/cpuidle/cpuidle.c |   16 +--
   7 files changed, 182 insertions(+), 34 deletions(-)
 ...
  +static int local_idle_loop(struct cpuidle_device *dev, struct 
  cpuidle_state *st)
  +{
  +   ktime_t t1, t2;
  +   s64 diff;
  +   int ret;
  +
  +   t1 = ktime_get();
  +   local_idle();
  +   t2 = ktime_get();
  +
  +   diff = ktime_to_us(ktime_sub(t2, t1));
  +   if (diff  INT_MAX)
  +   diff = INT_MAX;
  +   ret = (int) diff;
  +
  +   return ret;
  +}
 
 So we get this routine essentially 3 times. Is there no way to share
 the code?
 

We can move this code to a common place, but that would mean exporting
the idle function pointer to be called from within this routine, which
is exactly what we wanted to avoid.

Any suggestions are welcome.

arun

 -- 
 (english) http://www.livejournal.com/~pavelmachek
 (cesky, pictures) 
 http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v9 PATCH 4/9]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-26 Thread Arun R Bharadwaj
* Pavel Machek pa...@ucw.cz [2009-10-26 08:58:31]:

 
+static int local_idle_loop(struct cpuidle_device *dev, struct 
cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
   
   So we get this routine essentially 3 times. Is there no way to share
   the code?
   
  
  We can move this code to a common place, but that would mean exporting
  the idle function pointer to be called from within this routine, which
  is exactly what we wanted to avoid.
  
  Any suggestions are welcome.
 
 You can just pass idle routine as a parameter...?
 
 int common_idle_loop(struct cpuidle_device *dev, struct cpuidle_state
 *st, void *idle(void))
 
 ...?
   Pavel

Yes, this should be fine. I was trying to avoid passing the void
function pointer around, but I guess this reduces the code size
considerably.

thanks!
arun
 -- 
 (english) http://www.livejournal.com/~pavelmachek
 (cesky, pictures) 
 http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v9 PATCH 0/9]: cpuidle: Cleanup cpuidle/ Introduce cpuidle to POWER.

2009-10-16 Thread Arun R Bharadwaj
Hi,

This patchset introduces cpuidle infrastructure to POWER, prototyping
for pSeries, and also does a major refactoring of current x86 idle
power management and a cleanup of cpuidle infrastructure.

This patch series has been in discussion for quite a while now and
below are the links to the previous discussions.

Hopefully, this is ready to be included in the -tip tree.

v8 -- http://lkml.org/lkml/2009/10/8/82
v7 -- http://lkml.org/lkml/2009/10/6/278
v6 -- http://lkml.org/lkml/2009/9/22/180
v5 -- http://lkml.org/lkml/2009/9/22/26
v4 -- http://lkml.org/lkml/2009/9/1/133
v3 -- http://lkml.org/lkml/2009/8/27/124
v2 -- http://lkml.org/lkml/2009/8/26/233
v1 -- http://lkml.org/lkml/2009/8/19/150


Changes in this version:

*Added documentation for the new design regarding registration
of idle routines in Documentation/cpuidle/core.txt

*Platforms which do not want the code bloat of cpuidle can
disable CONFIG_CPU_IDLE. Alternate definition of
cpuidle_idle_call is provided which would call the appropriate
idle routine provided by the arch without the overhead of
registration and governors.
(Thanks to Andi for raising this issue)

*I had missed a cpuidle_kick_cpus() in
cpuidle_pause_and_lock() which is called from the hotplug
path. So added that and tested this patchset by subjecting it
to cpuhotplug torture.
(Thanks to Balbir for noticing this)


--arun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v9 PATCH 1/9]: cpuidle: Design documentation patch

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch adds a little information about the redesigned cpuidle
infrastructure in Documentation/cpuidle/core.txt

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 Documentation/cpuidle/core.txt |   35 +++
 1 file changed, 35 insertions(+)

Index: linux.trees.git/Documentation/cpuidle/core.txt
===
--- linux.trees.git.orig/Documentation/cpuidle/core.txt
+++ linux.trees.git/Documentation/cpuidle/core.txt
@@ -21,3 +21,38 @@ which can be used to switch governors at
 is meant for developer testing only. In normal usage, kernel picks the
 best governor based on governor ratings.
 SEE ALSO: sysfs.txt in this directory.
+
+Design:
+
+Cpuidle allows for registration of multiple sets of idle routines.
+The latest registered set is used by cpuidle governors as the current
+active set to choose the right idle state. This set is managed as a
+list and each time the newly registered set is added to the head of the
+list and made the current active set.
+
+An example of how this would work on x86 is shown below.
+
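+-----------------                                  -----------------
+|               |                                  |               |
+| choose b/w    |  mwait is chosen                 |     mwait     |
+| mwait, poll,  |--------------------------------->|(current active|
+| default, c1e  |  register to cpuidle             |      set)     |
+|               |  with mwait as the idle routine  |               |
+-----------------                                  -----------------
+
+
+-----------------                                  -----------------
+|               |                                  |  c1, c2, c3   |
+|     ACPI      |  register to cpuidle             |   (current)   |
+|   discovery   |--------------------------------->|---------------|
+|               |  with c1, c2, c3                 |     mwait     |
+|               |  as set of idle routines         |               |
+-----------------                                  -----------------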
+
+With this mechanism, a module can register and unregister its set of
+idle routines at run time in a clean manner.
+
+The main idle routine called inside cpu_idle() of every arch is defined in
+drivers/cpuidle/cpuidle.c which would in turn call the idle routine selected
+by the governor. If the CONFIG_CPU_IDLE is disabled, the arch needs to
+provide an alternate definition for cpuidle_idle_call().
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v9 PATCH 2/9]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch cleans up drivers/cpuidle/cpuidle.c
Earlier cpuidle assumed pm_idle as the default idle loop. Break that
assumption and make it more generic. cpuidle_idle_call() which is the
main idle loop of cpuidle is to be called by architectures which have
registered to cpuidle.

Remove routines cpuidle_install/uninstall_idle_handler() and
cpuidle_kick_cpus() which are not needed anymore.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c  |   63 ++---
 drivers/cpuidle/cpuidle.h  |6 +---
 drivers/cpuidle/driver.c   |4 --
 drivers/cpuidle/governor.c |   13 +++--
 drivers/cpuidle/sysfs.c|   34 +---
 include/linux/cpuidle.h|4 ++
 6 files changed, 38 insertions(+), 86 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -24,10 +24,6 @@
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 
 DEFINE_MUTEX(cpuidle_lock);
-LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
-
-static int enabled_devices;
 
 #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
 static void cpuidle_kick_cpus(void)
@@ -47,7 +43,7 @@ static int __cpuidle_register_device(str
  *
  * NOTE: no locks or semaphores should be used here
  */
-static void cpuidle_idle_call(void)
+void cpuidle_idle_call(void)
 {
struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
struct cpuidle_state *target_state;
@@ -55,13 +51,10 @@ static void cpuidle_idle_call(void)
 
/* check if the device is ready */
if (!dev || !dev-enabled) {
-   if (pm_idle_old)
-   pm_idle_old();
-   else
 #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-   default_idle();
+   default_idle();
 #else
-   local_irq_enable();
+   local_irq_enable();
 #endif
return;
}
@@ -75,7 +68,11 @@ static void cpuidle_idle_call(void)
hrtimer_peek_ahead_timers();
 #endif
/* ask the governor for the next state */
-   next_state = cpuidle_curr_governor-select(dev);
+   if (dev-state_count  1)
+   next_state = cpuidle_curr_governor-select(dev);
+   else
+   next_state = 0;
+
if (need_resched())
return;
target_state = dev-states[next_state];
@@ -96,35 +93,12 @@ static void cpuidle_idle_call(void)
 }
 
 /**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
-   /* Make sure all changes finished before we switch to new idle 
*/
-   smp_wmb();
-   pm_idle = cpuidle_idle_call;
-   }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
-   cpuidle_kick_cpus();
-   }
-}
-
-/**
  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
  */
 void cpuidle_pause_and_lock(void)
 {
mutex_lock(cpuidle_lock);
-   cpuidle_uninstall_idle_handler();
+   cpuidle_kick_cpus();
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
@@ -134,7 +108,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
  */
 void cpuidle_resume_and_unlock(void)
 {
-   cpuidle_install_idle_handler();
mutex_unlock(cpuidle_lock);
 }
 
@@ -182,7 +155,6 @@ int cpuidle_enable_device(struct cpuidle
 
dev-enabled = 1;
 
-   enabled_devices++;
return 0;
 
 fail_sysfs:
@@ -213,7 +185,6 @@ void cpuidle_disable_device(struct cpuid
cpuidle_curr_governor-disable(dev);
 
cpuidle_remove_state_sysfs(dev);
-   enabled_devices--;
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -266,7 +237,6 @@ static void poll_idle_init(struct cpuidl
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-   int ret;
struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev-cpu);
 
if (!sys_dev)
@@ -274,16 +244,9 @@ static int __cpuidle_register_device(str
if (!try_module_get(cpuidle_curr_driver-owner))
return -EINVAL;
 
-   init_completion(dev-kobj_unregister);
-
poll_idle_init(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = dev;
-   list_add(dev-device_list, cpuidle_detected_devices);
-   if ((ret = cpuidle_add_sysfs(sys_dev))) {
-   module_put(cpuidle_curr_driver-owner);
-   return ret;
-   }
 
dev-registered = 1;
return 0;
@@ -305,7 +268,6 @@ int cpuidle_register_device(struct

[v9 PATCH 3/9]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

Implement a list based registering mechanism for architectures which
have multiple sets of idle routines which are to be registered.

Currently, in x86 it is done by merely setting pm_idle = idle_routine
and managing this pm_idle pointer is messy.

To give an example of how this mechanism works:
In x86, initially, idle routine is selected from the set of poll/mwait/
c1e/default idle loops. So the selected idle loop is registered in cpuidle
as a cpuidle device with a single idle state. Once ACPI comes up, it registers
another set of idle states on top of this state. Again, suppose a module
registers another set of idle loops, it is added to this list.

This provides a clean way of registering and unregistering idle state
routines.

In the current implementation, pm_idle is set as the current idle routine
being used and the old idle routine has to be maintained and when a module
registers/unregisters an idle routine, confusion arises.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   54 --
 include/linux/cpuidle.h   |1 
 2 files changed, 48 insertions(+), 7 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -22,6 +22,7 @@
 #include cpuidle.h
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
 
 DEFINE_MUTEX(cpuidle_lock);
 
@@ -113,6 +114,45 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+   int ret, cpu = dev-cpu;
+   struct cpuidle_device *old_dev;
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   old_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   cpuidle_remove_state_sysfs(old_dev);
+   }
+
+   list_add(dev-idle_list, per_cpu(cpuidle_devices_list, cpu));
+   ret = cpuidle_add_state_sysfs(dev);
+   return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+   struct cpuidle_device *temp_dev;
+   struct list_head *pos;
+   int ret, cpu = dev-cpu;
+
+   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
+   temp_dev = container_of(pos, struct cpuidle_device, idle_list);
+   if (dev == temp_dev) {
+   list_del(temp_dev-idle_list);
+   cpuidle_remove_state_sysfs(temp_dev);
+   break;
+   }
+   }
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   temp_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   ret = cpuidle_add_state_sysfs(temp_dev);
+   }
+   cpuidle_kick_cpus();
+}
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -137,9 +177,6 @@ int cpuidle_enable_device(struct cpuidle
return ret;
}
 
-   if ((ret = cpuidle_add_state_sysfs(dev)))
-   return ret;
-
if (cpuidle_curr_governor-enable 
(ret = cpuidle_curr_governor-enable(dev)))
goto fail_sysfs;
@@ -158,7 +195,7 @@ int cpuidle_enable_device(struct cpuidle
return 0;
 
 fail_sysfs:
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 
return ret;
 }
@@ -183,8 +220,6 @@ void cpuidle_disable_device(struct cpuid
 
if (cpuidle_curr_governor-disable)
cpuidle_curr_governor-disable(dev);
-
-   cpuidle_remove_state_sysfs(dev);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -268,6 +303,7 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev);
+   cpuidle_add_to_list(dev);
 
mutex_unlock(cpuidle_lock);
 
@@ -289,6 +325,7 @@ void cpuidle_unregister_device(struct cp
cpuidle_pause_and_lock();
 
cpuidle_disable_device(dev);
+   cpuidle_remove_from_list(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = NULL;
 
@@ -339,12 +376,15 @@ static inline void latency_notifier_init
  */
 static int __init cpuidle_init(void)
 {
-   int ret;
+   int ret, cpu;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
 
+   for_each_possible_cpu(cpu)
+   INIT_LIST_HEAD(per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(cpuidle_latency_notifier);
 
return 0;
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h

[v9 PATCH 4/9]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch cleans up x86 of all instances of pm_idle.

pm_idle which was earlier called from cpu_idle() idle loop
is replaced by cpuidle_idle_call.

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c  |   55 -
 arch/x86/kernel/process.c |   90 --
 arch/x86/kernel/process_32.c  |3 -
 arch/x86/kernel/process_64.c  |3 -
 arch/x86/xen/setup.c  |   40 ++
 drivers/acpi/processor_core.c |9 ++--
 drivers/cpuidle/cpuidle.c |   16 +--
 7 files changed, 182 insertions(+), 34 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
 #include linux/clockchips.h
 #include linux/random.h
 #include linux/user-return-notifier.h
+#include linux/cpuidle.h
 #include trace/events/power.h
 #include asm/system.h
 #include asm/apic.h
@@ -246,12 +247,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -331,17 +326,15 @@ static void do_nothing(void *unused)
 }
 
 /*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
  *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
  */
 void cpu_idle_wait(void)
 {
smp_mb();
-   /* kick all the CPUs so that they exit out of pm_idle */
+   /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -520,15 +513,70 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+
+#ifndef CONFIG_CPU_IDLE
+void cpuidle_idle_call(void)
+{
+   if (local_idle)
+   local_idle();
+   else
+   default_idle();
+}
+#endif
+
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
*st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
+
+static int setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -536,18 +584,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO using mwait in idle threads.\n);
-   pm_idle = mwait_idle;
+   local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO using C1E aware idle routine\n);
-   pm_idle = c1e_idle;
+   local_idle = c1e_idle;
} else
-   pm_idle = default_idle;
+   local_idle = default_idle

[v9 PATCH 5/9]: POWER: enable cpuidle for POWER.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -91,6 +91,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -247,6 +250,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of idle is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of old idle routine */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v9 PATCH 6/9]: pSeries/cpuidle: refactor pseries idle loops

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since their functionality is now implemented
in arch/powerpc/platforms/pseries/processor_idle.c.

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   58 ++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 30 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -297,18 +294,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -496,80 +483,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -46,6 +47,14 @@ static int __init powersave_off(char *ar
 }
 __setup

[v9 PATCH 7/9]: POWER: add a default_idle idle loop for POWER.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

In arch/powerpc/kernel/idle.c, create a default_idle() routine by moving
the fallback branch of the cpu_idle() idle loop into it. This is needed
so that the cpuidle infrastructure can call default_idle() when no other
idle routines are registered yet. Functionality remains the same; the
code is only moved around slightly.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -94,6 +94,9 @@ config ARCH_HAS_ILOG2_U64
 config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -121,6 +121,12 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v9 PATCH 8/9]: pSeries: implement pSeries processor idle module.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements pseries_cpuidle_loop(), which will be the main idle loop
called from cpu_idle(). Based on the decision taken by the cpuidle
governor, it enters either dedicated_snooze_loop or dedicated_cede_loop
for a dedicated LPAR, or shared_cede_loop for a shared-processor LPAR.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |1 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  210 
 arch/powerpc/platforms/pseries/pseries.h|8 
 5 files changed, 222 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,10 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,210 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+/* pSeries Idle state Flags */
+#definePSERIES_DEDICATED_SNOOZE(0x01)
+#definePSERIES_DEDICATED_CEDE  (0x02)
+#definePSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void dedicated_snooze_loop(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void dedicated_cede_loop(void

[v9 PATCH 9/9]: POWER: Enable default_idle when power_save=off.

2009-10-16 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-16 15:08:50]:

This patch enables default_idle when the powersave=off kernel boot
option is specified.

Earlier, this was done by setting ppc_md.power_save = NULL, which caused
HMT_low() and HMT_very_low() to be called. These calls now live in
default_idle(). Setting boot_option_idle_override = 1 skips the cpuidle
registration, so default_idle is chosen in cpuidle_idle_call.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/processor.h|2 ++
 arch/powerpc/kernel/idle.c  |4 +++-
 arch/powerpc/platforms/pseries/processor_idle.c |5 +
 3 files changed, 10 insertions(+), 1 deletion(-)

Index: linux.trees.git/arch/powerpc/include/asm/processor.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/processor.h
+++ linux.trees.git/arch/powerpc/include/asm/processor.h
@@ -332,6 +332,8 @@ static inline unsigned long get_clean_sp
 }
 #endif
 
+extern int boot_option_idle_override;
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PROCESSOR_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -40,9 +40,11 @@
 #define cpu_should_die()   0
 #endif
 
+int boot_option_idle_override = 0;
+
 static int __init powersave_off(char *arg)
 {
-   ppc_md.power_save = NULL;
+   boot_option_idle_override = 1;
return 0;
 }
 __setup(powersave=off, powersave_off);
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/processor_idle.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -185,6 +185,11 @@ static int __init pseries_processor_idle
int cpu;
int result;
 
+   if (boot_option_idle_override) {
+   printk(KERN_DEBUG Using default idle\n);
+   return 0;
+   }
+
result = cpuidle_register_driver(pseries_idle_driver);
 
if (result  0)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-15 Thread Arun R Bharadwaj
* Andi Kleen a...@firstfloor.org [2009-10-14 09:18:38]:

  How about something like this..
  If the arch does not enable CONFIG_CPU_IDLE, the cpuidle_idle_call
  which is called from cpu_idle() should call default_idle without
  involving the registering cpuidle steps. This should prevent bloating
  up of the kernel for archs which dont want to use cpuidle.
 
 On x86 some people want a small kernel too, so selecting it at an
 architecture granularity is not good. Also, you can make it the default;
 you just need to slim it down first.
 

No, I don't mean selecting it at architecture granularity.

At compile time, if CONFIG_CPU_IDLE is disabled, the arch can redefine
cpuidle_idle_call. For example, in arch/x86/kernel/process.c:

#ifndef CONFIG_CPU_IDLE
void cpuidle_idle_call(void)
{
if (local_idle)
local_idle();
else
default_idle();
}
#endif

where local_idle points to the idle routine selected by
select_idle_routine(), which can be poll, mwait or c1e.

This way, we preserve exactly the same functionality as before, remove
the ugly exported pm_idle function pointer, and avoid unnecessary code
bloat for platforms that do not want to use cpuidle.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-14 Thread Arun R Bharadwaj
* Andi Kleen a...@firstfloor.org [2009-10-12 20:00:05]:

 Peter Zijlstra a.p.zijls...@chello.nl writes:
 
  So does it make sense to have a set of sets?
 
  Why not integrate them all into one set to be ruled by this governor
  thing?
 
 cpuidle is currently optional, that is why the two level hierarchy
 is there so that you can still have simple idle selection without it.
 
 % size drivers/cpuidle/*.o
    text    data     bss     dec     hex filename
    5514    1416      44    6974    1b3e drivers/cpuidle/built-in.o
 
 Adding it unconditionally would add ~7k to everyone who wants idle functions.
 
 I think making it unconditional would require putting it on a serious
 diet first.
 

Hi Andi,

Yes, this is a valid point.

How about something like this:
If the arch does not enable CONFIG_CPU_IDLE, cpuidle_idle_call(),
which is called from cpu_idle(), should call default_idle() directly,
without going through the cpuidle registration steps. This should avoid
bloating the kernel for archs which don't want to use cpuidle.

--arun
 -Andi
 -- 
 a...@linux.intel.com -- Speaking for myself only.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 1/8]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-10-14 Thread Arun R Bharadwaj
* Balbir Singh bal...@linux.vnet.ibm.com [2009-10-12 17:06:02]:

 * Arun R B a...@linux.vnet.ibm.com [2009-10-08 15:19:42]:
 
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:
  
  This patch cleans up drivers/cpuidle/cpuidle.c
  Earlier cpuidle assumed pm_idle as the default idle loop. Break that
  assumption and make it more generic. cpuidle_idle_call() which is the
  main idle loop of cpuidle is to be called by architectures which have
  registered to cpuidle.
  
  Remove routines cpuidle_install/uninstall_idle_handler() which are not
  needed anymore.
  
 
 
 [snip]
 
   /**
  - * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
  - */
  -void cpuidle_install_idle_handler(void)
  -{
  -   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
  -   /* Make sure all changes finished before we switch to new idle 
  */
  -   smp_wmb();
  -   pm_idle = cpuidle_idle_call;
  -   }
  -}
  -
  -/**
  - * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop 
  handler
  - */
  -void cpuidle_uninstall_idle_handler(void)
  -{
  -   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
  -   pm_idle = pm_idle_old;
  -   cpuidle_kick_cpus();
  -   }
  -}
  -
 
 I see the routines above being called in from
 cpuidle_pause/resume_and_lock/unlock below and they are entries from
 ACPI on ACPI_PROCESSOR_NOTIFY_POWER and from the hotplug path, could
 you test them to make sure they are not broken. We also seem to be
 missing a cpuidle_kick_cpus() in cpuidle_pause_and_lock()
 
 [snip]
 

Hi Balbir,

yes, we definitely need a cpuidle_kick_cpus() in
cpuidle_pause_and_lock() since this is used while disabling the
cpuidle_device and the cpus need to be kicked out of the idle states.
I will test this modified code and see if it breaks hotplug.

thanks,
arun

 -- 
   Balbir
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-09 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-10-08 14:25:37]:

 On Thu, 2009-10-08 at 17:31 +0530, Arun R Bharadwaj wrote:
  
   Uhm, no, it would mean ACPI putting its idle routines on the same level
   as all others.
   
  
  Putting them all on the same level would mean, we need an
  enable/disable routine to enable only the currently active routines.
 
 What's this enable/disable stuff about?
 
  Also, the way governor works is that, it assumes that idle routines
  are indexed in the increasing order of power benefit that can be got
  out of the state. So this would get messed up.
 
 Right, which is why I initially had a power-savings field in my
 proposal, so it could weight the power savings vs the wakeup latency.
 
   http://lkml.org/lkml/2009/8/27/159
 
 There it was said that was exactly what these governors were doing,
 seems its not.
 
   Sounds like something is wrong alright. If you can register an idle
   routine you should be able to unregister it too.
  
  
  Yes, we can register and unregister in a clean way now.
  Consider this. We have a set of routines A, B, C currently registered.
  Now a module comes and registers D and E, and later on at some point
  of time wants to unregister. So how do you keep track of what all idle
  routines the module registered and unregister only those?
  Best way to do that is a stack, which is how I have currently
  implemented.
 
 Right, so destroy that inner set thing, that way we only have one
 left ;-)
 

I'm not convinced by your argument. Why don't we do this
incrementally? Right now, this set-of-sets mechanism works fine and
doesn't appear to have any obvious flaws. We have a clean
register/unregister mechanism which solves all the earlier problems we
set out to solve.

We can gradually build on this and try to come up with a single set
of idle states for a governor to choose from.

thanks,
arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v8 PATCH 0/8]: cpuidle: Cleanup cpuidle/ Introduce cpuidle to POWER.

2009-10-08 Thread Arun R Bharadwaj
Hi

Please consider this for inclusion into the testing tree.

This patchset introduces cpuidle infrastructure to POWER, prototyping
for pSeries, and also does a major refactoring of current x86 idle
power management and a cleanup of cpuidle infrastructure.

Earlier discussions on the same can be found at:

v7 -- http://lkml.org/lkml/2009/10/6/278
v6 -- http://lkml.org/lkml/2009/9/22/180
v5 -- http://lkml.org/lkml/2009/9/22/26
v4 -- http://lkml.org/lkml/2009/9/1/133
v3 -- http://lkml.org/lkml/2009/8/27/124
v2 -- http://lkml.org/lkml/2009/8/26/233
v1 -- http://lkml.org/lkml/2009/8/19/150


Changes in this version:
--

* Remove redundant poll_idle definition in arch/x86/kernel/process.c

* Prevent acpi_driver from registering when boot_option_idle_override
  is set, and let the cpuidle_default driver take over in this case.

* Enable default_idle when powersave=off is specified on POWER.

thanks,
arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v8 PATCH 1/8]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch cleans up drivers/cpuidle/cpuidle.c.
Earlier, cpuidle assumed pm_idle as the default idle loop. Break that
assumption and make it more generic. cpuidle_idle_call(), which is the
main idle loop of cpuidle, is to be called by architectures which have
registered with cpuidle.

Remove routines cpuidle_install/uninstall_idle_handler() which are not
needed anymore.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c  |   62 +
 drivers/cpuidle/cpuidle.h  |6 +---
 drivers/cpuidle/driver.c   |4 --
 drivers/cpuidle/governor.c |   13 +++--
 drivers/cpuidle/sysfs.c|   34 +---
 include/linux/cpuidle.h|4 ++
 6 files changed, 37 insertions(+), 86 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -24,10 +24,6 @@
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 
 DEFINE_MUTEX(cpuidle_lock);
-LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
-
-static int enabled_devices;
 
 #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
 static void cpuidle_kick_cpus(void)
@@ -47,7 +43,7 @@ static int __cpuidle_register_device(str
  *
  * NOTE: no locks or semaphores should be used here
  */
-static void cpuidle_idle_call(void)
+void cpuidle_idle_call(void)
 {
struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
struct cpuidle_state *target_state;
@@ -55,13 +51,10 @@ static void cpuidle_idle_call(void)
 
/* check if the device is ready */
if (!dev || !dev-enabled) {
-   if (pm_idle_old)
-   pm_idle_old();
-   else
 #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-   default_idle();
+   default_idle();
 #else
-   local_irq_enable();
+   local_irq_enable();
 #endif
return;
}
@@ -75,7 +68,11 @@ static void cpuidle_idle_call(void)
hrtimer_peek_ahead_timers();
 #endif
/* ask the governor for the next state */
-   next_state = cpuidle_curr_governor-select(dev);
+   if (dev-state_count  1)
+   next_state = cpuidle_curr_governor-select(dev);
+   else
+   next_state = 0;
+
if (need_resched())
return;
target_state = dev-states[next_state];
@@ -96,35 +93,11 @@ static void cpuidle_idle_call(void)
 }
 
 /**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
-   /* Make sure all changes finished before we switch to new idle 
*/
-   smp_wmb();
-   pm_idle = cpuidle_idle_call;
-   }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
-   cpuidle_kick_cpus();
-   }
-}
-
-/**
  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
  */
 void cpuidle_pause_and_lock(void)
 {
mutex_lock(cpuidle_lock);
-   cpuidle_uninstall_idle_handler();
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
@@ -134,7 +107,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
  */
 void cpuidle_resume_and_unlock(void)
 {
-   cpuidle_install_idle_handler();
mutex_unlock(cpuidle_lock);
 }
 
@@ -182,7 +154,6 @@ int cpuidle_enable_device(struct cpuidle
 
dev-enabled = 1;
 
-   enabled_devices++;
return 0;
 
 fail_sysfs:
@@ -213,7 +184,6 @@ void cpuidle_disable_device(struct cpuid
cpuidle_curr_governor-disable(dev);
 
cpuidle_remove_state_sysfs(dev);
-   enabled_devices--;
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -266,7 +236,6 @@ static void poll_idle_init(struct cpuidl
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-   int ret;
struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev-cpu);
 
if (!sys_dev)
@@ -274,16 +243,9 @@ static int __cpuidle_register_device(str
if (!try_module_get(cpuidle_curr_driver-owner))
return -EINVAL;
 
-   init_completion(dev-kobj_unregister);
-
poll_idle_init(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = dev;
-   list_add(dev-device_list, cpuidle_detected_devices);
-   if ((ret = cpuidle_add_sysfs(sys_dev))) {
-   module_put(cpuidle_curr_driver-owner);
-   return ret;
-   }
 
dev-registered = 1;
return 0;
@@ -305,7 +267,6 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev

[v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

Implement a list based registering mechanism for architectures which
have multiple sets of idle routines which are to be registered.

Currently, on x86 this is done by merely setting pm_idle = idle_routine,
and managing this pm_idle pointer is messy.

To give an example of how this mechanism works:
On x86, an idle routine is initially selected from the set of poll/mwait/
c1e/default idle loops. The selected idle loop is registered with cpuidle
as a cpuidle device with a single idle state. Once ACPI comes up, it
registers another set of idle states on top of this one. Likewise, if a
module registers another set of idle loops, it is added to this list.

This provides a clean way of registering and unregistering idle state
routines.

In the current implementation, pm_idle is set to the idle routine
currently in use and the old idle routine has to be tracked separately,
so confusion arises when a module registers or unregisters an idle routine.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   54 --
 include/linux/cpuidle.h   |1 
 2 files changed, 48 insertions(+), 7 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -22,6 +22,7 @@
 #include cpuidle.h
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
 
 DEFINE_MUTEX(cpuidle_lock);
 
@@ -112,6 +113,45 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+   int ret, cpu = dev-cpu;
+   struct cpuidle_device *old_dev;
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   old_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   cpuidle_remove_state_sysfs(old_dev);
+   }
+
+   list_add(dev-idle_list, per_cpu(cpuidle_devices_list, cpu));
+   ret = cpuidle_add_state_sysfs(dev);
+   return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+   struct cpuidle_device *temp_dev;
+   struct list_head *pos;
+   int ret, cpu = dev-cpu;
+
+   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
+   temp_dev = container_of(pos, struct cpuidle_device, idle_list);
+   if (dev == temp_dev) {
+   list_del(temp_dev-idle_list);
+   cpuidle_remove_state_sysfs(temp_dev);
+   break;
+   }
+   }
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   temp_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   ret = cpuidle_add_state_sysfs(temp_dev);
+   }
+   cpuidle_kick_cpus();
+}
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -136,9 +176,6 @@ int cpuidle_enable_device(struct cpuidle
return ret;
}
 
-   if ((ret = cpuidle_add_state_sysfs(dev)))
-   return ret;
-
if (cpuidle_curr_governor-enable 
(ret = cpuidle_curr_governor-enable(dev)))
goto fail_sysfs;
@@ -157,7 +194,7 @@ int cpuidle_enable_device(struct cpuidle
return 0;
 
 fail_sysfs:
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 
return ret;
 }
@@ -182,8 +219,6 @@ void cpuidle_disable_device(struct cpuid
 
if (cpuidle_curr_governor-disable)
cpuidle_curr_governor-disable(dev);
-
-   cpuidle_remove_state_sysfs(dev);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -267,6 +302,7 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev);
+   cpuidle_add_to_list(dev);
 
mutex_unlock(cpuidle_lock);
 
@@ -288,6 +324,7 @@ void cpuidle_unregister_device(struct cp
cpuidle_pause_and_lock();
 
cpuidle_disable_device(dev);
+   cpuidle_remove_from_list(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = NULL;
 
@@ -338,12 +375,15 @@ static inline void latency_notifier_init
  */
 static int __init cpuidle_init(void)
 {
-   int ret;
+   int ret, cpu;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
 
+   for_each_possible_cpu(cpu)
+   INIT_LIST_HEAD(per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(cpuidle_latency_notifier);
 
return 0;
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h

[v8 PATCH 3/8]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch removes all instances of pm_idle from x86.

pm_idle, which was earlier called from the cpu_idle() idle loop,
is replaced by cpuidle_idle_call.

x86 also registers with cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

The same is done for the apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c  |   55 +++-
 arch/x86/kernel/process.c |   93 ++
 arch/x86/kernel/process_32.c  |3 -
 arch/x86/kernel/process_64.c  |3 -
 arch/x86/xen/setup.c  |   40 +-
 drivers/acpi/processor_core.c |9 ++--
 drivers/cpuidle/cpuidle.c |   16 ---
 include/linux/cpuidle.h   |1 
 8 files changed, 172 insertions(+), 48 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
 #include linux/pm.h
 #include linux/clockchips.h
 #include linux/random.h
+#include linux/cpuidle.h
 #include trace/events/power.h
 #include asm/system.h
 #include asm/apic.h
@@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -329,17 +324,15 @@ static void do_nothing(void *unused)
 }
 
 /*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
  *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
  */
 void cpu_idle_wait(void)
 {
smp_mb();
-   /* kick all the CPUs so that they exit out of pm_idle */
+   /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -387,20 +380,6 @@ static void mwait_idle(void)
 }
 
 /*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the -work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
-   trace_power_start(POWER_CSTATE, 0);
-   local_irq_enable();
-   while (!need_resched())
-   cpu_relax();
-   trace_power_end(0);
-}
-
-/*
  * mwait selection logic:
  *
  * It depends on the CPU. For AMD CPUs that support MWAIT this is
@@ -518,15 +497,59 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
*st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
+
+static int setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -534,18 +557,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO

[v8 PATCH 4/8]: POWER: enable cpuidle for POWER.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -91,6 +91,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -247,6 +250,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of idle is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of old idle routine */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v8 PATCH 5/8]: pSeries/cpuidle: remove dedicated/shared idle loops, which will be moved to arch/powerpc/platforms/pseries/processor_idle.c

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since they are now implemented as part
of arch/powerpc/platforms/pseries/processor_idle.c

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   50 +++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 22 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -297,18 +294,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -496,80 +483,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -60,35 +61,26 @@ void cpu_idle(void)
while (!need_resched

[v8 PATCH 6/8]: POWER: add a default_idle idle loop for POWER.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

In arch/powerpc/kernel/idle.c create a default_idle() routine by moving
the failover condition of the cpu_idle() idle loop. This is needed by
cpuidle infrastructure to call default_idle when other idle routines
are not yet registered. Functionality remains the same, but the code is
slightly moved around.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -94,6 +94,9 @@ config ARCH_HAS_ILOG2_U64
 config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -113,6 +113,12 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v8 PATCH 7/8]: pSeries: implement pSeries processor idle module.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements a pseries_cpuidle_loop() which would be the main idle loop
called from cpu_idle(). Based on the decision taken by the cpuidle
governor, it enters either dedicated_snooze_loop or dedicated_cede_loop
on a dedicated-processor LPAR, or shared_cede_loop on a shared-processor
LPAR.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |1 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  210 
 arch/powerpc/platforms/pseries/pseries.h|8 
 5 files changed, 222 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,10 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,210 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+/* pSeries Idle state Flags */
+#definePSERIES_DEDICATED_SNOOZE(0x01)
+#definePSERIES_DEDICATED_CEDE  (0x02)
+#definePSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void dedicated_snooze_loop(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void dedicated_cede_loop(void

[v8 PATCH 8/8]: POWER: Enable default_idle when power_save=off.

2009-10-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:

This patch enables default_idle when the powersave=off kernel boot
option is specified.

Earlier, this was done by setting ppc_md.power_save = NULL, so that
HMT_low() and HMT_very_low() were called. That fallback is now defined in
default_idle(), so setting boot_option_idle_override = 1 makes the
cpuidle registration get skipped, and hence default_idle is chosen in
cpuidle_idle_call.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/processor.h|2 ++
 arch/powerpc/kernel/idle.c  |4 +++-
 arch/powerpc/platforms/pseries/processor_idle.c |5 +
 3 files changed, 10 insertions(+), 1 deletion(-)

Index: linux.trees.git/arch/powerpc/include/asm/processor.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/processor.h
+++ linux.trees.git/arch/powerpc/include/asm/processor.h
@@ -332,6 +332,8 @@ static inline unsigned long get_clean_sp
 }
 #endif
 
+extern int boot_option_idle_override;
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PROCESSOR_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -40,9 +40,11 @@
 #define cpu_should_die()   0
 #endif
 
+int boot_option_idle_override = 0;
+
 static int __init powersave_off(char *arg)
 {
-   ppc_md.power_save = NULL;
+   boot_option_idle_override = 1;
return 0;
 }
 __setup(powersave=off, powersave_off);
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/processor_idle.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -185,6 +185,11 @@ static int __init pseries_processor_idle
int cpu;
int result;
 
+   if (boot_option_idle_override) {
+   printk(KERN_DEBUG Using default idle\n);
+   return 0;
+   }
+
result = cpuidle_register_driver(pseries_idle_driver);
 
if (result  0)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-08 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-10-08 12:36:02]:

 On Thu, 2009-10-08 at 15:20 +0530, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-08 15:18:28]:
  
  Implement a list based registering mechanism for architectures which
  have multiple sets of idle routines which are to be registered.
  
  Currently, in x86 it is done by merely setting pm_idle = idle_routine
  and managing this pm_idle pointer is messy.
  
  To give an example of how this mechanism works:
  In x86, initially, the idle routine is selected from the set of poll/mwait/
  c1e/default idle loops. The selected idle loop is then registered with cpuidle
  as a cpuidle device with a single idle state. Once ACPI comes up, it registers
  another set of idle states on top of this state. Again, suppose a module
  registers another set of idle loops, it is added to this list.
  
  This provides a clean way of registering and unregistering idle state
  routines.
 
 So cpuidle didn't already have a list of idle functions it takes an
 appropriate one from?
 

No. As of now, cpuidle supports only one _set_ of idle states that
can be registered. Within this one set, it chooses the appropriate
idle state. But this list mechanism (actually a stack) allows for
multiple sets.

This is needed because we have a hierarchy of idle states discovery
in x86. First, select_idle_routine() would select poll/mwait/default/c1e.
It doesn't know of the existence of ACPI. Later when ACPI comes up,
it registers a set of routines on top of the earlier set.

 Then what does this governor do?


The governor would only select the best idle state available from the
set of states which is at the top of the stack. (In the above case, it
would only consider the states registered by ACPI).

If the top-of-the-stack set of idle states is unregistered, the next
set of states on the stack are considered.

 Also, does this imply the governor doesn't consider these idle routines?


As I said above, the governor would only consider the idle routines which
are at the top of the stack.

Hope this gave a better idea..

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v8 PATCH 2/8]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-08 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-10-08 13:25:10]:

 On Thu, 2009-10-08 at 16:31 +0530, Arun R Bharadwaj wrote:
  * Peter Zijlstra a.p.zijls...@chello.nl [2009-10-08 12:50:33]:
  
   On Thu, 2009-10-08 at 16:12 +0530, Arun R Bharadwaj wrote:

 So cpuidle didn't already have a list of idle functions it takes an
 appropriate one from?
 

No.. As of now, cpuidle supported only one _set_ of idle states that
can be registered. So in this one set, it would choose the appropriate
idle state. But this list mechanism(actually a stack) allows for
multiple sets.

This is needed because we have a hierarchy of idle states discovery
in x86. First, select_idle_routine() would select 
poll/mwait/default/c1e.
It doesn't know of the existence of ACPI. Later when ACPI comes up,
it registers a set of routines on top of the earlier set.

 Then what does this governor do?


The governor would only select the best idle state available from the
set of states which is at the top of the stack. (In the above case, it
would only consider the states registered by ACPI).

If the top-of-the-stack set of idle states is unregistered, the next
set of states on the stack are considered.

 Also, does this imply the governor doesn't consider these idle 
 routines?


As i said above, governor would only consider the idle routines which
are at the top of the stack.

Hope this gave a better idea..
   
   So does it make sense to have a set of sets?
   
   Why not integrate them all into one set to be ruled by this governor
   thing?
   
  
  Right now there is a clean hierarchy. So breaking that would mean
  putting the registration of all idle routines under ACPI. 
 
 Uhm, no, it would mean ACPI putting its idle routines on the same level
 as all others.
 

Putting them all on the same level would mean, we need an
enable/disable routine to enable only the currently active routines.

Also, the governor assumes that idle routines are indexed in increasing
order of the power benefit that each state provides. Mixing all the
routines into one set would break that ordering.

  So, if ACPI
  fails to come up or if ACPI is not supported, that would lead to
  problems.
 
 I think the problem is that ACPI is thinking it's special; that should be
 rectified, it's not.
 
   Because if that happens now, we can fallback to the
  initially registered set.
 
 I'm thinking it's all daft and we should be having one set of idle
 routines, if ACPI fails (a tautology if ever there was one) we simply
 wouldn't have its idle routines to pick from.
 
  Also, if a module wants to register a set of routines later on, that
  can't be added to the initially registered set. So I think we need this
  set of sets.
 
 Sounds like something is wrong alright. If you can register an idle
 routine you should be able to unregister it too.


Yes, we can register and unregister in a clean way now.
Consider this. We have a set of routines A, B, C currently registered.
Now a module comes and registers D and E, and later on at some point
in time wants to unregister them. So how do you keep track of which idle
routines the module registered, and unregister only those?
The best way to do that is a stack, which is how I have currently
implemented it.

 What about making ACPI register its idle routines too, 1 for each C
 state, and have the governor make a selection out of the full set?
 
 That also allows you to do away with this default_idle() nonsense and
 simply panic the box when there are no registered idle routines when the
 box wants to go idle.
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v7 PATCH 7/7]: pSeries: implement pSeries processor idle module.

2009-10-07 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 21:05:22]:

 * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

Please consider this updated PATCH 7/7 instead of the earlier one.
The earlier one had a late_initcall(pseries_processor_idle_init),
which caused a panic when cpuidle_enable_device() was called from
cpuidle_switch_governor(). This is because registration of cpuidle
devices was happening at a later stage. So change this to a
device_initcall() to get rid of the panic.

---

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements a pseries_cpuidle_loop() which would be the main idle loop
called from cpu_idle(). Based on the decision taken by the cpuidle
governor, it enters either dedicated_snooze_loop or dedicated_cede_loop
on a dedicated-processor LPAR, or shared_cede_loop on a shared-processor
LPAR.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |1 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  208 
 arch/powerpc/platforms/pseries/pseries.h|8 
 5 files changed, 220 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,10 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,208 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+/* pSeries Idle state Flags */
+#definePSERIES_DEDICATED_SNOOZE(0x01)
+#definePSERIES_DEDICATED_CEDE  (0x02)
+#definePSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0

Re: [v7 PATCH 3/7]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-07 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-10-07 16:45:50]:

 On Tue, 2009-10-06 at 21:01 +0530, Arun R Bharadwaj wrote:
  +++ linux.trees.git/arch/x86/kernel/process.c
  @@ -9,6 +9,7 @@
   #include linux/pm.h
   #include linux/clockchips.h
   #include linux/random.h
  +#include linux/cpuidle.h
   #include trace/events/power.h
   #include asm/system.h
   #include asm/apic.h
  @@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
   unsigned long boot_option_idle_override = 0;
   EXPORT_SYMBOL(boot_option_idle_override);
   
  -/*
  - * Powermanagement idle function, if any..
  - */
  -void (*pm_idle)(void);
  -EXPORT_SYMBOL(pm_idle);
  -
   #ifdef CONFIG_X86_32
   /*
* This halt magic was a workaround for ancient floppy DMA
  @@ -329,17 +324,15 @@ static void do_nothing(void *unused)
   }
   
   /*
  - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
  - * pm_idle and update to new pm_idle value. Required while changing pm_idle
  - * handler on SMP systems.
  + * cpu_idle_wait - Required while changing idle routine handler on SMP 
  systems.
*
  - * Caller must have changed pm_idle to the new value before the call. Old
  - * pm_idle value will not be used by any CPU after the return of this 
  function.
  + * Caller must have changed idle routine to the new value before the call. 
  Old
  + * value will not be used by any CPU after the return of this function.
*/
   void cpu_idle_wait(void)
   {
  smp_mb();
  -   /* kick all the CPUs so that they exit out of pm_idle */
  +   /* kick all the CPUs so that they exit out of idle loop */
  smp_call_function(do_nothing, NULL, 1);
   }
   EXPORT_SYMBOL_GPL(cpu_idle_wait);
  @@ -518,15 +511,59 @@ static void c1e_idle(void)
  default_idle();
   }
   
  +static void (*local_idle)(void);
  +DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
  +
  +struct cpuidle_driver cpuidle_default_driver = {
  +   .name = cpuidle_default,
  +};
  +
  +static int local_idle_loop(struct cpuidle_device *dev, struct 
  cpuidle_state *st)
  +{
  +   ktime_t t1, t2;
  +   s64 diff;
  +   int ret;
  +
  +   t1 = ktime_get();
  +   local_idle();
  +   t2 = ktime_get();
  +
  +   diff = ktime_to_us(ktime_sub(t2, t1));
  +   if (diff  INT_MAX)
  +   diff = INT_MAX;
  +   ret = (int) diff;
  +
  +   return ret;
  +}
  +
  +static int setup_cpuidle_simple(void)
  +{
  +   struct cpuidle_device *dev;
  +   int cpu;
  +
  +   if (!cpuidle_curr_driver)
  +   cpuidle_register_driver(cpuidle_default_driver);
  +
  +   for_each_online_cpu(cpu) {
  +   dev = per_cpu(idle_devices, cpu);
  +   dev-cpu = cpu;
  +   dev-states[0].enter = local_idle_loop;
  +   dev-state_count = 1;
  +   cpuidle_register_device(dev);
  +   }
  +   return 0;
  +}
  +device_initcall(setup_cpuidle_simple);
  +
   void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
   {
   #ifdef CONFIG_SMP
  -   if (pm_idle == poll_idle  smp_num_siblings  1) {
  +   if (local_idle == poll_idle  smp_num_siblings  1) {
  printk(KERN_WARNING WARNING: polling idle and HT enabled,
   performance may degrade.\n);
  }
   #endif
  -   if (pm_idle)
  +   if (local_idle)
  return;
   
  if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
  @@ -534,18 +571,20 @@ void __cpuinit select_idle_routine(const
   * One CPU supports mwait = All CPUs supports mwait
   */
  printk(KERN_INFO using mwait in idle threads.\n);
  -   pm_idle = mwait_idle;
  +   local_idle = mwait_idle;
  } else if (check_c1e_idle(c)) {
  printk(KERN_INFO using C1E aware idle routine\n);
  -   pm_idle = c1e_idle;
  +   local_idle = c1e_idle;
  } else
  -   pm_idle = default_idle;
  +   local_idle = default_idle;
  +
  +   return;
   }
   
   void __init init_c1e_mask(void)
   {
  /* If we're using c1e_idle, we need to allocate c1e_mask. */
  -   if (pm_idle == c1e_idle)
  +   if (local_idle == c1e_idle)
  zalloc_cpumask_var(c1e_mask, GFP_KERNEL);
   }
   
  @@ -556,7 +595,7 @@ static int __init idle_setup(char *str)
   
  if (!strcmp(str, poll)) {
  printk(using polling idle threads.\n);
  -   pm_idle = poll_idle;
  +   local_idle = poll_idle;
  } else if (!strcmp(str, mwait))
  force_mwait = 1;
  else if (!strcmp(str, halt)) {
  @@ -567,7 +606,7 @@ static int __init idle_setup(char *str)
   * To continue to load the CPU idle driver, don't touch
   * the boot_option_idle_override.
   */
  -   pm_idle = default_idle

[v7 PATCH 1/7]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

This patch cleans up drivers/cpuidle/cpuidle.c
Earlier cpuidle assumed pm_idle as the default idle loop. Break that
assumption and make it more generic. cpuidle_idle_call() which is the
main idle loop of cpuidle is to be called by architectures which have
registered to cpuidle.

Remove routines cpuidle_install/uninstall_idle_handler() and
cpuidle_kick_cpus() which are not needed anymore.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c  |   62 +
 drivers/cpuidle/cpuidle.h  |6 +---
 drivers/cpuidle/driver.c   |4 --
 drivers/cpuidle/governor.c |   13 +++--
 drivers/cpuidle/sysfs.c|   34 +---
 include/linux/cpuidle.h|4 ++
 6 files changed, 37 insertions(+), 86 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -24,10 +24,6 @@
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 
 DEFINE_MUTEX(cpuidle_lock);
-LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
-
-static int enabled_devices;
 
 #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
 static void cpuidle_kick_cpus(void)
@@ -47,7 +43,7 @@ static int __cpuidle_register_device(str
  *
  * NOTE: no locks or semaphores should be used here
  */
-static void cpuidle_idle_call(void)
+void cpuidle_idle_call(void)
 {
struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
struct cpuidle_state *target_state;
@@ -55,13 +51,10 @@ static void cpuidle_idle_call(void)
 
/* check if the device is ready */
if (!dev || !dev-enabled) {
-   if (pm_idle_old)
-   pm_idle_old();
-   else
 #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-   default_idle();
+   default_idle();
 #else
-   local_irq_enable();
+   local_irq_enable();
 #endif
return;
}
@@ -75,7 +68,11 @@ static void cpuidle_idle_call(void)
hrtimer_peek_ahead_timers();
 #endif
/* ask the governor for the next state */
-   next_state = cpuidle_curr_governor-select(dev);
+   if (dev-state_count  1)
+   next_state = cpuidle_curr_governor-select(dev);
+   else
+   next_state = 0;
+
if (need_resched())
return;
target_state = dev-states[next_state];
@@ -96,35 +93,11 @@ static void cpuidle_idle_call(void)
 }
 
 /**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
-   /* Make sure all changes finished before we switch to new idle 
*/
-   smp_wmb();
-   pm_idle = cpuidle_idle_call;
-   }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
-   cpuidle_kick_cpus();
-   }
-}
-
-/**
  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
  */
 void cpuidle_pause_and_lock(void)
 {
mutex_lock(cpuidle_lock);
-   cpuidle_uninstall_idle_handler();
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
@@ -134,7 +107,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
  */
 void cpuidle_resume_and_unlock(void)
 {
-   cpuidle_install_idle_handler();
mutex_unlock(cpuidle_lock);
 }
 
@@ -182,7 +154,6 @@ int cpuidle_enable_device(struct cpuidle
 
dev-enabled = 1;
 
-   enabled_devices++;
return 0;
 
 fail_sysfs:
@@ -213,7 +184,6 @@ void cpuidle_disable_device(struct cpuid
cpuidle_curr_governor-disable(dev);
 
cpuidle_remove_state_sysfs(dev);
-   enabled_devices--;
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -266,7 +236,6 @@ static void poll_idle_init(struct cpuidl
  */
 static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
-   int ret;
struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev-cpu);
 
if (!sys_dev)
@@ -274,16 +243,9 @@ static int __cpuidle_register_device(str
if (!try_module_get(cpuidle_curr_driver-owner))
return -EINVAL;
 
-   init_completion(dev-kobj_unregister);
-
poll_idle_init(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = dev;
-   list_add(dev-device_list, cpuidle_detected_devices);
-   if ((ret = cpuidle_add_sysfs(sys_dev))) {
-   module_put(cpuidle_curr_driver-owner);
-   return ret;
-   }
 
dev-registered = 1;
return 0;
@@ -305,7 +267,6 @@ int cpuidle_register_device(struct cpuid

[v7 PATCH 2/7]: cpuidle: implement a list based approach to register a set of idle routines.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

Implement a list based registering mechanism for architectures which
have multiple sets of idle routines which are to be registered.

Currently, in x86 it is done by merely setting pm_idle = idle_routine
and managing this pm_idle pointer is messy.

To give an example of how this mechanism works:
In x86, initially, the idle routine is selected from the set of poll/mwait/
c1e/default idle loops. The selected idle loop is then registered in cpuidle
as a cpuidle device with a single idle state. Once ACPI comes up, it registers
another set of idle states on top of this state. Similarly, if a module
registers another set of idle loops, it is added to this list.

This provides a clean way of registering and unregistering idle state
routines.

In the current implementation, pm_idle is set as the current idle routine
being used and the old idle routine has to be maintained and when a module
registers/unregisters an idle routine, confusion arises.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   54 --
 include/linux/cpuidle.h   |1 
 2 files changed, 48 insertions(+), 7 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -22,6 +22,7 @@
 #include cpuidle.h
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
 
 DEFINE_MUTEX(cpuidle_lock);
 
@@ -112,6 +113,45 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+   int ret, cpu = dev-cpu;
+   struct cpuidle_device *old_dev;
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   old_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   cpuidle_remove_state_sysfs(old_dev);
+   }
+
+   list_add(dev-idle_list, per_cpu(cpuidle_devices_list, cpu));
+   ret = cpuidle_add_state_sysfs(dev);
+   return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+   struct cpuidle_device *temp_dev;
+   struct list_head *pos;
+   int ret, cpu = dev-cpu;
+
+   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
+   temp_dev = container_of(pos, struct cpuidle_device, idle_list);
+   if (dev == temp_dev) {
+   list_del(temp_dev-idle_list);
+   cpuidle_remove_state_sysfs(temp_dev);
+   break;
+   }
+   }
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   temp_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   ret = cpuidle_add_state_sysfs(temp_dev);
+   }
+   cpuidle_kick_cpus();
+}
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -136,9 +176,6 @@ int cpuidle_enable_device(struct cpuidle
return ret;
}
 
-   if ((ret = cpuidle_add_state_sysfs(dev)))
-   return ret;
-
if (cpuidle_curr_governor-enable 
(ret = cpuidle_curr_governor-enable(dev)))
goto fail_sysfs;
@@ -157,7 +194,7 @@ int cpuidle_enable_device(struct cpuidle
return 0;
 
 fail_sysfs:
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 
return ret;
 }
@@ -182,8 +219,6 @@ void cpuidle_disable_device(struct cpuid
 
if (cpuidle_curr_governor-disable)
cpuidle_curr_governor-disable(dev);
-
-   cpuidle_remove_state_sysfs(dev);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -267,6 +302,7 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev);
+   cpuidle_add_to_list(dev);
 
mutex_unlock(cpuidle_lock);
 
@@ -288,6 +324,7 @@ void cpuidle_unregister_device(struct cp
cpuidle_pause_and_lock();
 
cpuidle_disable_device(dev);
+   cpuidle_remove_from_list(dev);
 
per_cpu(cpuidle_devices, dev-cpu) = NULL;
 
@@ -338,12 +375,15 @@ static inline void latency_notifier_init
  */
 static int __init cpuidle_init(void)
 {
-   int ret;
+   int ret, cpu;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
 
+   for_each_possible_cpu(cpu)
+   INIT_LIST_HEAD(per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(cpuidle_latency_notifier);
 
return 0;
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h

[v7 PATCH 3/7]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

This patch cleans up x86 of all instances of pm_idle.

pm_idle which was earlier called from cpu_idle() idle loop
is replaced by cpuidle_idle_call.

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c |   37 ++--
 arch/x86/kernel/process.c|   79 ---
 arch/x86/kernel/process_32.c |3 +
 arch/x86/kernel/process_64.c |3 +
 arch/x86/xen/setup.c |   22 +++
 5 files changed, 118 insertions(+), 26 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
 #include linux/pm.h
 #include linux/clockchips.h
 #include linux/random.h
+#include linux/cpuidle.h
 #include trace/events/power.h
 #include asm/system.h
 #include asm/apic.h
@@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -329,17 +324,15 @@ static void do_nothing(void *unused)
 }
 
 /*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
  *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
  */
 void cpu_idle_wait(void)
 {
smp_mb();
-   /* kick all the CPUs so that they exit out of pm_idle */
+   /* kick all the CPUs so that they exit out of idle loop */
smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -518,15 +511,59 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
*st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
+
+static int setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -534,18 +571,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO using mwait in idle threads.\n);
-   pm_idle = mwait_idle;
+   local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO using C1E aware idle routine\n);
-   pm_idle = c1e_idle;
+   local_idle = c1e_idle;
} else
-   pm_idle = default_idle;
+   local_idle = default_idle;
+
+   return;
 }
 
 void __init init_c1e_mask(void)
 {
/* If we're using c1e_idle, we need to allocate c1e_mask. */
-   if (pm_idle == c1e_idle)
+   if (local_idle == c1e_idle)
zalloc_cpumask_var(c1e_mask, GFP_KERNEL);
 }
 
@@ -556,7 +595,7 @@ static int __init

[v7 PATCH 4/7]: POWER: enable cpuidle for POWER.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of ppc_md.power_save is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of ppc_md.power_save */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v7 PATCH 5/7]: pSeries/cpuidle: remove dedicate/shared idle loops, which will be moved to arch/powerpc/platforms/pseries/processor_idle.c.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

This patch removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since they are now implemented as part
of arch/powerpc/platforms/pseries/processor_idle.c

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   50 +++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 22 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -301,18 +298,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -500,80 +487,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -60,35 +61,26 @@ void cpu_idle(void)
while (!need_resched

[v7 PATCH 6/7]: POWER: add a default_idle idle loop for POWER.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

In arch/powerpc/kernel/idle.c create a default_idle() routine by moving
the failover condition of the cpu_idle() idle loop. This is needed by
cpuidle infrastructure to call default_idle when other idle routines
are not yet registered. Functionality remains the same, but the code is
slightly moved around.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -91,6 +91,9 @@ config ARCH_HAS_ILOG2_U64
 config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -113,6 +113,12 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v7 PATCH 7/7]: pSeries: implement pSeries processor idle module.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements pseries_cpuidle_loop(), which would be the main idle loop
called from cpu_idle(). Based on the decision taken by the cpuidle
governor, it enters either dedicated_snooze_loop or dedicated_cede_loop
on a dedicated-processor LPAR, or shared_cede_loop on a
shared-processor LPAR.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/system.h   |1 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  208 
 arch/powerpc/platforms/pseries/pseries.h|8 
 5 files changed, 220 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,10 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,208 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+/* pSeries Idle state Flags */
+#definePSERIES_DEDICATED_SNOOZE(0x01)
+#definePSERIES_DEDICATED_CEDE  (0x02)
+#definePSERIES_SHARED_CEDE (0x03)
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede_loop(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void dedicated_snooze_loop(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void dedicated_cede_loop(void

Re: [v7 PATCH 0/7]: cpuidle/x86/POWER: Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-10-06 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-10-06 20:54:21]:

 Hi
 
 Please consider this for inclusion into the testing tree.
 
 This patchset introduces cpuidle infrastructure to POWER, prototyping
 for pSeries, and also does a major refactoring of current x86 idle
 power management and a cleanup of cpuidle infrastructure.
 
 Earlier discussions on the same can be found at:
 
 v6 -- http://lkml.org/lkml/2009/9/22/180
 v5 -- http://lkml.org/lkml/2009/9/22/26
 v4 -- http://lkml.org/lkml/2009/9/1/133
 v3 -- http://lkml.org/lkml/2009/8/27/124
 v2 -- http://lkml.org/lkml/2009/8/26/233
 v1 -- http://lkml.org/lkml/2009/8/19/150
 
 
 Changes in this version:
 -
 
 * Earlier implementation maintained a list of cpuidle devices and
   the list was traversed to disable and enable the cpuidle devices
   when switching governors. This list is not necessary and is removed.
   Now, we get the per cpu structure cpuidle_devices and disable and
   enable it for each online cpu while switching governors.
 
 * Earlier iterations were buggy w.r.t the sysfs interface of the cpuidle.
   This iteration removes the bug in PATCH 1/7.
 
 * In PATCH 7/7, smt_snooze_delay was not being updated whenever it was
   changed through the sysfs interface. Provide a helper routine to
   update it.
 
 * In PATCH 7/7, the idle loops are renamed to denote the idle states
   to avoid any confusion.
 

Also, the per-cpu nature of registration/unregistration of cpuidle
has been maintained as ACPI needs this.

 
 thanks,
 arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v6 PATCH 0/7]: cpuidle/x86/POWER: Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-09-25 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:
Hi,

I have done the following experiments and have posted the results
below.


Average of 5 iterations:
--
--

Kernbench make -j16 results on a
16 core x86 machine _with_deep_sleep_ support (C1,C2,C3)


Without the patches applied With the patches applied


31.8s   30.4s

--
--


Kernbench make -j8 results on a
8 core x86 machine _without_deep_sleep_ support (only mwait)


Without the patches applied With the patches applied


20.2s   20.4s

--
--

Kernbench make -j8 results on a 8 core _dedicated_lpar_pSeries_ machine


Without the patches applied With the patches applied


4m, 37s 4m, 36s

--
--

Please let me know if any other kind of testing is necessary.


Based on the feedback, I will post out the next iteration with a few
minor bug fixes.


thanks,
arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v6 PATCH 0/7]: cpuidle/x86/POWER: Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-09-23 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

Hi Len, (or other acpi folks),

I had a question regarding ACPI-cpuidle interaction in the current
implementation.

Currently, every cpu (i.e. acpi_processor) registers to cpuidle as
a cpuidle_device. So every cpu has to go through the process of
setting up the idle states and then registering as a cpuidle device.

What exactly is the reason behind this?

Is this really necessary, or can we have a one-time, system-wide
registration with cpuidle by ACPI?

I'm currently in the process of enabling cpuidle for POWER systems and
find a system-wide registration mechanism to be a cleaner
design.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5 RFC PATCH 0/7]: cpuidle/x86/POWER (REDESIGN): Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-09-22 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-09-22 09:25:59]:

 
 
 Much better :-)
 
 
 But I'm puzzled by all the per-cpu-ish-ness of the stuff. Why would you
 need to register things on a per-cpu basis?
 
 Also:
 
 
 +   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
 +   temp_dev = container_of(pos, struct cpuidle_device,
 +   percpu_list[cpu]);
 +   if (dev == temp_dev) {
 +   list_del(temp_dev-percpu_list[cpu]);
 +   cpuidle_remove_state_sysfs(temp_dev);
 +   }
 +   }
 
 Looks buggy, either you want to break out of the loop on dev ==
 temp_dev, or you want to use list_for_each_safe().
 
 
 

Hi Peter,

There were a couple of bugs, which I have cleaned up for the
next iteration.

* As you pointed out above, the loop is buggy.
* Also, the percpu_list[NR_CPUS] which I am defining inside
  struct cpuidle_device is wrong. It does not need to be an
  array.

Thanks for the quick turnaround
arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v6 PATCH 0/7]: cpuidle/x86/POWER: Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-09-22 Thread Arun R Bharadwaj
Hi,

This patchset introduces cpuidle infrastructure to POWER, prototyping
for pSeries, and also does a major refactoring of current x86 idle
power management and a cleanup of cpuidle infrastructure.

My earlier iterations can be found at:

v5 -- http://lkml.org/lkml/2009/9/22/26
v4 -- http://lkml.org/lkml/2009/9/1/133
v3 -- http://lkml.org/lkml/2009/8/27/124
v2 -- http://lkml.org/lkml/2009/8/26/233
v1 -- http://lkml.org/lkml/2009/8/19/150


Changes in this version:
--
Remove the bug from the previous iteration in the routine
cpuidle_remove_from_list(), which was causing the kernel to panic on
platforms supporting multiple sleep states.

Add the routine cpuidle_kick_cpus() in POWER, which is needed to kick
the cpus out of their idle when changing the idle routines.


TODO:
---
Peterz suggested it would be nice to have a sysfs interface through
which an idle routine can be forced at runtime.

Also, the current implementation registers every cpu as a cpuidle_device,
but this is overkill; the registering mechanism should be a
system-wide process and not per-cpu. (Probably one of the original
cpuidle authors can reply to this - Venki, Shaohua Li?)

ppc_md.power_save has been replaced by cpuidle_idle_call only for
pseries. So this needs to be done for all POWER platforms so that
ppc_md.power_save is completely removed.


Any comments on the design are welcome.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v6 PATCH 2/7]: cpuidle: implement a list based approach to register a set of idle routines.

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

Implement a list based registering mechanism for architectures which
have multiple sets of idle routines which are to be registered.

Currently, in x86 it is done by merely setting pm_idle = idle_routine
and managing this pm_idle pointer is messy.

To give an example of how this mechanism works:
In x86, initially, the idle routine is selected from the set of poll/mwait/
c1e/default idle loops. The selected idle loop is then registered in cpuidle
as a cpuidle device with a single idle state. Once ACPI comes up, it registers
another set of idle states on top of this state. Similarly, if a module
registers another set of idle loops, it is added to this list.

This provides a clean way of registering and unregistering idle state
routines.

In the current implementation, pm_idle is set as the current idle routine
being used and the old idle routine has to be maintained and when a module
registers/unregisters an idle routine, confusion arises.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   51 ++
 include/linux/cpuidle.h   |1 
 2 files changed, 48 insertions(+), 4 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -21,6 +21,7 @@
 #include cpuidle.h
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
@@ -111,6 +112,45 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+   int ret, cpu = dev-cpu;
+   struct cpuidle_device *old_dev;
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   old_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   cpuidle_remove_state_sysfs(old_dev);
+   }
+
+   list_add(dev-idle_list, per_cpu(cpuidle_devices_list, cpu));
+   ret = cpuidle_add_state_sysfs(dev);
+   return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+   struct cpuidle_device *temp_dev;
+   struct list_head *pos;
+   int ret, cpu = dev-cpu;
+
+   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
+   temp_dev = container_of(pos, struct cpuidle_device, idle_list);
+   if (dev == temp_dev) {
+   list_del(temp_dev-idle_list);
+   cpuidle_remove_state_sysfs(temp_dev);
+   break;
+   }
+   }
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   temp_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, idle_list);
+   ret = cpuidle_add_state_sysfs(temp_dev);
+   }
+   cpuidle_kick_cpus();
+}
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -135,7 +175,7 @@ int cpuidle_enable_device(struct cpuidle
return ret;
}
 
-   if ((ret = cpuidle_add_state_sysfs(dev)))
+   if ((cpuidle_add_to_list(dev)))
return ret;
 
if (cpuidle_curr_governor-enable 
@@ -156,7 +196,7 @@ int cpuidle_enable_device(struct cpuidle
return 0;
 
 fail_sysfs:
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 
return ret;
 }
@@ -182,7 +222,7 @@ void cpuidle_disable_device(struct cpuid
if (cpuidle_curr_governor-disable)
cpuidle_curr_governor-disable(dev);
 
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -350,12 +390,15 @@ static inline void latency_notifier_init
  */
 static int __init cpuidle_init(void)
 {
-   int ret;
+   int ret, cpu;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
 
+   for_each_possible_cpu(cpu)
+   INIT_LIST_HEAD(per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(cpuidle_latency_notifier);
 
return 0;
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h
+++ linux.trees.git/include/linux/cpuidle.h
@@ -93,6 +93,7 @@ struct cpuidle_device {
struct cpuidle_state*last_state;
 
struct list_headdevice_list;
+   struct list_headidle_list;
struct kobject  kobj;
struct completion   kobj_unregister;
void*governor_data

[v6 PATCH 3/7]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

This patch cleans up x86 of all instances of pm_idle.

pm_idle which was earlier called from cpu_idle() idle loop
is replaced by cpuidle_idle_call.

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c |   37 +--
 arch/x86/kernel/process.c|   69 ++-
 arch/x86/kernel/process_32.c |3 +
 arch/x86/kernel/process_64.c |3 +
 arch/x86/xen/setup.c |   22 +
 5 files changed, 114 insertions(+), 20 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -9,6 +9,8 @@
 #include linux/pm.h
 #include linux/clockchips.h
 #include linux/random.h
+#include linux/cpuidle.h
+
 #include trace/power.h
 #include asm/system.h
 #include asm/apic.h
@@ -247,12 +249,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -531,15 +527,58 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
*st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
+static int __cpuinit setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+late_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -547,18 +586,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO using mwait in idle threads.\n);
-   pm_idle = mwait_idle;
+   local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO using C1E aware idle routine\n);
-   pm_idle = c1e_idle;
+   local_idle = c1e_idle;
} else
-   pm_idle = default_idle;
+   local_idle = default_idle;
+
+   return;
 }
 
 void __init init_c1e_mask(void)
 {
/* If we're using c1e_idle, we need to allocate c1e_mask. */
-   if (pm_idle == c1e_idle) {
+   if (local_idle == c1e_idle) {
alloc_cpumask_var(c1e_mask, GFP_KERNEL);
cpumask_clear(c1e_mask);
}
@@ -571,7 +612,7 @@ static int __init idle_setup(char *str)
 
if (!strcmp(str, poll)) {
printk(using polling idle threads.\n);
-   pm_idle = poll_idle;
+   local_idle = poll_idle;
} else if (!strcmp(str, mwait))
force_mwait = 1;
else if (!strcmp(str, halt)) {
@@ -582,7 +623,7 @@ static int __init idle_setup(char *str)
 * To continue to load the CPU idle driver, don't touch
 * the boot_option_idle_override.
 */
-   pm_idle = default_idle;
+   local_idle = default_idle;
idle_halt = 1;
return 0;
} else if (!strcmp(str, nomwait)) {
Index: linux.trees.git/arch/x86/kernel/process_32.c
===
--- linux.trees.git.orig/arch/x86/kernel/process_32.c

[v6 PATCH 4/7]: POWER: enable cpuidle for POWER.

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs come out of the old
+ * idle loop and start using the new idle loop.
+ * Required while changing idle handler on SMP systems.
+ * Caller must have changed idle handler to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of ppc_md.power_save is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of ppc_md.power_save */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v6 PATCH 5/7]: pSeries/cpuidle: remove dedicate/shared idle loops, which will be moved to arch/powerpc/platforms/pseries/processor_idle.c

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

This patch removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since they are now implemented as part
of arch/powerpc/platforms/pseries/processor_idle.c

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   50 +++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 22 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -301,18 +298,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -500,80 +487,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -60,35 +61,26 @@ void cpu_idle(void)
while (!need_resched

[v6 PATCH 6/7]: POWER: add a default_idle idle loop for POWER.

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

In arch/powerpc/kernel/idle.c create a default_idle() routine by moving
the failover condition of the cpu_idle() idle loop. This is needed by
cpuidle infrastructure to call default_idle when other idle routines
are not yet registered. Functionality remains the same, but the code is
slightly moved around.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -91,6 +91,9 @@ config ARCH_HAS_ILOG2_U64
 config ARCH_HAS_CPU_IDLE_WAIT
def_bool y
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -113,6 +113,12 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v6 PATCH 7/7]: pSeries: implement pSeries processor idle module.

2009-09-22 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 16:55:27]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements a pseries_cpuidle_loop() which would be the main idle loop
called from cpu_idle(). Based on the decision taken by the cpuidle governor,
it enters either cede1 or cede2 on a dedicated-processor lpar, or shared_cede
on a shared-processor lpar.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  191 
 arch/powerpc/platforms/pseries/pseries.h|9 +
 arch/powerpc/platforms/pseries/setup.c  |8 -
 4 files changed, 207 insertions(+), 2 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,11 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+int pseries_processor_idle_init(void);
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,191 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void cede1(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void cede2(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor();
+}
+
+static int pseries_cpuidle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+   unsigned long

[v5 RFC PATCH 0/7]: cpuidle/x86/POWER (REDESIGN): Cleanup idle power management code in x86, cleanup drivers/cpuidle/cpuidle.c and introduce cpuidle to POWER.

2009-09-21 Thread Arun R Bharadwaj
Hi,

** This is an RFC, not for inclusion **

This patchset introduces cpuidle infrastructure to POWER, prototyping
for pSeries, and also does a major refactoring of current x86 idle
power management and a cleanup of cpuidle infrastructure.

My earlier iterations can be found at:

v4 -- http://lkml.org/lkml/2009/9/1/133
v3 -- http://lkml.org/lkml/2009/8/27/124
v2 -- http://lkml.org/lkml/2009/8/26/233
v1 -- http://lkml.org/lkml/2009/8/19/150

Major Changes in this iteration:
--

* Refactoring x86 idle power management code
Remove all instances of pm_idle and make cpuidle_idle_call
_the_ idle routine in x86. So, cpuidle_idle_call will be
called from the main idle loop, cpu_idle, instead of the
function pointer pm_idle. Also, pm_idle was used by the apm module
and xen. Change those instances such that they register to
cpuidle instead.

* Cleanup drivers/cpuidle/cpuidle.c
Currently, the cpuidle implementation has a weakness in the
framework where an exported pm_idle function pointer is
manipulated by various subsystems. The proposed framework has
a registration mechanism to cleanly add and remove new idle
routines from different subsystems.

* Implement cpuidle for pSeries
Implement the processor_idle module for pseries, which
registers idle loops to cpuidle, and also clean up
arch/powerpc/platforms/pseries/setup.c by removing the
redundant pseries_dedicated/shared_idle_sleep routines, which are
now implemented in processor_idle.c.
Also, remove all instances of ppc_md.power_save, for the same
reason as that given for pm_idle.

TODO:
-

* Currently, the list based approach that I'm using here is not
  working in a clean way on a few x86 platforms which have multiple
  sleep states, leading to kernel panics. So working on resolving
  that.

* ppc_md.power_save has been replaced by cpuidle_idle_call only for
  pseries. So this needs to be done for all POWER platforms so that
  ppc_md.power_save is completely removed.

Patches included in this series:
-

1/7 - cleanup drivers/cpuidle/cpuidle.c
2/7 - implement a list based approach to register a set of idle
  routines.
3/7 - refactor x86 idle power management code and remove all instances
  of pm_idle.
4/7 - enable cpuidle for POWER.
5/7 - remove dedicate/shared idle loops, which will be moved to
  arch/powerpc/platforms/pseries/processor_idle.c
6/7 - add a default_idle idle loop for POWER.
7/7 - implement pSeries processor idle module.

Any comments on the design are welcome.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v5 RFC PATCH 1/7]: cpuidle: cleanup drivers/cpuidle/cpuidle.c

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

This patch cleans up drivers/cpuidle/cpuidle.c
Earlier cpuidle assumed pm_idle as the default idle loop. Break that
assumption and make it more generic. cpuidle_idle_call() which is the
main idle loop of cpuidle is to be called by architectures which have
registered to cpuidle.

Remove routines cpuidle_install/uninstall_idle_handler() and
cpuidle_kick_cpus() which are not needed anymore.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c  |   59 ++---
 drivers/cpuidle/cpuidle.h  |1 
 drivers/cpuidle/governor.c |3 --
 include/linux/cpuidle.h|3 ++
 4 files changed, 11 insertions(+), 55 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -24,20 +24,6 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
-
-static int enabled_devices;
-
-#if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
-static void cpuidle_kick_cpus(void)
-{
-   cpu_idle_wait();
-}
-#elif defined(CONFIG_SMP)
-# error Arch needs cpu_idle_wait() equivalent here
-#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT  !CONFIG_SMP */
-static void cpuidle_kick_cpus(void) {}
-#endif
 
 static int __cpuidle_register_device(struct cpuidle_device *dev);
 
@@ -46,7 +32,7 @@ static int __cpuidle_register_device(str
  *
  * NOTE: no locks or semaphores should be used here
  */
-static void cpuidle_idle_call(void)
+void cpuidle_idle_call(void)
 {
struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
struct cpuidle_state *target_state;
@@ -54,13 +40,10 @@ static void cpuidle_idle_call(void)
 
/* check if the device is ready */
if (!dev || !dev-enabled) {
-   if (pm_idle_old)
-   pm_idle_old();
-   else
 #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-   default_idle();
+   default_idle();
 #else
-   local_irq_enable();
+   local_irq_enable();
 #endif
return;
}
@@ -74,7 +57,11 @@ static void cpuidle_idle_call(void)
hrtimer_peek_ahead_timers();
 #endif
/* ask the governor for the next state */
-   next_state = cpuidle_curr_governor-select(dev);
+   if (dev-state_count  1)
+   next_state = cpuidle_curr_governor-select(dev);
+   else
+   next_state = 0;
+
if (need_resched())
return;
target_state = dev-states[next_state];
@@ -94,35 +81,11 @@ static void cpuidle_idle_call(void)
 }
 
 /**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
-   /* Make sure all changes finished before we switch to new idle 
*/
-   smp_wmb();
-   pm_idle = cpuidle_idle_call;
-   }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
-   cpuidle_kick_cpus();
-   }
-}
-
-/**
  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
  */
 void cpuidle_pause_and_lock(void)
 {
mutex_lock(cpuidle_lock);
-   cpuidle_uninstall_idle_handler();
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
@@ -132,7 +95,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
  */
 void cpuidle_resume_and_unlock(void)
 {
-   cpuidle_install_idle_handler();
mutex_unlock(cpuidle_lock);
 }
 
@@ -180,7 +142,6 @@ int cpuidle_enable_device(struct cpuidle
 
dev-enabled = 1;
 
-   enabled_devices++;
return 0;
 
 fail_sysfs:
@@ -211,7 +172,6 @@ void cpuidle_disable_device(struct cpuid
cpuidle_curr_governor-disable(dev);
 
cpuidle_remove_state_sysfs(dev);
-   enabled_devices--;
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -303,7 +263,6 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev);
-   cpuidle_install_idle_handler();
 
mutex_unlock(cpuidle_lock);
 
@@ -382,8 +341,6 @@ static int __init cpuidle_init(void)
 {
int ret;
 
-   pm_idle_old = pm_idle;
-
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
Index: linux.trees.git/drivers/cpuidle/governor.c
===
--- linux.trees.git.orig/drivers/cpuidle/governor.c
+++ linux.trees.git/drivers/cpuidle/governor.c
@@ -48,8 +48,6 @@ int cpuidle_switch_governor(struct cpuid
if (gov

[v5 RFC PATCH 2/7]: cpuidle: implement a list based approach to register a set of idle routines.

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

Implement a list based registering mechanism for architectures which
have multiple sets of idle routines which are to be registered.

Currently, in x86 it is done by merely setting pm_idle = idle_routine
and managing this pm_idle pointer is messy.

To give an example of how this mechanism works:
In x86, initially, the idle routine is selected from the set of poll/mwait/
c1e/default idle loops. The selected idle loop is then registered in cpuidle
as a cpuidle device with a single idle state. Once ACPI comes up, it registers
another set of idle states on top of this state. Similarly, if a module
registers another set of idle loops, it is added to this list.

This provides a clean way of registering and unregistering idle state
routines.

In the current implementation, pm_idle points to the idle routine currently
in use and the old idle routine has to be saved separately, so confusion
arises when a module registers or unregisters an idle routine.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   50 ++
 include/linux/cpuidle.h   |1 
 2 files changed, 47 insertions(+), 4 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -21,6 +21,7 @@
 #include cpuidle.h
 
 DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct list_head, cpuidle_devices_list);
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
@@ -100,6 +101,44 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+int cpuidle_add_to_list(struct cpuidle_device *dev)
+{
+   int ret, cpu = dev-cpu;
+   struct cpuidle_device *old_dev;
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   old_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, percpu_list[cpu]);
+   cpuidle_remove_state_sysfs(old_dev);
+   }
+
+   list_add(dev-percpu_list[cpu], per_cpu(cpuidle_devices_list, cpu));
+   ret = cpuidle_add_state_sysfs(dev);
+   return ret;
+}
+
+void cpuidle_remove_from_list(struct cpuidle_device *dev)
+{
+   struct cpuidle_device *temp_dev;
+   struct list_head *pos;
+   int ret, cpu = dev-cpu;
+
+   list_for_each(pos, per_cpu(cpuidle_devices_list, cpu)) {
+   temp_dev = container_of(pos, struct cpuidle_device,
+   percpu_list[cpu]);
+   if (dev == temp_dev) {
+   list_del(temp_dev-percpu_list[cpu]);
+   cpuidle_remove_state_sysfs(temp_dev);
+   }
+   }
+
+   if (!list_empty(per_cpu(cpuidle_devices_list, cpu))) {
+   temp_dev = list_first_entry(per_cpu(cpuidle_devices_list, cpu),
+   struct cpuidle_device, percpu_list[cpu]);
+   ret = cpuidle_add_state_sysfs(temp_dev);
+   }
+}
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -124,7 +163,7 @@ int cpuidle_enable_device(struct cpuidle
return ret;
}
 
-   if ((ret = cpuidle_add_state_sysfs(dev)))
+   if ((cpuidle_add_to_list(dev)))
return ret;
 
if (cpuidle_curr_governor-enable 
@@ -145,7 +184,7 @@ int cpuidle_enable_device(struct cpuidle
return 0;
 
 fail_sysfs:
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 
return ret;
 }
@@ -171,7 +210,7 @@ void cpuidle_disable_device(struct cpuid
if (cpuidle_curr_governor-disable)
cpuidle_curr_governor-disable(dev);
 
-   cpuidle_remove_state_sysfs(dev);
+   cpuidle_remove_from_list(dev);
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
@@ -339,12 +378,15 @@ static inline void latency_notifier_init
  */
 static int __init cpuidle_init(void)
 {
-   int ret;
+   int ret, cpu;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
 
+   for_each_possible_cpu(cpu)
+   INIT_LIST_HEAD(per_cpu(cpuidle_devices_list, cpu));
+
latency_notifier_init(cpuidle_latency_notifier);
 
return 0;
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h
+++ linux.trees.git/include/linux/cpuidle.h
@@ -93,6 +93,7 @@ struct cpuidle_device {
struct cpuidle_state*last_state;
 
struct list_headdevice_list;
+   struct list_headpercpu_list[NR_CPUS];
struct kobject  kobj;
struct completion   kobj_unregister;
void*governor_data

[v5 RFC PATCH 3/7]: x86: refactor x86 idle power management code and remove all instances of pm_idle.

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

This patch removes all instances of pm_idle from x86.

pm_idle, which was earlier called from the cpu_idle() idle loop,
is replaced by cpuidle_idle_call.

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/x86/kernel/apm_32.c |   37 +--
 arch/x86/kernel/process.c|   69 ++-
 arch/x86/kernel/process_32.c |3 +
 arch/x86/kernel/process_64.c |3 +
 arch/x86/xen/setup.c |   22 +
 5 files changed, 114 insertions(+), 20 deletions(-)

Index: linux.trees.git/arch/x86/kernel/process.c
===
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -9,6 +9,8 @@
 #include linux/pm.h
 #include linux/clockchips.h
 #include linux/random.h
+#include linux/cpuidle.h
+
 #include trace/power.h
 #include asm/system.h
 #include asm/apic.h
@@ -247,12 +249,6 @@ int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -531,15 +527,58 @@ static void c1e_idle(void)
default_idle();
 }
 
+static void (*local_idle)(void);
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+   .name = cpuidle_default,
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state 
*st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   t1 = ktime_get();
+   local_idle();
+   t2 = ktime_get();
+
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+   ret = (int) diff;
+
+   return ret;
+}
+static int __cpuinit setup_cpuidle_simple(void)
+{
+   struct cpuidle_device *dev;
+   int cpu;
+
+   if (!cpuidle_curr_driver)
+   cpuidle_register_driver(cpuidle_default_driver);
+
+   for_each_online_cpu(cpu) {
+   dev = per_cpu(idle_devices, cpu);
+   dev-cpu = cpu;
+   dev-states[0].enter = local_idle_loop;
+   dev-state_count = 1;
+   cpuidle_register_device(dev);
+   }
+   return 0;
+}
+late_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-   if (pm_idle == poll_idle  smp_num_siblings  1) {
+   if (local_idle == poll_idle  smp_num_siblings  1) {
printk(KERN_WARNING WARNING: polling idle and HT enabled,
 performance may degrade.\n);
}
 #endif
-   if (pm_idle)
+   if (local_idle)
return;
 
if (cpu_has(c, X86_FEATURE_MWAIT)  mwait_usable(c)) {
@@ -547,18 +586,20 @@ void __cpuinit select_idle_routine(const
 * One CPU supports mwait = All CPUs supports mwait
 */
printk(KERN_INFO using mwait in idle threads.\n);
-   pm_idle = mwait_idle;
+   local_idle = mwait_idle;
} else if (check_c1e_idle(c)) {
printk(KERN_INFO using C1E aware idle routine\n);
-   pm_idle = c1e_idle;
+   local_idle = c1e_idle;
} else
-   pm_idle = default_idle;
+   local_idle = default_idle;
+
+   return;
 }
 
 void __init init_c1e_mask(void)
 {
/* If we're using c1e_idle, we need to allocate c1e_mask. */
-   if (pm_idle == c1e_idle) {
+   if (local_idle == c1e_idle) {
alloc_cpumask_var(c1e_mask, GFP_KERNEL);
cpumask_clear(c1e_mask);
}
@@ -571,7 +612,7 @@ static int __init idle_setup(char *str)
 
if (!strcmp(str, poll)) {
printk(using polling idle threads.\n);
-   pm_idle = poll_idle;
+   local_idle = poll_idle;
} else if (!strcmp(str, mwait))
force_mwait = 1;
else if (!strcmp(str, halt)) {
@@ -582,7 +623,7 @@ static int __init idle_setup(char *str)
 * To continue to load the CPU idle driver, don't touch
 * the boot_option_idle_override.
 */
-   pm_idle = default_idle;
+   local_idle = default_idle;
idle_halt = 1;
return 0;
} else if (!strcmp(str, nomwait)) {
Index: linux.trees.git/arch/x86/kernel/process_32.c
===
--- linux.trees.git.orig/arch/x86/kernel/process_32.c

[v5 RFC PATCH 4/7]: POWER: enable cpuidle for POWER.

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig |   14 ++
 1 file changed, 14 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -243,6 +243,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v5 RFC PATCH 5/7]: POWER/pSeries: remove dedicate/shared idle loops, which will be moved to arch/powerpc/platforms/pseries/processor_idle.c

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

This patch removes the routines pseries_shared_idle_sleep and
pseries_dedicated_idle_sleep, since their functionality is implemented
as part of arch/powerpc/platforms/pseries/processor_idle.c.

Also, similar to x86, call cpuidle_idle_call from cpu_idle() idle
loop instead of ppc_md.power_save.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |   50 +++---
 arch/powerpc/platforms/pseries/setup.c |   89 -
 2 files changed, 22 insertions(+), 117 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -75,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
 static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -301,18 +298,8 @@ static void __init pSeries_setup_arch(vo
pSeries_nvram_init();
 
/* Choose an idle loop */
-   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (firmware_has_feature(FW_FEATURE_SPLPAR))
vpa_init(boot_cpuid);
-   if (get_lppaca()-shared_proc) {
-   printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
-   } else {
-   printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
-   }
-   } else {
-   printk(KERN_DEBUG Using default idle loop\n);
-   }
 
if (firmware_has_feature(FW_FEATURE_LPAR))
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -500,80 +487,6 @@ static int __init pSeries_probe(void)
return 1;
 }
 
-
-DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{ 
-   unsigned int cpu = smp_processor_id();
-   unsigned long start_snooze;
-   unsigned long in_purr, out_purr;
-
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-   get_lppaca()-donate_dedicated_cpu = 1;
-   in_purr = mfspr(SPRN_PURR);
-
-   /*
-* We come in with interrupts disabled, and need_resched()
-* has been checked recently.  If we should poll for a little
-* while, do so.
-*/
-   if (__get_cpu_var(smt_snooze_delay)) {
-   start_snooze = get_tb() +
-   __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec;
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while (get_tb()  start_snooze) {
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   ppc64_runlatch_off();
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-   local_irq_disable();
-   if (need_resched() || cpu_is_offline(cpu))
-   goto out;
-   }
-
-   cede_processor();
-
-out:
-   HMT_medium();
-   out_purr = mfspr(SPRN_PURR);
-   get_lppaca()-wait_state_cycles += out_purr - in_purr;
-   get_lppaca()-donate_dedicated_cpu = 0;
-   get_lppaca()-idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()-idle = 1;
-
-   /*
-* Yield the processor to the hypervisor.  We return if
-* an external interrupt occurs (which are driven prior
-* to returning here) or if a prod occurs from another
-* processor. When returning here, external interrupts
-* are enabled.
-*/
-   cede_processor();
-
-   get_lppaca()-idle = 0;
-}
-
 static int pSeries_pci_probe_mode(struct pci_bus *bus)
 {
if (firmware_has_feature(FW_FEATURE_LPAR))
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -25,6 +25,7 @@
 #include linux/cpu.h
 #include linux/sysctl.h
 #include linux/tick.h
+#include linux/cpuidle.h
 
 #include asm/system.h
 #include asm/processor.h
@@ -60,35 +61,26 @@ void cpu_idle(void)
while (!need_resched

[v5 RFC PATCH 6/7]: POWER: add a default_idle idle loop for POWER.

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

In arch/powerpc/kernel/idle.c create a default_idle() routine by moving
the failover condition of the cpu_idle() idle loop. This is needed by
cpuidle infrastructure to call default_idle when other idle routines
are not yet registered. Functionality remains the same, but the code is
slightly moved around.


Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |3 +++
 arch/powerpc/include/asm/system.h |1 +
 arch/powerpc/kernel/idle.c|6 ++
 3 files changed, 10 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_DEFAULT_IDLE
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -218,6 +218,7 @@ extern unsigned long klimit;
 extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
+extern void default_idle(void);
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 
 /*
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -94,6 +94,12 @@ void cpu_idle(void)
}
 }
 
+void default_idle(void)
+{
+   HMT_low();
+   HMT_very_low();
+}
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v5 RFC PATCH 7/7]: POWER/pSeries: implement pSeries processor idle module.

2009-09-21 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-22 11:03:14]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements pseries_cpuidle_loop(), which would be the main idle loop
called from cpu_idle(). It enters either cede1 or cede2 on a dedicated
LPAR, or shared_cede on a shared-processor LPAR, based on the decision
taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  191 
 arch/powerpc/platforms/pseries/pseries.h|9 +
 arch/powerpc/platforms/pseries/setup.c  |8 -
 4 files changed, 207 insertions(+), 2 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,11 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+int pseries_processor_idle_init(void);
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,191 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+#include asm/firmware.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct cpuidle_device, pseries_dev);
+
+#define IDLE_STATE_COUNT   2
+
+static int pseries_idle_init(struct cpuidle_device *dev)
+{
+   return cpuidle_register_device(dev);
+}
+
+static void shared_cede(void)
+{
+   get_lppaca()-idle = 1;
+   cede_processor();
+   get_lppaca()-idle = 0;
+}
+
+static void cede1(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void cede2(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor();
+}
+
+static int pseries_cpuidle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+   unsigned long

Re: [v4 PATCH 1/5]: cpuidle: Cleanup drivers/cpuidle/cpuidle.c

2009-09-02 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-09-02 07:42:24]:

 On Tue, 2009-09-01 at 17:08 +0530, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:
  
  Cleanup drivers/cpuidle/cpuidle.c
  
  Cpuidle maintains a pm_idle_old void pointer because, currently in x86
  there is no clean way of registering and unregistering a idle function.
 
 Right, and instead of fixing that, they build this cpuidle crap on top,
 instead of replacing the current crap with it.
 
  So remove pm_idle_old and leave the responsibility of maintaining the
  list of registered idle loops to the architecture specific code. If the
  architecture registers cpuidle_idle_call as its idle loop, only then
  this loop is called.
 
 OK, that's a start I guess. Best would be to replace all of pm_idle with
 cpuidle, which is what should have been done from the very start.
 
 If cpuidle cannot fully replace the pm_idle functionality, then it needs
 to fix that. But having two layers of idle functions is just silly.
 
 Looking at patch 2 and 3, you're making the same mistake on power, after
 those patches there are multiple ways of registering idle functions, one
 through some native interface and one through cpuidle, this strikes me
 as undesirable.
 
 If cpuidle is a good idle function manager, then it should be good
 enough to be the sole one, if its not, then why bother with it at all.
 

Okay, I'm giving this approach a shot now, i.e. trying to make cpuidle
_the_ sole idle function manager. This would mean doing away with
pm_idle and ppc_md.power_save. Also, cpuidle_idle_call(), which is the
main idle loop of cpuidle (in drivers/cpuidle/cpuidle.c), will
have to be called from the arch-specific cpu_idle() code.

This would also mean enabling cpuidle for all platforms, even if the
platform doesn't have multiple idle states. A platform that doesn't
have multiple states wouldn't want the bloated code of the cpuidle
governors, and would want just a simple cpuidle loop.

--arun
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 0/5]: cpuidle/POWER (REDESIGN): Introducing cpuidle to POWER.

2009-09-01 Thread Arun R Bharadwaj
Hi,

** This is an RFC, not for inclusion **

This patchset introduces the cpuidle infrastructure to POWER. It is a
prototype for pseries only; porting to x86 is still in progress, and hence
it will *not* build on x86 or on other POWER platforms.

This is to get initial comments on the redesign of my earlier implementation
which can be found at http://lkml.org/lkml/2009/8/27/124

Major changes from last iteration:
--

* Cleanup drivers/cpuidle/cpuidle.c
Currently, the cpuidle implementation has a weakness in its
framework: an exported pm_idle function pointer is
manipulated by various subsystems. The proposed framework has
a registration architecture to cleanly add and remove new idle
routines from different subsystems.

* Introduce [un]register_idle_function() routines
Implement a LIFO based approach for registering architecture
dependent idle routines.

* Sample implementation of register_idle_function for pSeries


TODO:
-

* Extend this prototype to cover x86 and other archs that use cpuidle.
Currently, in x86, the cpu_idle() idle loop doesn't have a
default idle loop to fall back to if pm_idle is NULL, unlike
the corresponding implementation in pseries, where
ppc_md.power_save can be NULL and there is a fallback.
So we need to create a similar fork in cpu_idle() idle loop of
x86.



Patches included in this series:


1/5 - Cleanup drivers/cpuidle/cpuidle.c
2/5 - Implement routines to register and unregister idle function.
3/5 - Incorporate registering of idle loop for pSeries.
4/5 - Add Kconfig entry to enable cpuidle for POWER.
5/5 - Implement pSeries processor idle module.


Any comments on the design are welcome.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 1/5]: cpuidle: Cleanup drivers/cpuidle/cpuidle.c

2009-09-01 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:

Cleanup drivers/cpuidle/cpuidle.c

Cpuidle maintains a pm_idle_old void pointer because, currently in x86,
there is no clean way of registering and unregistering an idle function.

So remove pm_idle_old and leave the responsibility of maintaining the
list of registered idle loops to the architecture specific code. If the
architecture registers cpuidle_idle_call as its idle loop, only then
this loop is called.

Also remove unwanted functions cpuidle_[un]install_idle_handler,
cpuidle_kick_cpus()

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c  |   51 +++--
 drivers/cpuidle/governor.c |3 --
 2 files changed, 17 insertions(+), 37 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -24,9 +24,14 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
 
 static int enabled_devices;
+static int idle_function_registered;
+
+struct idle_function_desc cpuidle_idle_desc = {
+   .name   =   cpuidle_loop,
+   .idle_func  =   cpuidle_idle_call,
+};
 
 #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
 static void cpuidle_kick_cpus(void)
@@ -54,13 +59,10 @@ static void cpuidle_idle_call(void)
 
/* check if the device is ready */
if (!dev || !dev-enabled) {
-   if (pm_idle_old)
-   pm_idle_old();
-   else
 #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-   default_idle();
+   default_idle();
 #else
-   local_irq_enable();
+   local_irq_enable();
 #endif
return;
}
@@ -94,35 +96,11 @@ static void cpuidle_idle_call(void)
 }
 
 /**
- * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
- */
-void cpuidle_install_idle_handler(void)
-{
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
-   /* Make sure all changes finished before we switch to new idle 
*/
-   smp_wmb();
-   pm_idle = cpuidle_idle_call;
-   }
-}
-
-/**
- * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
- */
-void cpuidle_uninstall_idle_handler(void)
-{
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
-   cpuidle_kick_cpus();
-   }
-}
-
-/**
  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
  */
 void cpuidle_pause_and_lock(void)
 {
mutex_lock(cpuidle_lock);
-   cpuidle_uninstall_idle_handler();
 }
 
 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
@@ -132,7 +110,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
  */
 void cpuidle_resume_and_unlock(void)
 {
-   cpuidle_install_idle_handler();
mutex_unlock(cpuidle_lock);
 }
 
@@ -287,6 +264,12 @@ static int __cpuidle_register_device(str
return 0;
 }
 
+static void register_cpuidle_idle_function(void)
+{
+   register_idle_function(cpuidle_idle_desc);
+
+   idle_function_registered = 1;
+}
 /**
  * cpuidle_register_device - registers a CPU's idle PM feature
  * @dev: the cpu
@@ -303,7 +286,9 @@ int cpuidle_register_device(struct cpuid
}
 
cpuidle_enable_device(dev);
-   cpuidle_install_idle_handler();
+
+   if (!idle_function_registered)
+   register_cpuidle_idle_function();
 
mutex_unlock(cpuidle_lock);
 
@@ -382,8 +367,6 @@ static int __init cpuidle_init(void)
 {
int ret;
 
-   pm_idle_old = pm_idle;
-
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
return ret;
Index: linux.trees.git/drivers/cpuidle/governor.c
===
--- linux.trees.git.orig/drivers/cpuidle/governor.c
+++ linux.trees.git/drivers/cpuidle/governor.c
@@ -48,8 +48,6 @@ int cpuidle_switch_governor(struct cpuid
if (gov == cpuidle_curr_governor)
return 0;
 
-   cpuidle_uninstall_idle_handler();
-
if (cpuidle_curr_governor) {
list_for_each_entry(dev, cpuidle_detected_devices, device_list)
cpuidle_disable_device(dev);
@@ -63,7 +61,6 @@ int cpuidle_switch_governor(struct cpuid
return -EINVAL;
list_for_each_entry(dev, cpuidle_detected_devices, device_list)
cpuidle_enable_device(dev);
-   cpuidle_install_idle_handler();
printk(KERN_INFO cpuidle: using governor %s\n, gov-name);
}
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 2/5]: cpuidle: Implement routines to register and unregister idle function.

2009-09-01 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:

Implement a LIFO based approach for registering arch dependent
idle routines.

This is a prototype for pseries, needs to be extended
for other platforms.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/idle.c |5 +
 drivers/cpuidle/cpuidle.c  |   37 +
 include/linux/pm.h |   10 ++
 3 files changed, 52 insertions(+)

Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -46,6 +46,11 @@ static int __init powersave_off(char *ar
 }
 __setup(powersave=off, powersave_off);
 
+void set_arch_idle(void (*idle)(void))
+{
+   ppc_md.power_save = idle;
+}
+
 /*
  * The body of the idle task.
  */
Index: linux.trees.git/include/linux/pm.h
===
--- linux.trees.git.orig/include/linux/pm.h
+++ linux.trees.git/include/linux/pm.h
@@ -30,6 +30,16 @@ extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);
 
+struct idle_function_desc {
+   char*name;
+   void(*idle_func)(void);
+   struct list_headidle_list;
+};
+
+extern void set_arch_idle(void (*idle)(void));
+extern void register_idle_function(struct idle_function_desc *desc);
+extern void unregister_idle_function(struct idle_function_desc *desc);
+
 /*
  * Device power management
  */
Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -44,6 +44,43 @@ static void cpuidle_kick_cpus(void)
 static void cpuidle_kick_cpus(void) {}
 #endif
 
+LIST_HEAD(idle_function_list);
+static DEFINE_MUTEX(idle_list_mutex);
+
+void register_idle_function(struct idle_function_desc *desc)
+{
+   mutex_lock(idle_list_mutex);
+
+   list_add(desc-idle_list, idle_function_list);
+   set_arch_idle(desc-idle_func);
+   cpuidle_kick_cpus();
+
+   mutex_unlock(idle_list_mutex);
+}
+
+void unregister_idle_function(struct idle_function_desc *desc)
+{
+   struct list_head *pos;
+   struct idle_function_desc *temp_desc;
+
+   mutex_lock(idle_list_mutex);
+   WARN_ON_ONCE(list_empty(desc-idle_list) || desc != NULL);
+
+   list_for_each(pos, idle_function_list) {
+   temp_desc = container_of(pos, struct idle_function_desc,
+   idle_list);
+   if (temp_desc == desc) {
+   list_del(temp_desc-idle_list);
+   /* Re-using temp_desc here */
+   temp_desc = list_first_entry(idle_function_list,
+   struct idle_function_desc, idle_list);
+   set_arch_idle(temp_desc-idle_func);
+   cpuidle_kick_cpus();
+   }
+   }
+   mutex_unlock(idle_list_mutex);
+}
+
 static int __cpuidle_register_device(struct cpuidle_device *dev);
 
 /**
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 3/5]: pSeries: Incorporate registering of idle loop for pSeries.

2009-09-01 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:

The platform needs to register its idle function via register_idle_function(),
which provides a clean way of handling ppc_md.power_save.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/setup.c |   13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -280,6 +280,8 @@ static struct notifier_block pci_dn_reco
 
 static void __init pSeries_setup_arch(void)
 {
+   struct idle_function_desc pseries_idle_desc;
+
/* Discover PIC type and setup ppc_md accordingly */
pseries_discover_pic();
 
@@ -305,10 +307,17 @@ static void __init pSeries_setup_arch(vo
vpa_init(boot_cpuid);
if (get_lppaca()-shared_proc) {
printk(KERN_DEBUG Using shared processor idle loop\n);
-   ppc_md.power_save = pseries_shared_idle_sleep;
+   //snprintf(pseries_idle_desc.name, 16, shared_loop);
+   pseries_idle_desc.name = shared_loop;
+   pseries_idle_desc.idle_func = pseries_shared_idle_sleep;
+   register_idle_function(pseries_idle_desc);
} else {
printk(KERN_DEBUG Using dedicated idle loop\n);
-   ppc_md.power_save = pseries_dedicated_idle_sleep;
+   //snprintf(pseries_idle_desc.name, 16, 
dedicated_loop);
+   pseries_idle_desc.name = dedicated_loop;
+   pseries_idle_desc.idle_func =
+   pseries_dedicated_idle_sleep;
+   register_idle_function(pseries_idle_desc);
}
} else {
printk(KERN_DEBUG Using default idle loop\n);
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 4/5]: cpuidle: Add Kconfig entry to enable cpuidle for POWER.

2009-09-01 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -107,6 +107,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * ppc_md.power_save and update to new value.
+ * Required while changing ppc_md.power_save handler on SMP systems.
+ * Caller must have changed ppc_md.power_save to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of ppc_md.power_save is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of ppc_md.power_save */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v4 PATCH 5/5]: pSeries: Implement pSeries processor idle module.

2009-09-01 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements a pseries_cpuidle_loop() which would be the main idle loop
called from cpu_idle(). It makes decision of entering either snooze or nap
state based on the decision taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  179 
 arch/powerpc/platforms/pseries/pseries.h|   14 +
 arch/powerpc/platforms/pseries/setup.c  |3 
 4 files changed, 194 insertions(+), 3 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,16 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+struct pseries_processor_power {
+   struct cpuidle_device dev;
+   int count;
+   int id;
+};
+
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,179 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/system.h
+#include asm/machdep.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct pseries_processor_power, power);
+
+#define IDLE_STATE_COUNT   2
+
+static int pseries_idle_init(struct pseries_processor_power *power)
+{
+   return cpuidle_register_device(power-dev);
+}
+
+static void cede1(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void cede2(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor();
+}
+
+static int pseries_cpuidle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+   unsigned long in_purr, out_purr;
+
+   get_lppaca()-idle = 1;
+   get_lppaca()-donate_dedicated_cpu = 1;
+   in_purr = mfspr

Re: [v4 PATCH 1/5]: cpuidle: Cleanup drivers/cpuidle/cpuidle.c

2009-09-01 Thread Arun R Bharadwaj
* Balbir Singh bal...@linux.vnet.ibm.com [2009-09-01 22:58:25]:

 * Arun R B a...@linux.vnet.ibm.com [2009-09-01 17:08:40]:
 
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:
  
  Cleanup drivers/cpuidle/cpuidle.c
  
  Cpuidle maintains a pm_idle_old void pointer because, currently in x86
  there is no clean way of registering and unregistering a idle function.
 
  So remove pm_idle_old and leave the responsibility of maintaining the
  list of registered idle loops to the architecture specific code. If the
  architecture registers cpuidle_idle_call as its idle loop, only then
  this loop is called.
  
 
 It sounds as if there is a side-effect of this
 patch on x86 (am I reading it incorrectly), which can be fixed, but
 it will need a patch or so to get back the old behaviour on x86.
 

Hi Balbir,

Yes, your understanding is correct. Currently, x86 exports pm_idle and
this pm_idle is set to cpuidle_idle_call inside cpuidle.c

So instead of that x86 should just export a function called
set_arch_idle() which will be called from within
register_idle_function() and set pm_idle to the idle handler which is
currently being registered.

I have implemented this for pseries, and am in the process of doing it
for x86 too.

  Also remove unwanted functions cpuidle_[un]install_idle_handler,
  cpuidle_kick_cpus()
 
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   drivers/cpuidle/cpuidle.c  |   51 
  +++--
   drivers/cpuidle/governor.c |3 --
   2 files changed, 17 insertions(+), 37 deletions(-)
  
  Index: linux.trees.git/drivers/cpuidle/cpuidle.c
  ===
  --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
  +++ linux.trees.git/drivers/cpuidle/cpuidle.c
  @@ -24,9 +24,14 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
  
   DEFINE_MUTEX(cpuidle_lock);
   LIST_HEAD(cpuidle_detected_devices);
  -static void (*pm_idle_old)(void);
  
   static int enabled_devices;
  +static int idle_function_registered;
  +
  +struct idle_function_desc cpuidle_idle_desc = {
  +   .name   =   cpuidle_loop,
  +   .idle_func  =   cpuidle_idle_call,
  +};
  
   #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
   static void cpuidle_kick_cpus(void)
  @@ -54,13 +59,10 @@ static void cpuidle_idle_call(void)
  
  /* check if the device is ready */
  if (!dev || !dev-enabled) {
  -   if (pm_idle_old)
  -   pm_idle_old();
  -   else
   #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
  -   default_idle();
  +   default_idle();
   #else
  -   local_irq_enable();
  +   local_irq_enable();
   #endif
  return;
  }
  @@ -94,35 +96,11 @@ static void cpuidle_idle_call(void)
   }
  
   /**
  - * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
  - */
  -void cpuidle_install_idle_handler(void)
  -{
  -   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
  -   /* Make sure all changes finished before we switch to new idle 
  */
  -   smp_wmb();
  -   pm_idle = cpuidle_idle_call;
  -   }
  -}
  -
  -/**
  - * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop 
  handler
  - */
  -void cpuidle_uninstall_idle_handler(void)
  -{
  -   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
  -   pm_idle = pm_idle_old;
  -   cpuidle_kick_cpus();
  -   }
  -}
  -
  -/**
* cpuidle_pause_and_lock - temporarily disables CPUIDLE
*/
   void cpuidle_pause_and_lock(void)
   {
  mutex_lock(cpuidle_lock);
  -   cpuidle_uninstall_idle_handler();
   }
  
   EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
  @@ -132,7 +110,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
*/
   void cpuidle_resume_and_unlock(void)
   {
  -   cpuidle_install_idle_handler();
  mutex_unlock(cpuidle_lock);
   }
  
 
 What does this mean for users of cpuidle_pause_and_lock/unlock?
 Should we be calling register/unregister_idle_function here?


Yes, you are right. I have missed out on this part.
register/unregister_idle_function should replace
install/uninstall_idle_handler at those places. Thanks.

 
  @@ -287,6 +264,12 @@ static int __cpuidle_register_device(str
  return 0;
   }
  
  +static void register_cpuidle_idle_function(void)
  +{
  +   register_idle_function(cpuidle_idle_desc);
  +
  +   idle_function_registered = 1;
 
 Use booleans if possible, unless you intend to extend the meaning of
 registered someday.


I don't intend to extend the meaning of idle_function_registered.
Will use boolean here.

  +}
   /**
* cpuidle_register_device - registers a CPU's idle PM feature
* @dev: the cpu
  @@ -303,7 +286,9 @@ int cpuidle_register_device(struct cpuid
  }
  
  cpuidle_enable_device(dev);
  -   cpuidle_install_idle_handler();
  +
  +   if (!idle_function_registered)
  +   register_cpuidle_idle_function();
  
  mutex_unlock

Re: [v4 PATCH 1/5]: cpuidle: Cleanup drivers/cpuidle/cpuidle.c

2009-09-01 Thread Arun R Bharadwaj
* Balbir Singh bal...@linux.vnet.ibm.com [2009-09-01 22:58:25]:

 * Arun R B a...@linux.vnet.ibm.com [2009-09-01 17:08:40]:
 
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-09-01 17:07:04]:
  
  Cleanup drivers/cpuidle/cpuidle.c
  
  Cpuidle maintains a pm_idle_old void pointer because, currently in x86
  there is no clean way of registering and unregistering a idle function.
 
  So remove pm_idle_old and leave the responsibility of maintaining the
  list of registered idle loops to the architecture specific code. If the
  architecture registers cpuidle_idle_call as its idle loop, only then
  this loop is called.
  
 
 It sounds as if there is a side-effect of this
 patch on x86 (am I reading it incorrectly), which can be fixed, but
 it will need a patch or so to get back the old behaviour on x86.
 
  Also remove unwanted functions cpuidle_[un]install_idle_handler,
  cpuidle_kick_cpus()
 
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   drivers/cpuidle/cpuidle.c  |   51 
  +++--
   drivers/cpuidle/governor.c |3 --
   2 files changed, 17 insertions(+), 37 deletions(-)
  
  Index: linux.trees.git/drivers/cpuidle/cpuidle.c
  ===
  --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
  +++ linux.trees.git/drivers/cpuidle/cpuidle.c
  @@ -24,9 +24,14 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
  
   DEFINE_MUTEX(cpuidle_lock);
   LIST_HEAD(cpuidle_detected_devices);
  -static void (*pm_idle_old)(void);
  
   static int enabled_devices;
  +static int idle_function_registered;
  +
  +struct idle_function_desc cpuidle_idle_desc = {
  +   .name   =   cpuidle_loop,
  +   .idle_func  =   cpuidle_idle_call,
  +};
  
   #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
   static void cpuidle_kick_cpus(void)
  @@ -54,13 +59,10 @@ static void cpuidle_idle_call(void)
  
  /* check if the device is ready */
  if (!dev || !dev-enabled) {
  -   if (pm_idle_old)
  -   pm_idle_old();
  -   else
   #if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
  -   default_idle();
  +   default_idle();
   #else
  -   local_irq_enable();
  +   local_irq_enable();
   #endif
  return;
  }
  @@ -94,35 +96,11 @@ static void cpuidle_idle_call(void)
   }
  
   /**
  - * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
  - */
  -void cpuidle_install_idle_handler(void)
  -{
  -   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
  -   /* Make sure all changes finished before we switch to new idle 
  */
  -   smp_wmb();
  -   pm_idle = cpuidle_idle_call;
  -   }
  -}
  -
  -/**
  - * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop 
  handler
  - */
  -void cpuidle_uninstall_idle_handler(void)
  -{
  -   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
  -   pm_idle = pm_idle_old;
  -   cpuidle_kick_cpus();
  -   }
  -}
  -
  -/**
* cpuidle_pause_and_lock - temporarily disables CPUIDLE
*/
   void cpuidle_pause_and_lock(void)
   {
  mutex_lock(cpuidle_lock);
  -   cpuidle_uninstall_idle_handler();
   }
  
   EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
  @@ -132,7 +110,6 @@ EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock
*/
   void cpuidle_resume_and_unlock(void)
   {
  -   cpuidle_install_idle_handler();
  mutex_unlock(cpuidle_lock);
   }
  
 
 What does this mean for users of cpuidle_pause_and_lock/unlock?
 Should we be calling register/unregister_idle_function here?
 

Just observed the use case for cpuidle_pause_and_lock/unlock.
It is not clear as to why we need to switch back to the old idle
handler and then again back to cpuidle's idle handler. Wouldn't it
make more sense to just register the idle handler when the first
cpuidle device is being registered and unregister the idle handler
when the last cpuidle device is unregistered?

--arun

 
  @@ -287,6 +264,12 @@ static int __cpuidle_register_device(str
  return 0;
   }
  
  +static void register_cpuidle_idle_function(void)
  +{
  +   register_idle_function(cpuidle_idle_desc);
  +
  +   idle_function_registered = 1;
 
 Use booleans if possible, unless you intend to extend the meaning of
 registered someday.
 
  +}
   /**
* cpuidle_register_device - registers a CPU's idle PM feature
* @dev: the cpu
  @@ -303,7 +286,9 @@ int cpuidle_register_device(struct cpuid
  }
  
  cpuidle_enable_device(dev);
  -   cpuidle_install_idle_handler();
  +
  +   if (!idle_function_registered)
  +   register_cpuidle_idle_function();
  
  mutex_unlock(cpuidle_lock);
  
  @@ -382,8 +367,6 @@ static int __init cpuidle_init(void)
   {
  int ret;
  
  -   pm_idle_old = pm_idle;
  -
  ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
  if (ret)
  return ret;
  Index: linux.trees.git/drivers/cpuidle/governor.c

Re: [PATCH 2/4]: CPUIDLE: Introduce architecture independent cpuidle_pm_idle in drivers/cpuidle/cpuidle.c

2009-08-28 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-08-27 14:53:27]:

Hi Peter, Ben,

I've put the whole thing in a sort of a block diagram. Hope it
explains things more clearly.







|CPUIDLE   |  (Select idle states like
|   GOVERNORS  |   C1, C1e, C6 etc in case
| (Menu/Ladder)|   x86 & nap, snooze in
|  |   case of POWER - based on
   latency & power req)
^
|
|
|
|
|
  ----
  |||   ||  PSERIES  |
  |  ACPI  |-- |CPUIDLE| --|   IDLE|
  |||   ||   |
  ----

Main idle routine- pm_idle() Main idle routine-
 ppc_md.power_save()

pm_idle = cpuidle_pm_idle;   ppc_md.power_save =
(start using cpuidle's idle   cpuidle_pm_idle();
 loop, which internally calls
 governor to select the right
 state to go into).


Relevant code snippet from drivers/cpuidle/cpuidle.c
-

static void cpuidle_idle_call(void)
{



/* Call the menu_select() to select the idle state to enter. */
next_state = cpuidle_curr_governor-select(dev);




/*
 * Enter the idle state previously selected. target_state-enter
 * would call pseries_cpuidle_loop() which selects nap/snooze
 * /
dev-last_residency = target_state-enter(dev, target_state);
}

void cpuidle_install_idle_handler(void)
{
.
.
cpuidle_pm_idle = cpuidle_idle_call;
}

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/4]: CPUIDLE: Introduce architecture independent cpuidle_pm_idle in drivers/cpuidle/cpuidle.c

2009-08-28 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-08-27 14:53:27]:

 On Thu, 2009-08-27 at 17:23 +0530, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:
  
  Cpuidle infrastructure assumes pm_idle as the default idle routine.
  But, ppc_md.power_save is the default idle callback in case of pSeries.
  
  So, create a more generic, architecture independent cpuidle_pm_idle
  function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
  of architectures to be set to cpuidle_pm_idle.
  
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   drivers/cpuidle/cpuidle.c |   12 +++-
   include/linux/cpuidle.h   |7 +++
   2 files changed, 14 insertions(+), 5 deletions(-)
  
  Index: linux.trees.git/drivers/cpuidle/cpuidle.c
  ===
  --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
  +++ linux.trees.git/drivers/cpuidle/cpuidle.c
  @@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
   DEFINE_MUTEX(cpuidle_lock);
   LIST_HEAD(cpuidle_detected_devices);
   static void (*pm_idle_old)(void);
  +void (*cpuidle_pm_idle)(void);
   
   static int enabled_devices;
   
  @@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
*/
   void cpuidle_install_idle_handler(void)
   {
  -   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
  +   if (enabled_devices  (cpuidle_pm_idle != cpuidle_idle_call)) {
  /* Make sure all changes finished before we switch to new idle 
  */
  smp_wmb();
  -   pm_idle = cpuidle_idle_call;
  +   cpuidle_pm_idle = cpuidle_idle_call;
  }
   }
   
  @@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
*/
   void cpuidle_uninstall_idle_handler(void)
   {
  -   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
  -   pm_idle = pm_idle_old;
  +   if (enabled_devices  pm_idle_old 
  +   (cpuidle_pm_idle != pm_idle_old)) {
  +   cpuidle_pm_idle = pm_idle_old;
  cpuidle_kick_cpus();
  }
   }
  @@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
   {
  int ret;
   
  -   pm_idle_old = pm_idle;
  +   pm_idle_old = cpuidle_pm_idle;
   
  ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
  if (ret)
  Index: linux.trees.git/include/linux/cpuidle.h
  ===
  --- linux.trees.git.orig/include/linux/cpuidle.h
  +++ linux.trees.git/include/linux/cpuidle.h
  @@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
   #define CPUIDLE_DRIVER_STATE_START 0
   #endif
   
  +/*
  + * Idle callback used by cpuidle to call the cpuidle_idle_call().
  + * Platform drivers can use this to register to cpuidle's idle loop.
  + */
  +
  +extern void (*cpuidle_pm_idle)(void);
  +
   #endif /* _LINUX_CPUIDLE_H */
 
 
 I'm not quite seeing how this makes anything any better. Now we have 3
 function pointers, where 1 should suffice.
 

Or, can we have something like:
(if exporting a function is ok, instead of exporting a function
pointer).

in drivers/cpuidle/cpuidle.c

void (*return_cpuidle_handler(void))(void)
{
return cpuidle_pm_idle;
}
EXPORT_SYMBOL(return_cpuidle_handler);


and from pseries/processor_idle.c,

ppc_md.power_save = return_cpuidle_handler;


--arun

 /me wonders what's wrong with something like:
 
 struct idle_func_desc {
   int  power;
   int  latency;
   void (*idle)(void);
   struct list_head list;
 };
 
 static void spin_idle(void)
 {
   for (;;)
   cpu_relax();
 }
 
 static idle_func_desc default_idle_func = {
   power = 0, /* doesn't safe any power */
   latency = INT_MAX, /* has max latency */
   idle = spin_idle,
   list = INIT_LIST_HEAD(default_idle_func.list),
 };
 
 void (*idle_func)(void);
 static struct list_head idle_func_list;
 
 static void pick_idle_func(void)
 {
   struct idle_func_desc *desc, *idle = default_idle_desc;
 
   list_for_each_entry(desc, idle_func_list, list) {
   if (desc-power  idle-power)
   continue;
   if (desc-latency  target_latency);
   continue;
   idle = desc;
   }
 
   pm_idle = idle-idle;
 }
 
 void register_idle_func(struct idle_func_desc *desc)
 {
   WARN_ON_ONCE(!list_empty(desc-list));
 
   list_add_tail(idle_func_list, desc-list);
   pick_idle_func();
 }
 
 void unregister_idle_func(struct idle_func_desc *desc)
 {
   WARN_ON_ONCE(list_empty(desc-list));
 
   list_del_init(desc-list);
   if (idle_func == desc-idle) 
   pick_idle_func();
 }
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v3 PATCH 0/4]: CPUIDLE/POWER: Introducing cpuidle infrastructure to POWER

2009-08-27 Thread Arun R Bharadwaj
Hi,


Changes from previous iteration:


* Remove the EXPORT_SYMBOL(pm_idle) from
arch/powerpc/platforms/pseries/processor_idle.c and introduce a
generic cpuidle_pm_idle in cpuidle.c which was earlier assuming pm_idle
to be the default idle routine. (As suggested by Peter and Ben).

* Move the cpu_idle_wait function from arch/powerpc/platforms/pseries/setup.c
to arch/powerpc/kernel/idle.c which would prevent breaking the build of
other platforms. (As suggested by Ben).
---

Cpuidle is a CPU Power Management infrastructure which helps manage
idle CPUs in a clean and efficient manner. The architecture can register
its driver (in this case, pseries_idle driver) so that it subscribes for
cpuidle feature. Cpuidle has a set of governors (ladder and menu),
which will decide the best idle state to be chosen for the current situation,
based on heuristics, and calculates the expected residency time
for the current idle state. So based on this, the cpu is put into
the right idle state.

Currently, the cpuidle infrastructure is exploited by ACPI to choose between
the available ACPI C-states. This patch-set is aimed at enabling
cpuidle for powerpc and provides a sample implementation for pseries.

Currently, in the pseries_dedicated_idle_sleep(), the processor would
poll for a time period, which is called the snooze, and only then it
is ceded, which would put the processor in nap state. Cpuidle aims at
separating this into 2 different idle states. Based on the expected
residency time predicted by the cpuidle governor, the idle state is
chosen directly. So, choosing to enter the nap state directly based on
the decision made by cpuidle would avoid unnecessary snoozing before
entering nap.

This patch-set tries to achieve the above objective by introducing a
pseries processor idle driver called pseries_idle_driver in
arch/powerpc/platforms/pseries/processor_idle.c, which implements the
idle loop which would replace the pseries_dedicated_idle_sleep()
when cpuidle is enabled.

Experiment conducted:
--

The following experiment was conducted on a completely idle JS22 blade,
to prove that using cpuidle infrastructure, the amount of nap time increases.

Nap and snooze times were sampled for all the cpus.
For a window of 1000 samples, when cpuidle was enabled,
the total nap time was of the order of a few seconds (5-10s), whereas
the total snooze time was of the order of a few milliseconds (10-30 ms).

When cpuidle infrastructure was disabled and the regular
pseries_dedicated_idle_sleep() idle loop was used, the snooze time itself
was of the order of hundreds of milliseconds. (100 - 500 ms).
This is clearly due to unnecessary snoozing before napping even on a
completely idle system.


The previous post in this area can be found at
http://lkml.org/lkml/2009/8/26/233


Patches included in this set:
--

PATCH 1/4 - Enable cpuidle for pSeries.
PATCH 2/4 - Introduce architecture independent cpuidle_pm_idle in
drivers/cpuidle/cpuidle.c
PATCH 3/4 - Register for cpuidle_pm_idle in drivers/acpi/processor_idle.c
and arch/arm/mach-kirkwood/cpuidle.c
PATCH 4/4 - Implement Pseries Processor Idle module


--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/4]: CPUIDLE/POWER: Enable cpuidle for pSeries.

2009-08-27 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86 and ARM.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig  |   17 +
 arch/powerpc/include/asm/system.h |2 ++
 arch/powerpc/kernel/idle.c|   19 +++
 3 files changed, 38 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
Index: linux.trees.git/arch/powerpc/kernel/idle.c
===
--- linux.trees.git.orig/arch/powerpc/kernel/idle.c
+++ linux.trees.git/arch/powerpc/kernel/idle.c
@@ -102,6 +102,25 @@ void cpu_idle(void)
}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * ppc_md.power_save and update to new value.
+ * Required while changing ppc_md.power_save handler on SMP systems.
+ * Caller must have changed ppc_md.power_save to the new value before the call.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of ppc_md.power_save is set */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of ppc_md.power_save */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 int powersave_nap;
 
 #ifdef CONFIG_SYSCTL
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/4]: CPUIDLE: Introduce architecture independent cpuidle_pm_idle in drivers/cpuidle/cpuidle.c

2009-08-27 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:

Cpuidle infrastructure assumes pm_idle as the default idle routine.
But, ppc_md.power_save is the default idle callback in case of pSeries.

So, create a more generic, architecture independent cpuidle_pm_idle
function pointer in driver/cpuidle/cpuidle.c and allow the idle routines
of architectures to be set to cpuidle_pm_idle.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle.c |   12 +++-
 include/linux/cpuidle.h   |7 +++
 2 files changed, 14 insertions(+), 5 deletions(-)

Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
 static void (*pm_idle_old)(void);
+void (*cpuidle_pm_idle)(void);
 
 static int enabled_devices;
 
@@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
  */
 void cpuidle_install_idle_handler(void)
 {
-   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
+   if (enabled_devices  (cpuidle_pm_idle != cpuidle_idle_call)) {
/* Make sure all changes finished before we switch to new idle 
*/
smp_wmb();
-   pm_idle = cpuidle_idle_call;
+   cpuidle_pm_idle = cpuidle_idle_call;
}
 }
 
@@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
  */
 void cpuidle_uninstall_idle_handler(void)
 {
-   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
-   pm_idle = pm_idle_old;
+   if (enabled_devices  pm_idle_old 
+   (cpuidle_pm_idle != pm_idle_old)) {
+   cpuidle_pm_idle = pm_idle_old;
cpuidle_kick_cpus();
}
 }
@@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
 {
int ret;
 
-   pm_idle_old = pm_idle;
+   pm_idle_old = cpuidle_pm_idle;
 
ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
if (ret)
Index: linux.trees.git/include/linux/cpuidle.h
===
--- linux.trees.git.orig/include/linux/cpuidle.h
+++ linux.trees.git/include/linux/cpuidle.h
@@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
 #define CPUIDLE_DRIVER_STATE_START 0
 #endif
 
+/*
+ * Idle callback used by cpuidle to call the cpuidle_idle_call().
+ * Platform drivers can use this to register to cpuidle's idle loop.
+ */
+
+extern void (*cpuidle_pm_idle)(void);
+
 #endif /* _LINUX_CPUIDLE_H */
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/4]: ACPI/ARM: Register for cpuidle_pm_idle in drivers/acpi/processor_idle.c and arch/arm/mach-kirkwood/cpuidle.c

2009-08-27 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:

Set the idle routine to cpuidle_pm_idle after registering cpuidle
devices. Earlier, pm_idle was assumed to be the default idle loop by
the cpuidle infrastructure. This is changed to an architecture independent
cpuidle_pm_idle.

There are 2 instances which are using cpuidle infrastructure currently.
This patch makes the change in both the places.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/arm/mach-kirkwood/cpuidle.c |6 ++
 drivers/acpi/processor_idle.c|5 +
 2 files changed, 11 insertions(+)

Index: linux.trees.git/arch/arm/mach-kirkwood/cpuidle.c
===
--- linux.trees.git.orig/arch/arm/mach-kirkwood/cpuidle.c
+++ linux.trees.git/arch/arm/mach-kirkwood/cpuidle.c
@@ -90,6 +90,12 @@ static int kirkwood_init_cpuidle(void)
printk(KERN_ERR kirkwood_init_cpuidle: Failed registering\n);
return -EIO;
}
+
+   if (pm_idle != cpuidle_pm_idle) {
+   printk(KERN_INFO using cpuidle idle loop\n);
+   pm_idle = cpuidle_pm_idle;
+   }
+
return 0;
 }
 
Index: linux.trees.git/drivers/acpi/processor_idle.c
===
--- linux.trees.git.orig/drivers/acpi/processor_idle.c
+++ linux.trees.git/drivers/acpi/processor_idle.c
@@ -1216,6 +1216,11 @@ int __cpuinit acpi_processor_power_init(
printk( C%d[C%d], i,
   pr-power.states[i].type);
printk()\n);
+
+   if (pm_idle != cpuidle_pm_idle) {
+   printk(KERN_INFO using cpuidle idle loop\n);
+   pm_idle = cpuidle_pm_idle;
+   }
}
 
/* 'power' [R] */
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 4/4]: CPUIDLE/POWER: Implement Pseries Processor Idle module

2009-08-27 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements pseries_cpuidle_loop(), which becomes the main idle loop
called from cpu_idle(). It enters either the snooze or the nap state
according to the decision taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  178 
 arch/powerpc/platforms/pseries/pseries.h|   14 +
 arch/powerpc/platforms/pseries/setup.c  |3 
 4 files changed, 193 insertions(+), 3 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,16 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+struct pseries_processor_power {
+   struct cpuidle_device dev;
+   int count;
+   int id;
+};
+
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,178 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/machdep.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+DEFINE_PER_CPU(struct pseries_processor_power, power);
+
+#define IDLE_STATE_COUNT   2
+
+static int pseries_idle_init(struct pseries_processor_power *power)
+{
+   return cpuidle_register_device(power-dev);
+}
+
+static void snooze(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void nap(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor();
+}
+
+static int pseries_cpuidle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+   unsigned long in_purr, out_purr;
+
+   get_lppaca()-idle = 1;
+   get_lppaca()-donate_dedicated_cpu = 1;
+   in_purr = mfspr(SPRN_PURR);
+
+   t1

Re: [PATCH 2/4]: CPUIDLE: Introduce architecture independent cpuidle_pm_idle in drivers/cpuidle/cpuidle.c

2009-08-27 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-08-27 14:53:27]:

 On Thu, 2009-08-27 at 17:23 +0530, Arun R Bharadwaj wrote:
  * Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-27 17:19:08]:
  
  Cpuidle infrastructure assumes pm_idle as the default idle routine.
  But, ppc_md.power_save is the default idle callback in case of pSeries.
  
  So, create a more generic, architecture independent cpuidle_pm_idle
  function pointer in drivers/cpuidle/cpuidle.c and allow the idle routines
  of architectures to be set to cpuidle_pm_idle.
  
  Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
  ---
   drivers/cpuidle/cpuidle.c |   12 +++-
   include/linux/cpuidle.h   |7 +++
   2 files changed, 14 insertions(+), 5 deletions(-)
  
  Index: linux.trees.git/drivers/cpuidle/cpuidle.c
  ===
  --- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
  +++ linux.trees.git/drivers/cpuidle/cpuidle.c
  @@ -25,6 +25,7 @@ DEFINE_PER_CPU(struct cpuidle_device *, 
   DEFINE_MUTEX(cpuidle_lock);
   LIST_HEAD(cpuidle_detected_devices);
   static void (*pm_idle_old)(void);
  +void (*cpuidle_pm_idle)(void);
   
   static int enabled_devices;
   
  @@ -98,10 +99,10 @@ static void cpuidle_idle_call(void)
*/
   void cpuidle_install_idle_handler(void)
   {
  -   if (enabled_devices  (pm_idle != cpuidle_idle_call)) {
  +   if (enabled_devices  (cpuidle_pm_idle != cpuidle_idle_call)) {
  /* Make sure all changes finished before we switch to new idle 
  */
  smp_wmb();
  -   pm_idle = cpuidle_idle_call;
  +   cpuidle_pm_idle = cpuidle_idle_call;
  }
   }
   
  @@ -110,8 +111,9 @@ void cpuidle_install_idle_handler(void)
*/
   void cpuidle_uninstall_idle_handler(void)
   {
  -   if (enabled_devices  pm_idle_old  (pm_idle != pm_idle_old)) {
  -   pm_idle = pm_idle_old;
  +   if (enabled_devices  pm_idle_old 
  +   (cpuidle_pm_idle != pm_idle_old)) {
  +   cpuidle_pm_idle = pm_idle_old;
  cpuidle_kick_cpus();
  }
   }
  @@ -382,7 +384,7 @@ static int __init cpuidle_init(void)
   {
  int ret;
   
  -   pm_idle_old = pm_idle;
  +   pm_idle_old = cpuidle_pm_idle;
   
  ret = cpuidle_add_class_sysfs(cpu_sysdev_class);
  if (ret)
  Index: linux.trees.git/include/linux/cpuidle.h
  ===
  --- linux.trees.git.orig/include/linux/cpuidle.h
  +++ linux.trees.git/include/linux/cpuidle.h
  @@ -188,4 +188,11 @@ static inline void cpuidle_unregister_go
   #define CPUIDLE_DRIVER_STATE_START 0
   #endif
   
  +/*
  + * Idle callback used by cpuidle to call the cpuidle_idle_call().
  + * Platform drivers can use this to register to cpuidle's idle loop.
  + */
  +
  +extern void (*cpuidle_pm_idle)(void);
  +
   #endif /* _LINUX_CPUIDLE_H */
 
 
 I'm not quite seeing how this makes anything any better. Now we have 3
 function pointers, where 1 should suffice.
 

Not really. We already do have pm_idle in case of x86 and
ppc_md.power_save in case of POWER. So here I'm only introducing
cpuidle_pm_idle which can be used by doing a

ppc_md.power_save = cpuidle_pm_idle;


 /me wonders what's wrong with something like:
 
 struct idle_func_desc {
   int  power;
   int  latency;
   void (*idle)(void);
   struct list_head list;
 };
 
 static void spin_idle(void)
 {
   for (;;)
   cpu_relax();
 }
 
 static idle_func_desc default_idle_func = {
   power = 0, /* doesn't safe any power */
   latency = INT_MAX, /* has max latency */
   idle = spin_idle,
   list = INIT_LIST_HEAD(default_idle_func.list),
 };
 
 void (*idle_func)(void);
 static struct list_head idle_func_list;
 
 static void pick_idle_func(void)
 {
   struct idle_func_desc *desc, *idle = default_idle_desc;
 
   list_for_each_entry(desc, idle_func_list, list) {
   if (desc-power  idle-power)
   continue;
   if (desc-latency  target_latency);
   continue;
   idle = desc;
   }
 
   pm_idle = idle-idle;
 }


This only does the job of picking the right idle loop for current
latency and power requirement. This is already done in ladder/menu
governors under the routines menu_select()/ladder_select().
I'm not sure what's the purpose of it here.

Here we are only concerned about the main idle loop, which is
pm_idle/ppc_md.power_save. After setting the main idle loop to
cpuidle_pm_idle, that would call cpuidle_idle_call() which would do
the job of picking the right low level idle loop based on latency and
other requirements.


 void register_idle_func(struct idle_func_desc *desc)
 {
   WARN_ON_ONCE(!list_empty(desc-list));
 
   list_add_tail(idle_func_list, desc-list);
   pick_idle_func();
 }
 
 void unregister_idle_func(struct idle_func_desc *desc)
 {
   WARN_ON_ONCE(list_empty

[v2 PATCH 0/2]: cpuidle: Introducing cpuidle infrastructure to POWER

2009-08-26 Thread Arun R Bharadwaj
Hi,



Cpuidle is a CPU Power Management infrastructure which helps manage
idle CPUs in a clean and efficient manner. The architecture can register
its driver (in this case, pseries_idle driver) so that it subscribes for
cpuidle feature. Cpuidle has a set of governors (ladder and menu),
which will decide the best idle state to be chosen for the current situation,
based on heuristics, and calculates the expected residency time
for the current idle state. So based on this, the cpu is put into
the right idle state.

Currently, cpuidle infrastructure is exploited by ACPI to choose between
the available ACPI C-states. This patch-set is aimed at enabling
cpuidle for powerpc and provides a sample implementation for pseries.

Currently, in the pseries_dedicated_idle_sleep(), the processor would
poll for a time period, which is called the snooze, and only then it
is ceded, which would put the processor in nap state. Cpuidle aims at
separating this into 2 different idle states. Based on the expected
residency time predicted by the cpuidle governor, the idle state is
chosen directly. So, choosing to enter the nap state directly based on
the decision made by cpuidle would avoid unnecessary snoozing before
entering nap.

This patch-set tries to achieve the above objective by introducing a
pseries processor idle driver called pseries_idle_driver in
arch/powerpc/platforms/pseries/processor_idle.c, which implements the
idle loop which would replace the pseries_dedicated_idle_sleep()
when cpuidle is enabled.

Experiment conducted:
--

The following experiment was conducted on a completely idle JS22 blade,
to prove that using cpuidle infrastructure, the amount of nap time increases.

Nap and snooze times were sampled for all the cpus.
For a window of 1000 samples, when cpuidle was enabled,
the total nap time was of the order of a few seconds (5-10 s), whereas
the total snooze time was of the order of a few milliseconds (10-30 ms).

When cpuidle infrastructure was disabled and the regular
pseries_dedicated_idle_sleep() idle loop was used, the snooze time itself
was of the order of hundreds of milliseconds (100-500 ms).
This is clearly due to unnecessary snoozing before napping even on a
completely idle system.


The previous post in this area can be found at
http://lkml.org/lkml/2009/8/19/150

Changes from the previous version:
--

Earlier I used the name TPMD (Thermal and Power Management Devices)
to refer to this module which hooks on to the cpuidle infrastructure.
I have renamed this to Pseries Processor Idle, in order to avoid confusion.


Patches included in this set:
--
PATCH 1/2 - Enable cpuidle for pSeries.
PATCH 2/2 - Implement Pseries Processor Idle idle module


Any feedback on the overall design and idea is immensely valuable.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v2 PATCH 1/2]: pseries: Enable cpuidle for pSeries.

2009-08-26 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-26 16:37:21]:

This patch enables the cpuidle option in Kconfig for pSeries.

Currently cpuidle infrastructure is enabled only for x86.
This code is almost completely borrowed from x86 to enable
cpuidle for pSeries.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig   |   17 +
 arch/powerpc/include/asm/system.h  |2 ++
 arch/powerpc/platforms/pseries/setup.c |   21 +
 3 files changed, 40 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,20 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config PSERIES_PROCESSOR_IDLE
+   bool Idle Power Management Support for pSeries
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Idle Power Management Support for pSeries. This hooks onto cpuidle
+ infrastructure to help in idle cpu power management.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -278,6 +278,27 @@ static struct notifier_block pci_dn_reco
.notifier_call = pci_dn_reconfig_notifier,
 };
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+   /* Ensure that new value of pm_idle is set before proceeding */
+   smp_mb();
+   /* kick all the CPUs so that they exit out of pm_idle */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 static void __init pSeries_setup_arch(void)
 {
/* Discover PIC type and setup ppc_md accordingly */
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[v2 PATCH 2/2]: pseries: Implement Pseries Processor Idle idle module.

2009-08-26 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-26 16:37:21]:

This patch creates arch/powerpc/platforms/pseries/processor_idle.c,
which implements the cpuidle infrastructure for pseries.
It implements pseries_cpuidle_loop(), which becomes the main idle loop
called from cpu_idle(). It enters either the snooze or the nap state,
based on the decision taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  181 
 arch/powerpc/platforms/pseries/pseries.h|   14 +
 arch/powerpc/platforms/pseries/setup.c  |3 
 4 files changed, 196 insertions(+), 3 deletions(-)

Index: linux.trees.git/arch/powerpc/platforms/pseries/Makefile
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/Makefile
+++ linux.trees.git/arch/powerpc/platforms/pseries/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst
 obj-$(CONFIG_PHYP_DUMP)+= phyp_dump.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
+obj-$(CONFIG_PSERIES_PROCESSOR_IDLE)   += processor_idle.o
Index: linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/pseries.h
+++ linux.trees.git/arch/powerpc/platforms/pseries/pseries.h
@@ -10,6 +10,8 @@
 #ifndef _PSERIES_PSERIES_H
 #define _PSERIES_PSERIES_H
 
+#include linux/cpuidle.h
+
 extern void __init fw_feature_init(const char *hypertas, unsigned long len);
 
 struct pt_regs;
@@ -40,4 +42,16 @@ extern unsigned long rtas_poweron_auto;
 
 extern void find_udbg_vterm(void);
 
+DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
+
+#ifdef CONFIG_PSERIES_PROCESSOR_IDLE
+struct pseries_processor_power {
+   struct cpuidle_device dev;
+   int count;
+   int id;
+};
+
+extern struct cpuidle_driver pseries_idle_driver;
+#endif
+
 #endif /* _PSERIES_PSERIES_H */
Index: linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,181 @@
+/*
+ *  processor_idle - idle state cpuidle driver.
+ *  Adapted from drivers/acpi/processor_idle.c
+ *
+ *  Arun R Bharadwaj a...@linux.vnet.ibm.com
+ *
+ *  Copyright (C) 2009 IBM Corporation.
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+
+#include asm/paca.h
+#include asm/reg.h
+#include asm/machdep.h
+
+#include plpar_wrappers.h
+#include pseries.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(pSeries Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver pseries_idle_driver = {
+   .name = pseries_idle,
+   .owner =THIS_MODULE,
+};
+
+void (*pm_idle)(void);
+EXPORT_SYMBOL_GPL(pm_idle);
+
+DEFINE_PER_CPU(struct pseries_processor_power, power);
+
+#define IDLE_STATE_COUNT   2
+
+static int pseries_idle_init(struct pseries_processor_power *power)
+{
+   return cpuidle_register_device(power-dev);
+}
+
+static void snooze(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   ppc64_runlatch_off();
+   HMT_low();
+   HMT_very_low();
+   }
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   local_irq_disable();
+}
+
+static void nap(void)
+{
+   ppc64_runlatch_off();
+   HMT_medium();
+   cede_processor();
+}
+
+static int pseries_cpuidle_loop(struct cpuidle_device *dev,
+   struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+   unsigned long in_purr, out_purr;
+
+   get_lppaca()-idle = 1;
+   get_lppaca

Re: [v2 PATCH 2/2]: pseries: Implement Pseries Processor Idle idle module.

2009-08-26 Thread Arun R Bharadwaj
* Peter Zijlstra a.p.zijls...@chello.nl [2009-08-26 13:27:18]:

 On Wed, 2009-08-26 at 16:40 +0530, Arun R Bharadwaj wrote:
  +void (*pm_idle)(void);
  +EXPORT_SYMBOL_GPL(pm_idle);
 
 Seriously.. this caused plenty problems over on x86 and you're doing the
 exact same dumb thing?
 

Hi Peter,

Cpuidle assumes pm_idle to be the default idle power management
function. So i should either do this, or change the stuff in cpuidle.c
so that it is more abstract.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 0/2]: cpuidle: Introducing cpuidle infrastructure to powerpc.

2009-08-19 Thread Arun R Bharadwaj
Hi,


 RFC not for inclusion 

Cpuidle is a CPU Power Management infrastructure which helps manage
idle CPUs in a clean and efficient manner. The architecture can register
its driver (in this case, tpmd_idle driver) so that it subscribes for
cpuidle feature. Cpuidle has a set of governors (ladder and menu),
which will decide the best idle state to be chosen for the current situation,
based on heuristics, and calculates the expected residency time
for the current idle state. So based on this, the cpu is put into
the right idle state.

Currently, cpuidle infrastructure is exploited by ACPI to choose between
the available ACPI C-states. This patch-set is aimed at enabling
cpuidle for powerpc and provides a sample implementation for pseries.

Currently, in the pseries_dedicated_idle_sleep(), the processor would
poll for a time period, which is called the snooze, and only then it
is ceded, which would put the processor in nap state. Cpuidle aims at
separating this into 2 different idle states. Based on the expected
residency time predicted by the cpuidle governor, the idle state is
chosen directly. So, choosing to enter the nap state directly based on
the decision made by cpuidle would avoid unnecessary snoozing before
entering nap.

This patch-set tries to achieve the above objective by introducing a
Thermal and Power Management Device module called tpmd_idle in
arch/powerpc/platforms/pseries/tpmd_idle.c, which implements the cpuidle
idle loop which would replace the pseries_dedicated_idle_sleep()
when cpuidle is enabled.

Patches included in this set:
PATCH 1/2 - Enable cpuidle for pSeries.
PATCH 2/2 - Implement Thermal & Power Management Devices (TPMD) idle module


Any feedback on the overall design and idea is immensely valuable.

--arun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2]: pSeries: Enable cpuidle for pSeries.

2009-08-19 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-19 18:27:16]:

This patch enables the cpuidle option in Kconfig for pSeries.
It also adds the routine cpu_idle_wait.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/Kconfig   |   18 ++
 arch/powerpc/include/asm/system.h  |2 ++
 arch/powerpc/platforms/pseries/setup.c |   20 
 drivers/cpuidle/cpuidle.c  |1 +
 4 files changed, 41 insertions(+)

Index: linux.trees.git/arch/powerpc/Kconfig
===
--- linux.trees.git.orig/arch/powerpc/Kconfig
+++ linux.trees.git/arch/powerpc/Kconfig
@@ -88,6 +88,9 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
 
+config ARCH_HAS_CPU_IDLE_WAIT
+   def_bool y
+
 config GENERIC_HWEIGHT
bool
default y
@@ -243,6 +246,21 @@ source kernel/Kconfig.freezer
 source arch/powerpc/sysdev/Kconfig
 source arch/powerpc/platforms/Kconfig
 
+menu Power management options
+
+source drivers/cpuidle/Kconfig
+
+config TPMD
+   tristate TPMD power management support
+   depends on PPC_PSERIES  CPU_IDLE
+   default y
+   help
+ Thermal and Power Management Devices (TPMD). This hooks onto cpuidle
+ infrastructure to help in idle cpu power management. Currently this
+ is enabled only for pSeries.
+
+endmenu
+
 menu Kernel options
 
 config HIGHMEM
Index: linux.trees.git/drivers/cpuidle/cpuidle.c
===
--- linux.trees.git.orig/drivers/cpuidle/cpuidle.c
+++ linux.trees.git/drivers/cpuidle/cpuidle.c
@@ -17,6 +17,7 @@
 #include linux/cpuidle.h
 #include linux/ktime.h
 #include linux/hrtimer.h
+#include linux/pm.h
 
 #include cpuidle.h
 
Index: linux.trees.git/arch/powerpc/platforms/pseries/setup.c
===
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux.trees.git/arch/powerpc/platforms/pseries/setup.c
@@ -278,6 +278,26 @@ static struct notifier_block pci_dn_reco
.notifier_call = pci_dn_reconfig_notifier,
 };
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+   smp_mb();
+   /* kick all the CPUs so that they exit out of pm_idle */
+   smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 static void __init pSeries_setup_arch(void)
 {
/* Discover PIC type and setup ppc_md accordingly */
Index: linux.trees.git/arch/powerpc/include/asm/system.h
===
--- linux.trees.git.orig/arch/powerpc/include/asm/system.h
+++ linux.trees.git/arch/powerpc/include/asm/system.h
@@ -546,5 +546,7 @@ extern void account_system_vtime(struct 
 
 extern struct dentry *powerpc_debugfs_root;
 
+void cpu_idle_wait(void);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/2]: pSeries: Implement Thermal Power Management Devices(TPMD) idle module.

2009-08-19 Thread Arun R Bharadwaj
* Arun R Bharadwaj a...@linux.vnet.ibm.com [2009-08-19 18:27:16]:

This patch creates the Thermal & Power Management Devices module, tpmd_idle,
which implements the cpuidle infrastructure for pseries.
It implements a tpmd_idle_loop() which would be the main idle loop called
from cpu_idle(). It makes decision of entering either snooze or nap state
based on the decision taken by the cpuidle governor.

Signed-off-by: Arun R Bharadwaj a...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile|1 
 arch/powerpc/platforms/pseries/tpmd.h  |   10 +
 arch/powerpc/platforms/pseries/tpmd_idle.c |  192 +
 3 files changed, 203 insertions(+)

Index: linux.trees.git/arch/powerpc/platforms/pseries/tpmd_idle.c
===
--- /dev/null
+++ linux.trees.git/arch/powerpc/platforms/pseries/tpmd_idle.c
@@ -0,0 +1,192 @@
+
+/*
+ * tpmd_idle - idle state submodule to the tpmd driver
+ *
+ *  Copyright (C) 2009 Arun R Bharadwaj a...@linux.vnet.ibm.com
+ * ~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/init.h
+#include linux/moduleparam.h
+#include linux/cpuidle.h
+#include linux/cpu.h
+
+#include asm/paca.h
+#include asm/machdep.h
+
+#include plpar_wrappers.h
+#include tpmd.h
+
+MODULE_AUTHOR(Arun R Bharadwaj);
+MODULE_DESCRIPTION(TPMD Idle State Driver);
+MODULE_LICENSE(GPL);
+
+struct cpuidle_driver tpmd_idle_driver = {
+   .name = tpmd_idle,
+   .owner =THIS_MODULE,
+};
+
+void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
+
+static void (*old_idle_power_save)(void);
+
+DEFINE_PER_CPU(struct tpmd_processor_power, power);
+
+#defineIDLE_STATE_COUNT2
+
+static int tpmd_idle_init(struct tpmd_processor_power *power)
+{
+   return cpuidle_register_device(power-dev);
+}
+
+void tpmd_idle_exit(struct tpmd_processor_power *power)
+{
+   cpuidle_unregister_device(power-dev);
+}
+
+static void snooze(void)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+   while (!need_resched()) {
+   HMT_low();
+   HMT_very_low();
+   }
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   local_irq_disable();
+   smp_mb();
+}
+
+static void nap(void)
+{
+   HMT_medium();
+   smp_mb();
+   cede_processor();
+}
+
+static int tpmd_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+   ktime_t t1, t2;
+   s64 diff;
+   int ret;
+
+   get_lppaca()-idle = 1;
+   get_lppaca()-donate_dedicated_cpu = 1;
+
+   t1 = ktime_get();
+
+   if (strcmp(st-desc, idle) == 0)
+   snooze();
+   else
+   nap();
+
+   t2 = ktime_get();
+   diff = ktime_to_us(ktime_sub(t2, t1));
+   if (diff  INT_MAX)
+   diff = INT_MAX;
+
+   ret = (int) diff;
+
+   get_lppaca()-idle = 0;
+   get_lppaca()-donate_dedicated_cpu = 0;
+
+   return ret;
+}
+
+static int tpmd_setup_cpuidle(struct tpmd_processor_power *power)
+{
+   int i;
+   struct cpuidle_state *state;
+   struct cpuidle_device *dev = power-dev;
+
+   dev-cpu = power-id;
+
+   dev-enabled = 0;
+   for (i = 0; i  IDLE_STATE_COUNT; i++) {
+   state = dev-states[i];
+
+   snprintf(state-name, CPUIDLE_NAME_LEN, TPM%d, i);
+
+   switch (i) {
+   case 0:
+   strncpy(state-desc, idle, CPUIDLE_DESC_LEN);
+   state-exit_latency = 0;
+   state-target_residency = 0;
+   state-enter = tpmd_idle_loop;
+   break;
+
+   case 1:
+   strncpy(state-desc, nap, CPUIDLE_DESC_LEN);
+   state-exit_latency = 1;
+   state-target_residency = 100;
+   state-enter = tpmd_idle_loop;
+   break;
+   }
+   }
+
+   power-dev.state_count = i;
+   return 0;
+}
+
+static int tpmd_processor_get_power_info(struct