Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-09 Thread Kristen Carlson Accardi
On Wed, 1 Aug 2007 14:16:51 -0700
Kristen Carlson Accardi [EMAIL PROTECTED] wrote:

 I was able to duplicate Tejun's problem on an ATAPI device I had here.
 this updated patch fixed my problem.  These devices are just setting 
 PhyReady (N) and CommWake (W) in Serror on power state changes, ignoring
 them seems to be fine, and fixed the problem for me.

Hi Tejun,
Were you able to test out this patch and see if it solved your ATAPI
device/ALPM issues?

Thanks,
Kristen


 
 
 Enable Aggressive Link Power management for AHCI controllers.
 
 This patch will set the correct bits to turn on Aggressive
 Link Power Management (ALPM) for the ahci driver.  This
 will cause the controller and disk to negotiate a lower
 power state for the link when there is no activity (see
 the AHCI 1.x spec for details).  This feature is mutually
 exclusive with Hot Plug, so when ALPM is enabled, Hot Plug
 is disabled.  ALPM will be enabled by default, but it is
 settable via the scsi host syfs interface.  Possible 
 settings for this feature are:
 
 Setting   Effect
 --
 min_power ALPM is enabled, and link set to enter 
   lowest power state (SLUMBER) when idle
   Hot plug not allowed.
 
 max_performance   ALPM is disabled, Hot Plug is allowed
 
 medium_power  ALPM is enabled, and link set to enter
   second lowest power state (PARTIAL) when
   idle.  Hot plug not allowed.
 
 Signed-off-by:  Kristen Carlson Accardi [EMAIL PROTECTED]
 
 Index: 2.6-git/drivers/ata/ahci.c
 ===
 --- 2.6-git.orig/drivers/ata/ahci.c
 +++ 2.6-git/drivers/ata/ahci.c
 @@ -48,6 +48,9 @@
  #define DRV_NAME ahci
  #define DRV_VERSION  2.3
  
 +static int ahci_enable_alpm(struct ata_port *ap,
 + enum link_pm policy);
 +static int ahci_disable_alpm(struct ata_port *ap);
  
  enum {
   AHCI_PCI_BAR= 5,
 @@ -98,6 +101,7 @@ enum {
   /* HOST_CAP bits */
   HOST_CAP_SSC= (1  14), /* Slumber capable */
   HOST_CAP_CLO= (1  24), /* Command List Override support */
 + HOST_CAP_ALPM   = (1  26), /* Aggressive Link PM support */
   HOST_CAP_SSS= (1  27), /* Staggered Spin-up */
   HOST_CAP_SNTF   = (1  29), /* SNotification register */
   HOST_CAP_NCQ= (1  30), /* Native Command Queueing */
 @@ -153,6 +157,8 @@ enum {
 PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS,
  
   /* PORT_CMD bits */
 + PORT_CMD_ASP= (1  27), /* Aggressive Slumber/Partial */
 + PORT_CMD_ALPE   = (1  26), /* Aggressive Link PM enable */
   PORT_CMD_ATAPI  = (1  24), /* Device is ATAPI */
   PORT_CMD_LIST_ON= (1  15), /* cmd list DMA engine running */
   PORT_CMD_FIS_ON = (1  14), /* FIS DMA engine running */
 @@ -244,6 +250,11 @@ static int ahci_pci_device_suspend(struc
  static int ahci_pci_device_resume(struct pci_dev *pdev);
  #endif
  
 +static struct class_device_attribute *ahci_shost_attrs[] = {
 + class_device_attr_link_power_management_policy,
 + NULL
 +};
 +
  static struct scsi_host_template ahci_sht = {
   .module = THIS_MODULE,
   .name   = DRV_NAME,
 @@ -261,6 +272,7 @@ static struct scsi_host_template ahci_sh
   .slave_configure= ata_scsi_slave_config,
   .slave_destroy  = ata_scsi_slave_destroy,
   .bios_param = ata_std_bios_param,
 + .shost_attrs= ahci_shost_attrs,
  };
  
  static const struct ata_port_operations ahci_ops = {
 @@ -292,6 +304,8 @@ static const struct ata_port_operations 
   .port_suspend   = ahci_port_suspend,
   .port_resume= ahci_port_resume,
  #endif
 + .enable_pm  = ahci_enable_alpm,
 + .disable_pm = ahci_disable_alpm,
  
   .port_start = ahci_port_start,
   .port_stop  = ahci_port_stop,
 @@ -778,6 +792,156 @@ static void ahci_power_up(struct ata_por
   writel(cmd | PORT_CMD_ICC_ACTIVE, port_mmio + PORT_CMD);
  }
  
 +static int ahci_disable_alpm(struct ata_port *ap)
 +{
 + void __iomem *port_mmio = ahci_port_base(ap);
 + u32 cmd, scontrol;
 + struct ahci_port_priv *pp = ap-private_data;
 +
 + /*
 +  * disable Interface Power Management State Transitions
 +  * This is accomplished by setting bits 8:11 of the
 +  * SATA Control register
 +  */
 + scontrol = readl(port_mmio + PORT_SCR_CTL);
 + scontrol |= (0x3  8);
 + writel(scontrol, port_mmio + PORT_SCR_CTL);
 +
 + /* get the existing command bits */
 + cmd = readl(port_mmio + PORT_CMD);
 +
 + /* disable ALPM and ASP */
 + cmd = ~PORT_CMD_ASP;
 + cmd = ~PORT_CMD_ALPE;
 +
 + /* force the interface back to active */
 + cmd |= PORT_CMD_ICC_ACTIVE;

Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Tejun Heo
Tejun Heo wrote:
 Arjan van de Ven wrote:
 They were hardware problems.  I don't think any amount of proper
 implementation can fix them.  I have one DVD RAM somewhere in my pile of
 hardware which locks up solidly if any link PS mode is used and had a
 and the AHCI ALPM code decides to use power savings on this device? if
 so, please give us the idents so that we can add it to the blacklist as
 the first entry... (or can buy it to check it in detail first)
 
 Yeap, lemme check.
 
 It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01.  Wanna
 check ID capability bits but 'hdparm -I /dev/sr0' is still broken and
 it's already past 3 am here.  I'll report back tomorrow.

Oops, that was the wrong one.  Locking up one was HL-DT-STDVD-RAM
GSA-H30N and it correctly reports that it doesn't support IPM.  Here
are some test results.

Controller is ICH9.

00:1f.2 SATA controller [Class 0106]: Intel Corporation Unknown device 
[8086:2922] (rev 02)



1. HL-DT-STDVD-RAM GSA-H30N

  ATAPI CD-ROM, with removable media
  Model Number:   HL-DT-STDVD-RAM GSA-H30N
  Serial Number:
  Firmware Revision:  1.00
  Standards:
  Likely used CD-ROM ATAPI-1
  Configuration:
  DRQ response: 50us.
  Packet size: 12 bytes
  Capabilities:
  LBA, IORDY(can be disabled)
  DMA: sdma0 sdma1 sdma2 mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 
udma4 *udma5
   Cycle time: min=120ns recommended=120ns
  PIO: pio0 pio1 pio2 pio3 pio4
   Cycle time: no flow control=120ns  IORDY flow control=120

This device doesn't claim to support HIPM nor DIPM.

  # echo min_power  link_power_management_policy
  [  752.761751] ata1.00: Unable to set Link PM policy
  [  784.510218] ata1.00: exception Emask 0x50 SAct 0x0 SErr 0xd0800 action 0x6 
frozen
  [  784.530266] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 
0x43 data 12 in
  [  784.530270]  res 40/00:03:00:00:20/00:00:00:00:00/a0 Emask 0x54 
(ATA bus error)
  [  784.571175] ata1: hard resetting port
  [  790.489486] ata1: port is slow to respond, please be patient (Status 0x80)
  [  794.594247] ata1: COMRESET failed (errno=-16)
  [  794.611174] ata1: hard resetting port
  [  800.541562] ata1: port is slow to respond, please be patient (Status 0x80)
  [  804.646316] ata1: COMRESET failed (errno=-16)
  [  804.663284] ata1: hard resetting port
  [  810.593623] ata1: port is slow to respond, please be patient (Status 0x80)
  [  839.654644] ata1: COMRESET failed (errno=-16)
  [  839.672576] ata1: limiting SATA link speed to 1.5 Gbps
  [  839.691024] ata1: hard resetting port
  [  844.726659] ata1: COMRESET failed (errno=-16)
  [  844.744064] ata1: reset failed, giving up
  [  844.761614] ata1.00: disabled
  [  844.761639] ata1: EH complete

The device doesn't respond till power is physically removed and
restored.  It seems something in ahci_disable_alpm() path upsets the
device.

2. TSSTcorpCD/DVDW SH-S183A

  ATAPI CD-ROM, with removable media
  Model Number:   TSSTcorpCD/DVDW SH-S183A
  Serial Number:  
  Firmware Revision:  SB01
  Standards:
  Likely used CD-ROM ATAPI-1
  Configuration:
  DRQ response: 50us.
  Packet size: 12 bytes
  Capabilities:
  LBA, IORDY(can be disabled)
  DMA: mdma0 mdma1 mdma2 udma0 udma1 *udma2 
   Cycle time: min=120ns recommended=120ns
  PIO: pio0 pio1 pio2 pio3 pio4 
   Cycle time: no flow control=120ns  IORDY flow control=120ns
  Commands/features:
  Enabled Supported:
 *SATA-I signaling speed (1.5Gb/s)
 *Host-initiated interface power management
 *Phy event counters
  Device-initiated interface power management
  unknown 78[5]
 *Software settings preservation

This one claims to support HIPS.

  # echo min_power  link_power_management_policy
  [ 1301.917248] ata1.00: exception Emask 0x10 SAct 0x0 SErr 0x5 action 0x2
  [ 1301.938338] ata1.00: irq_stat 0x4001
  [ 1301.956955] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 
0x43 data 12 in
  [ 1301.956959]  res 51/20:03:00:00:00/00:00:00:00:00/a0 Emask 0x10 
(ATA bus error)
  [ 1304.189565] ata1: soft resetting port
  [ 1304.207745] ata1: SATA link down (SStatus 611 SControl 300)
  [ 1304.228076] ata1: failed to recover some devices, retrying in 5 secs
  [ 1309.245599] ata1: hard resetting port
  [ 1314.773227] ata1: port is slow to respond, please be patient (Status 0x80)
  [ 1319.269677] ata1: COMRESET failed (errno=-16)
  [ 1319.289639] ata1: hard resetting port
  [ 1319.781285] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
  [ 1320.115569] ata1.00: configured for UDMA/33
  [ 1320.134780] ata1: EH complete

And hotplug works fine after EH is done with itself.  Dunno why.

3. PLEXTOR DVDR   PX-716A

  ATAPI CD-ROM, with removable media

Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Kristen Carlson Accardi
On Wed, 01 Aug 2007 12:24:44 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 It would be *really* great if we can find more about how they do it.
 How and when it's enabled and on which systems.  Is it possible to find
 this out?

No - it's really not a good idea for us to go and ask other OS's how they
implement things in this level of detail. 
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Kristen Carlson Accardi
On Wed, 01 Aug 2007 18:23:21 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Tejun Heo wrote:
  Arjan van de Ven wrote:
  They were hardware problems.  I don't think any amount of proper
  implementation can fix them.  I have one DVD RAM somewhere in my pile of
  hardware which locks up solidly if any link PS mode is used and had a
  and the AHCI ALPM code decides to use power savings on this device? if
  so, please give us the idents so that we can add it to the blacklist as
  the first entry... (or can buy it to check it in detail first)
  
  Yeap, lemme check.
  
  It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01.  Wanna
  check ID capability bits but 'hdparm -I /dev/sr0' is still broken and
  it's already past 3 am here.  I'll report back tomorrow.
 
 Oops, that was the wrong one.  Locking up one was HL-DT-STDVD-RAM
 GSA-H30N and it correctly reports that it doesn't support IPM.  Here
 are some test results.
 
 Controller is ICH9.
 
 00:1f.2 SATA controller [Class 0106]: Intel Corporation Unknown device 
 [8086:2922] (rev 02)
 
 
 
 1. HL-DT-STDVD-RAM GSA-H30N
 
   ATAPI CD-ROM, with removable media
 Model Number:   HL-DT-STDVD-RAM GSA-H30N
 Serial Number:
 Firmware Revision:  1.00
   Standards:
 Likely used CD-ROM ATAPI-1
   Configuration:
 DRQ response: 50us.
 Packet size: 12 bytes
   Capabilities:
 LBA, IORDY(can be disabled)
 DMA: sdma0 sdma1 sdma2 mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 
 udma4 *udma5
  Cycle time: min=120ns recommended=120ns
 PIO: pio0 pio1 pio2 pio3 pio4
  Cycle time: no flow control=120ns  IORDY flow control=120
 
 This device doesn't claim to support HIPM nor DIPM.

Are you sure you are using the latest version of the patch?  This seems
like a bug, if the device doesn't support HIPM or DIPM it shouldn't attempt
to use ALPM.  There was a check for this put into the patch on about the 
second or third version - maybe I'm not doing it right.
 
(from the patch located here:
http://www.kernel.org/pub/linux/kernel/people/kristen/patches/SATA/alpm/libata-enable-pm

if (ata_id_has_hipm(dev-id) || ata_id_has_dipm(dev-id))
+   dev-flags |= ATA_DFLAG_IPM;
+
snip
 
 2. TSSTcorpCD/DVDW SH-S183A
 
   ATAPI CD-ROM, with removable media
 Model Number:   TSSTcorpCD/DVDW SH-S183A
 Serial Number:  
 Firmware Revision:  SB01
   Standards:
 Likely used CD-ROM ATAPI-1
   Configuration:
 DRQ response: 50us.
 Packet size: 12 bytes
   Capabilities:
 LBA, IORDY(can be disabled)
 DMA: mdma0 mdma1 mdma2 udma0 udma1 *udma2 
  Cycle time: min=120ns recommended=120ns
 PIO: pio0 pio1 pio2 pio3 pio4 
  Cycle time: no flow control=120ns  IORDY flow control=120ns
   Commands/features:
 Enabled Supported:
*SATA-I signaling speed (1.5Gb/s)
*Host-initiated interface power management
*Phy event counters
 Device-initiated interface power management
 unknown 78[5]
*Software settings preservation
 
 This one claims to support HIPS.
 
   # echo min_power  link_power_management_policy
   [ 1301.917248] ata1.00: exception Emask 0x10 SAct 0x0 SErr 0x5 action 
 0x2
   [ 1301.938338] ata1.00: irq_stat 0x4001
   [ 1301.956955] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 
 0x43 data 12 in
   [ 1301.956959]  res 51/20:03:00:00:00/00:00:00:00:00/a0 Emask 0x10 
 (ATA bus error)
   [ 1304.189565] ata1: soft resetting port
   [ 1304.207745] ata1: SATA link down (SStatus 611 SControl 300)
   [ 1304.228076] ata1: failed to recover some devices, retrying in 5 secs
   [ 1309.245599] ata1: hard resetting port
   [ 1314.773227] ata1: port is slow to respond, please be patient (Status 
 0x80)
   [ 1319.269677] ata1: COMRESET failed (errno=-16)
   [ 1319.289639] ata1: hard resetting port
   [ 1319.781285] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
   [ 1320.115569] ata1.00: configured for UDMA/33
   [ 1320.134780] ata1: EH complete
 
 And hotplug works fine after EH is done with itself.  Dunno why.

It works because after EH you've reset the port, and when you reset the
port you turn off ipm (note the value of SControl).  I left this the way
it was without attempting to renable link pm after a reset because I figured
if there was something bad happening and we needed to reset we should leave
ipm off.

As far as the failure you are seeing goes - this may not actually be a
hardware problem but just a case of us not understanding which bits we
need to clear out of the Interrupt status register once we enable
ALPM. The value of SErr indicates that this device has gotten PhyRdy - 
which is normal for power state changes, and the interrupt status indicates 
that a d2h FIS was received which is also normal - the TFES bit seems to be 
set - it may be that we 

Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Matthew Wilcox
On Tue, Jul 31, 2007 at 09:18:08AM -0500, James Bottomley wrote:
 The other comment is that power saving seems to be a property of the
 transport rather than the host.  If you do it in the transport classes,
 then you can expose all the knobs the actual transport possesses (which
 is, unfortunately, none for quite a few SCSI transports).

Would it save any power to negotiate down to, say, FAST-20 for the SPI
transport?  Or to negotiate narrow instead of wide, so fewer cables have
to be powered?

-- 
Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step.
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread James Bottomley
On Wed, 2007-08-01 at 10:53 -0600, Matthew Wilcox wrote:
 On Tue, Jul 31, 2007 at 09:18:08AM -0500, James Bottomley wrote:
  The other comment is that power saving seems to be a property of the
  transport rather than the host.  If you do it in the transport classes,
  then you can expose all the knobs the actual transport possesses (which
  is, unfortunately, none for quite a few SCSI transports).
 
 Would it save any power to negotiate down to, say, FAST-20 for the SPI
 transport?  Or to negotiate narrow instead of wide, so fewer cables have
 to be powered?

Mechanically, I don't believe so.  I'm not sure I can find any reports
on it; however, I believe the power wastage SPI comes from the
transcievers and terminators.  There's the passive power from simply
powering the resistive bus and then the RMS power from sending commands
over an impedance circuit.  simple physics on a 5v bus tells me that the
former is likely to be far greater than the latter.  Finally, if you
think about what the bus is doing during signalling, the power drain is
independent of the frequency so I think the faster you squirt your data,
the lower the energy drain (hence high speed busses are actually lower
energy than low speed ones if the number of bits you have to transfer
remains constant).

James


-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Kristen Carlson Accardi
I was able to duplicate Tejun's problem on an ATAPI device I had here.
this updated patch fixed my problem.  These devices are just setting 
PhyReady (N) and CommWake (W) in Serror on power state changes, ignoring
them seems to be fine, and fixed the problem for me.


Enable Aggressive Link Power management for AHCI controllers.

This patch will set the correct bits to turn on Aggressive
Link Power Management (ALPM) for the ahci driver.  This
will cause the controller and disk to negotiate a lower
power state for the link when there is no activity (see
the AHCI 1.x spec for details).  This feature is mutually
exclusive with Hot Plug, so when ALPM is enabled, Hot Plug
is disabled.  ALPM will be enabled by default, but it is
settable via the scsi host syfs interface.  Possible 
settings for this feature are:

Setting Effect
--
min_power   ALPM is enabled, and link set to enter 
lowest power state (SLUMBER) when idle
Hot plug not allowed.

max_performance ALPM is disabled, Hot Plug is allowed

medium_powerALPM is enabled, and link set to enter
second lowest power state (PARTIAL) when
idle.  Hot plug not allowed.

Signed-off-by:  Kristen Carlson Accardi [EMAIL PROTECTED]

Index: 2.6-git/drivers/ata/ahci.c
===
--- 2.6-git.orig/drivers/ata/ahci.c
+++ 2.6-git/drivers/ata/ahci.c
@@ -48,6 +48,9 @@
 #define DRV_NAME   ahci
 #define DRV_VERSION2.3
 
+static int ahci_enable_alpm(struct ata_port *ap,
+   enum link_pm policy);
+static int ahci_disable_alpm(struct ata_port *ap);
 
 enum {
AHCI_PCI_BAR= 5,
@@ -98,6 +101,7 @@ enum {
/* HOST_CAP bits */
HOST_CAP_SSC= (1  14), /* Slumber capable */
HOST_CAP_CLO= (1  24), /* Command List Override support */
+   HOST_CAP_ALPM   = (1  26), /* Aggressive Link PM support */
HOST_CAP_SSS= (1  27), /* Staggered Spin-up */
HOST_CAP_SNTF   = (1  29), /* SNotification register */
HOST_CAP_NCQ= (1  30), /* Native Command Queueing */
@@ -153,6 +157,8 @@ enum {
  PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS,
 
/* PORT_CMD bits */
+   PORT_CMD_ASP= (1  27), /* Aggressive Slumber/Partial */
+   PORT_CMD_ALPE   = (1  26), /* Aggressive Link PM enable */
PORT_CMD_ATAPI  = (1  24), /* Device is ATAPI */
PORT_CMD_LIST_ON= (1  15), /* cmd list DMA engine running */
PORT_CMD_FIS_ON = (1  14), /* FIS DMA engine running */
@@ -244,6 +250,11 @@ static int ahci_pci_device_suspend(struc
 static int ahci_pci_device_resume(struct pci_dev *pdev);
 #endif
 
+static struct class_device_attribute *ahci_shost_attrs[] = {
+   class_device_attr_link_power_management_policy,
+   NULL
+};
+
 static struct scsi_host_template ahci_sht = {
.module = THIS_MODULE,
.name   = DRV_NAME,
@@ -261,6 +272,7 @@ static struct scsi_host_template ahci_sh
.slave_configure= ata_scsi_slave_config,
.slave_destroy  = ata_scsi_slave_destroy,
.bios_param = ata_std_bios_param,
+   .shost_attrs= ahci_shost_attrs,
 };
 
 static const struct ata_port_operations ahci_ops = {
@@ -292,6 +304,8 @@ static const struct ata_port_operations 
.port_suspend   = ahci_port_suspend,
.port_resume= ahci_port_resume,
 #endif
+   .enable_pm  = ahci_enable_alpm,
+   .disable_pm = ahci_disable_alpm,
 
.port_start = ahci_port_start,
.port_stop  = ahci_port_stop,
@@ -778,6 +792,156 @@ static void ahci_power_up(struct ata_por
writel(cmd | PORT_CMD_ICC_ACTIVE, port_mmio + PORT_CMD);
 }
 
+static int ahci_disable_alpm(struct ata_port *ap)
+{
+   void __iomem *port_mmio = ahci_port_base(ap);
+   u32 cmd, scontrol;
+   struct ahci_port_priv *pp = ap-private_data;
+
+   /*
+* disable Interface Power Management State Transitions
+* This is accomplished by setting bits 8:11 of the
+* SATA Control register
+*/
+   scontrol = readl(port_mmio + PORT_SCR_CTL);
+   scontrol |= (0x3  8);
+   writel(scontrol, port_mmio + PORT_SCR_CTL);
+
+   /* get the existing command bits */
+   cmd = readl(port_mmio + PORT_CMD);
+
+   /* disable ALPM and ASP */
+   cmd = ~PORT_CMD_ASP;
+   cmd = ~PORT_CMD_ALPE;
+
+   /* force the interface back to active */
+   cmd |= PORT_CMD_ICC_ACTIVE;
+
+   /* write out new cmd value */
+   writel(cmd, port_mmio + PORT_CMD);
+   cmd = readl(port_mmio + PORT_CMD);
+
+   /* wait 10ms to be sure we've come out of any low power state */
+   msleep(10);

Re: [patch 2/4] Expose Power Management Policy option to users

2007-08-01 Thread Kristen Carlson Accardi
On Wed, 01 Aug 2007 12:24:44 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Kristen Carlson Accardi wrote:
  I don't think the interface you're suggesting is a good one.  Do you?
  
  I think if it's applicable to SCSI at all it is fine.  If it is not, then
  I think we need to make do with the interface we are given.  I do not think
  we should hold up a feature for libata sysfs integration.
 
 Well, I guess we'll have to agree to disagree here and leave the
 decision to James and Jeff.

A compromise to avoiding having this in SCSI is to use the
shost_attrs array in the scsi host template.  This will not change the way
the interface will look, it will just remove any link power management 
stuff from the scsi subsystem directly and have it be an ATA only attribute.
I did go ahead and leave the Documentation file in the scsi directory
because that is where the attribute winds up being visible to the user.
Here's the patch, and this series is also available at
http://www.kernel.org/pub/linux/kernel/people/kristen/patches/SATA/alpm:

Enable link power management for ata drivers

libata drivers can define a function (enable_pm) that will
perform hardware specific actions to enable whatever power
management policy the user set up from the scsi sysfs 
interface if the driver supports it.  This power management 
policy will be activated after all disks have been 
enumerated and intialized.  Drivers should also define
disable_pm, which will turn off link power management, but
not change link power management policy.

Signed-off-by:  Kristen Carlson Accardi [EMAIL PROTECTED]
Index: 2.6-git/drivers/ata/libata-scsi.c
===
--- 2.6-git.orig/drivers/ata/libata-scsi.c
+++ 2.6-git/drivers/ata/libata-scsi.c
@@ -110,6 +110,78 @@ static struct scsi_transport_template at
.user_scan  = ata_scsi_user_scan,
 };
 
+static const struct {
+   enum link_pmvalue;
+   char*name;
+} link_pm_policy[] = {
+   { NOT_AVAILABLE, max_performance },
+   { MIN_POWER, min_power },
+   { MAX_PERFORMANCE, max_performance },
+   { MEDIUM_POWER, medium_power },
+};
+
+const char *ata_scsi_link_pm_policy(enum link_pm policy)
+{
+   int i;
+   char *name = NULL;
+
+   for (i = 0; i  ARRAY_SIZE(link_pm_policy); i++) {
+   if (link_pm_policy[i].value == policy) {
+   name = link_pm_policy[i].name;
+   break;
+   }
+   }
+   return name;
+}
+
+static ssize_t store_link_pm_policy(struct class_device *class_dev,
+   const char *buf, size_t count)
+{
+   struct Scsi_Host *shost = class_to_shost(class_dev);
+   struct ata_port *ap = ata_shost_to_port(shost);
+   enum link_pm policy = 0;
+   int i;
+
+   /*
+* we are skipping array location 0 on purpose - this
+* is because a value of NOT_AVAILABLE is displayed
+* to the user as max_performance, but when the user
+* writes max_performance, they actually want the
+* value to match MAX_PERFORMANCE.
+*/
+   for (i = 1; i  ARRAY_SIZE(link_pm_policy); i++) {
+   const int len = strlen(link_pm_policy[i].name);
+   if (strncmp(link_pm_policy[i].name, buf, len) == 0 
+  buf[len] == '\n') {
+   policy = link_pm_policy[i].value;
+   break;
+   }
+   }
+   if (!policy)
+   return -EINVAL;
+
+   if (ata_scsi_set_link_pm_policy(ap, policy))
+   return -EINVAL;
+   return count;
+}
+
+static ssize_t
+show_link_pm_policy(struct class_device *class_dev, char *buf)
+{
+   struct Scsi_Host *shost = class_to_shost(class_dev);
+   struct ata_port *ap = ata_shost_to_port(shost);
+   const char *policy =
+   ata_scsi_link_pm_policy(ap-pm_policy);
+
+   if (!policy)
+   return -EINVAL;
+
+   return snprintf(buf, 23, %s\n, policy);
+}
+CLASS_DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
+   show_link_pm_policy, store_link_pm_policy);
+EXPORT_SYMBOL_GPL(class_device_attr_link_power_management_policy);
+
 static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
   void (*done)(struct scsi_cmnd *))
 {
@@ -2904,6 +2976,47 @@ void ata_scsi_simulate(struct ata_device
}
 }
 
+int ata_scsi_set_link_pm_policy(struct ata_port *ap,
+   enum link_pm policy)
+{
+   int rc = -EINVAL;
+   int i;
+
+   /*
+* make sure no broken devices are on this port,
+* and that all devices support interface power
+* management
+*/
+   for (i = 0; i  ATA_MAX_DEVICES; i++) {
+   struct ata_device *dev = ap-device[i];
+
+   /* only check drives which exist */
+   if (!ata_dev_enabled(dev))
+   continue;
+
+   /*
+  

Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Jeff Garzik wrote:
 Any chance the SCSI peeps could ACK this, and then let me include it in
 the ALPM patchset in the libata tree?

ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM.  I'm not
sure whether this three level knob would be sufficient.  It might be
good enough if we're gonna develop extensive in-kernel black/white list
specifying which method works on which combination but my gut tells me
that it's best left to userland (probably in the form of per-notebook PS
profile).

Adding to the fun, there are quite a few broken devices out there which
act weirdly when link PS actions are taken.

Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
'cool down' period before entering PS state which unnecessarily hampers
performance and might increase chance of device malfunction.

So, mild NACK from me.

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Arjan van de Ven
On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote:
 Jeff Garzik wrote:
  Any chance the SCSI peeps could ACK this, and then let me include it in
  the ALPM patchset in the libata tree?
 
 ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM.  I'm not
 sure whether this three level knob would be sufficient. 

adding more levels later is easy.

  It might be
 good enough if we're gonna develop extensive in-kernel black/white list
 specifying which method works on which combination but my gut tells me
 that it's best left to userland (probably in the form of per-notebook PS
 profile).

either sucks. AHCI ALPM ought to work if it's supported; it's what other
operating systems also use... 
 
 Adding to the fun, there are quite a few broken devices out there which
 act weirdly when link PS actions are taken.

do you have any specific examples that act funny with the patch in
question here? (the patch tries to be careful, previous patches weren't
always so please test this patch before claiming the concept as a whole
is broken)
 
 Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
 'cool down' period before entering PS state

that's a chipset implementation decision not part of the
spec/technology per se.


-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread James Bottomley
On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote:
 Jeff Garzik wrote:
  Any chance the SCSI peeps could ACK this, and then let me include it in
  the ALPM patchset in the libata tree?
 
 ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM.  I'm not
 sure whether this three level knob would be sufficient.  It might be
 good enough if we're gonna develop extensive in-kernel black/white list
 specifying which method works on which combination but my gut tells me
 that it's best left to userland (probably in the form of per-notebook PS
 profile).
 
 Adding to the fun, there are quite a few broken devices out there which
 act weirdly when link PS actions are taken.
 
 Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
 'cool down' period before entering PS state which unnecessarily hampers
 performance and might increase chance of device malfunction.
 
 So, mild NACK from me.

The other comment is that power saving seems to be a property of the
transport rather than the host.  If you do it in the transport classes,
then you can expose all the knobs the actual transport possesses (which
is, unfortunately, none for quite a few SCSI transports).

James


-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Arjan van de Ven wrote:
 On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote:
 Jeff Garzik wrote:
 Any chance the SCSI peeps could ACK this, and then let me include it in
 the ALPM patchset in the libata tree?
 ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM.  I'm not
 sure whether this three level knob would be sufficient. 
 
 adding more levels later is easy.

Dunno whether they would be linear levels and putting HIPM, DIPM, ALPM
selection into SCSI sysfs knob doesn't look so appealing.

  It might be
 good enough if we're gonna develop extensive in-kernel black/white list
 specifying which method works on which combination but my gut tells me
 that it's best left to userland (probably in the form of per-notebook PS
 profile).
 
 either sucks. AHCI ALPM ought to work if it's supported; it's what other
 operating systems also use... 
 Adding to the fun, there are quite a few broken devices out there which
 act weirdly when link PS actions are taken.
 
 do you have any specific examples that act funny with the patch in
 question here? (the patch tries to be careful, previous patches weren't
 always so please test this patch before claiming the concept as a whole
 is broken)

They were hardware problems.  I don't think any amount of proper
implementation can fix them.  I have one DVD RAM somewhere in my pile of
hardware which locks up solidly if any link PS mode is used and had a
report of a HDD which had problem with link PS.  Can't remember the
details tho.  Also, IIRC one of my wendies spins down on SLUMBER.

 Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
 'cool down' period before entering PS state
 
 that's a chipset implementation decision not part of the
 spec/technology per se.

That's actually something AHCI spec specifically states.  From section
8.3.1.3.

When PxCMD.ALPE is set to ‘1’, if the HBA recognizes that there are no
commands to process, the HBA shall initiate a transition to Partial or
Slumber interface power management state based upon the setting of
PxCMD.ASP. The HBA recognizes no commands to transmit as either:

• PxSACT is set to 0h, and the HBA updates PxCI from a non-zero
value to 0h.
• PxCI is set to 0h, and a Set Device Bits FIS is received that
updates PxSACT from a non-zero value to 0h.

Have no idea why it's specified this way.  Adding 100ms or 1s cool down
time wouldn't burn noticeably more power.  Maybe AHCI expects the host
driver to queue commands even for non-NCQ drives but libata currently
doesn't do that.

Anyways, I don't really think this attribute belongs to SCSI sysfs
hierarchy.  There currently isn't any alternative but sysfs is part of
userland visible interface and putting something into SCSI sysfs
hierarchy just because libata doesn't have one doesn't look like a good
idea.

sysfs isn't far from being detached from kobject and driver model.  I
think it would be best to wait a bit and build proper libata sysfs
hierarchy which won't have to be changed later when libata departs from
SCSI.  Well, it isn't really a good way but IMHO it's better than
sticking ATA power saving node into SCSI sysfs hierarchy.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Arjan van de Ven wrote:
 either sucks. AHCI ALPM ought to work if it's supported; it's what other
 operating systems also use... 

A question.  Does the other OS enable ALPM without checking against
white/black list?  Or is it enabled only on certain configurations -
e.g. specific notebooks, etc?

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Arjan van de Ven

 They were hardware problems.  I don't think any amount of proper
 implementation can fix them.  I have one DVD RAM somewhere in my pile of
 hardware which locks up solidly if any link PS mode is used and had a

and the AHCI ALPM code decides to use power savings on this device? if
so, please give us the idents so that we can add it to the blacklist as
the first entry... (or can buy it to check it in detail first)

-- 
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via 
http://www.linuxfirmwarekit.org

-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Kristen Carlson Accardi
On Tue, 31 Jul 2007 23:45:25 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Anyways, I don't really think this attribute belongs to SCSI sysfs
 hierarchy.  There currently isn't any alternative but sysfs is part of
 userland visible interface and putting something into SCSI sysfs
 hierarchy just because libata doesn't have one doesn't look like a good
 idea.
 
 sysfs isn't far from being detached from kobject and driver model.  I
 think it would be best to wait a bit and build proper libata sysfs
 hierarchy which won't have to be changed later when libata departs from
 SCSI.  Well, it isn't really a good way but IMHO it's better than
 sticking ATA power saving node into SCSI sysfs hierarchy.

Wait a bit could be a very long time.  Who is working on building this
new libata sysfs support now?  If the answer is no one, which I think it 
may be, do you want to hold up a feature that actually helps many people
for possibly 6 months or more just because we have to go through scsi
right now for our sysfs interface?  on top of that, the last mail I 
got from James on this subject indicated that if we kept our granularity
large with the power savings levels, SCSI can actually take advantage of
this as well.  Sure, we may have to tweak things around later, but isn't
this what we do routinely?  Holding up valuable features from the kernel 
because things aren't perfect yet seems really broken. 

As far as your complaints about broken hardware go, keep in mind that
the patch set does provide a method of adding these disks to a blacklist,
so I don't see that as a problem.  And, the default for this feature is
off, and user space would have to explicitly enable it.
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Kristen Carlson Accardi
On Tue, 31 Jul 2007 15:27:34 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Jeff Garzik wrote:
  Any chance the SCSI peeps could ACK this, and then let me include it in
  the ALPM patchset in the libata tree?
 
 ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM.  I'm not
 sure whether this three level knob would be sufficient.  It might be
 good enough if we're gonna develop extensive in-kernel black/white list
 specifying which method works on which combination but my gut tells me
 that it's best left to userland (probably in the form of per-notebook PS
 profile).

I think what you are saying is that you'd like a way to use your HIPM
and DIPM without ALPM on the AHCI driver.  Fine - it's really easy
to add these levels later - if they don't make sense at the sysfs interface
we can add module params to specify the definition of min_power as 
being performed via HIPM and DIPM instead of ALPM - although as of yet we
have no evidence what so ever that this method actually adds value over
ALPM.

 
 Adding to the fun, there are quite a few broken devices out there which
 act weirdly when link PS actions are taken.

OK - this is why I added the blacklist for this feature.

 
 Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
 'cool down' period before entering PS state which unnecessarily hampers
 performance and might increase chance of device malfunction.

might increase?  How about some actual examples of where you've shown
this to be a problem?  I can assert that I think ALPM is a good idea,
because I've never had a report of it causing problems.  Windows has 
been using this feature for a very long time - and you have to admit that
they have a pretty large market share.  Nobody is complaining about ALPM
increasing device malfunction, so unless you have proof it seems insane
to nak due to this. 
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Hello, Kristen.

Kristen Carlson Accardi wrote:
 On Tue, 31 Jul 2007 23:45:25 +0900
 Tejun Heo [EMAIL PROTECTED] wrote:
 
 Anyways, I don't really think this attribute belongs to SCSI sysfs
 hierarchy.  There currently isn't any alternative but sysfs is part of
 userland visible interface and putting something into SCSI sysfs
 hierarchy just because libata doesn't have one doesn't look like a good
 idea.

 sysfs isn't far from being detached from kobject and driver model.  I
 think it would be best to wait a bit and build proper libata sysfs
 hierarchy which won't have to be changed later when libata departs from
 SCSI.  Well, it isn't really a good way but IMHO it's better than
 sticking ATA power saving node into SCSI sysfs hierarchy.
 
 Wait a bit could be a very long time.  Who is working on building this
 new libata sysfs support now?

I happen to be working on sysfs these days and guess why I started
working on sysfs. :-)

Disassociating sysfs from driver model is probably one or two patchsets
away.  It could have happened earlier but I wanted to pace things a bit
so that the changes receive some testing through release cycles.  Check
how many sysfs related changes went through .23-rc1 merge window and
expect about the same influx during the next cycle; with some luck, it
can be complete before .24-rc1 window.

 If the answer is no one, which I think it 
 may be, do you want to hold up a feature that actually helps many people
 for possibly 6 months or more just because we have to go through scsi
 right now for our sysfs interface?

I don't necessarily want to but delaying it might be the right thing to
do.  Anyways, if we're going to do an interim solution, I don't think
mainline SCSI sysfs hierarchy is the right place.  Do it with module
parameter which carries large to be deprecated sign or maintain out of
tree patches.

 on top of that, the last mail I 
 got from James on this subject indicated that if we kept our granularity
 large with the power savings levels, SCSI can actually take advantage of
 this as well.  Sure, we may have to tweak things around later, but isn't
 this what we do routinely?  Holding up valuable features from the kernel 
 because things aren't perfect yet seems really broken.

 As far as your complaints about broken hardware go, keep in mind that
 the patch set does provide a method of adding these disks to a blacklist,
 so I don't see that as a problem.  And, the default for this feature is
 off, and user space would have to explicitly enable it.

This is gonna be a bit long.  Please be patient.

Due to the generosity of the ATA committee, we have, by default, at
least two ways to achieve link PS - HIPS and DIPS.  I dunno why but
someone thought we needed two.  And then, ahci people thought automatic
HIPS would be nice, which I fully agree, and added ALPM.  Unfortunately,
they mandated ALPM to kick in the moment command engine becomes idle, so
most current implementations suffer unnecessary performance hit when
ALPM is active.

We have this crazy number of combinations.  HIPS, DIPS,
not-so-hot-looking ALPM and probably some number of broken devices which
may be happy with some method but not with others.  Some might be happy
with PARTIAL but vomit on SLUMBER.  I might be being too pessimistic but
my last two years in IDE/ATA land taught me to be pessimistic about
hardware quality.  Heck, I bet you some of non-intel ahci
implementations which claim to support ALPM will crap themselves when
actually told to do so.

If we're gonna do this like NCQ and gonna put knowledge about all the
combinations into the driver.  The suggested interface is good enough.
Heck, we probably don't even need three levels.  On and off should be
enough if things are done right as link PS doesn't have to incur
noticeable performance degradation.  But to do that, we'll need to test
a lot of combinations and it's gonna be much harder than NCQ (some ahci
implementations don't seem to actually enter PS mode when instructed to
do so via SControl but turning off the controller saves a lot of power.
 Maybe ALPM works better on such cases) and the blacklist will probably
be longer.

The alternate way is to export flexible interface to userland and let
userland decide and if we're gonna do that.  SCSI sysfs just isn't the
place.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Kristen Carlson Accardi wrote:
 I think what you are saying is that you'd like a way to use your HIPM
 and DIPM without ALPM on the AHCI driver.  Fine - it's really easy
 to add these levels later - if they don't make sense at the sysfs interface
 we can add module params to specify the definition of min_power as 
 being performed via HIPM and DIPM instead of ALPM - although as of yet we
 have no evidence what so ever that this method actually adds value over
 ALPM.

I don't really care whose PS implementation goes in.  Believe me.  I try
to stay away from that.  I don't even like my previous implementation.

ALPM has unnecessary performance penalty  is not applicable to
non-ahci controller.  Have you tested ALPM on non-intel ahcis?  There
are a lot out there these days.

I don't think the interface you're suggesting is a good one.  Do you?

 Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
 'cool down' period before entering PS state which unnecessarily hampers
 performance and might increase chance of device malfunction.
 
 might increase?  How about some actual examples of where you've shown
 this to be a problem?

I wouldn't have used might if I had actual examples.  Well, feel free
to disregard anything following the might.  I just feel uneasy about
jumping back and forth between PS and active states between consecutive
commands.

 I can assert that I think ALPM is a good idea,
 because I've never had a report of it causing problems.  Windows has 
 been using this feature for a very long time - and you have to admit that
 they have a pretty large market share.  Nobody is complaining about ALPM
 increasing device malfunction, so unless you have proof it seems insane
 to nak due to this. 

Is ALPM enabled by default?  How do they deal with the performance
degradation?

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Arjan van de Ven wrote:
 They were hardware problems.  I don't think any amount of proper
 implementation can fix them.  I have one DVD RAM somewhere in my pile of
 hardware which locks up solidly if any link PS mode is used and had a
 
 and the AHCI ALPM code decides to use power savings on this device? if
 so, please give us the idents so that we can add it to the blacklist as
 the first entry... (or can buy it to check it in detail first)

Yeap, lemme check.

It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01.  Wanna
check ID capability bits but 'hdparm -I /dev/sr0' is still broken and
it's already past 3 am here.  I'll report back tomorrow.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Kristen Carlson Accardi
On Wed, 01 Aug 2007 03:02:55 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Kristen Carlson Accardi wrote:
  I think what you are saying is that you'd like a way to use your HIPM
  and DIPM without ALPM on the AHCI driver.  Fine - it's really easy
  to add these levels later - if they don't make sense at the sysfs interface
  we can add module params to specify the definition of min_power as 
  being performed via HIPM and DIPM instead of ALPM - although as of yet we
  have no evidence what so ever that this method actually adds value over
  ALPM.
 
 I don't really care whose PS implementation goes in.  Believe me.  I try
 to stay away from that.  I don't even like my previous implementation.
 
 ALPM has unnecessary performance penalty  is not applicable to
 non-ahci controller.  Have you tested ALPM on non-intel ahcis?  There
 are a lot out there these days.

I have not personally, however there has been a lot of testing of this
hardware feature both on other OS and for this particular implementation
by the powertop community, which is composed of community members running
all sorts of hardware.  So far I've not received any bug reports
wrt non-intel AHCI.  As I mentioned several times, the default for ALPM
is to be off anyway.

 
 I don't think the interface you're suggesting is a good one.  Do you?

I think if it's applicable to SCSI at all it is fine.  If it is not, then
I think we need to make do with the interface we are given.  I do not think
we should hold up a feature for libata sysfs integration.

 
  Also, I generally don't think AHCI ALPM is a good idea.  It doesn't have
  'cool down' period before entering PS state which unnecessarily hampers
  performance and might increase chance of device malfunction.
  
  might increase?  How about some actual examples of where you've shown
  this to be a problem?
 
 I wouldn't have used might if I had actual examples.  Well, feel free
 to disregard anything following the might.  I just feel uneasy about
 jumping back and forth between PS and active states between consecutive
 commands.

I want us to be careful about spreading a lot of unease without data to
back it up.

 
  I can assert that I think ALPM is a good idea,
  because I've never had a report of it causing problems.  Windows has 
  been using this feature for a very long time - and you have to admit that
  they have a pretty large market share.  Nobody is complaining about ALPM
  increasing device malfunction, so unless you have proof it seems insane
  to nak due to this. 
 
 Is ALPM enabled by default?  How do they deal with the performance
 degradation?

I believe so, but I'm obviously not privvy to their implementation details.
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Kristen Carlson Accardi
On Wed, 01 Aug 2007 02:48:42 +0900
Tejun Heo [EMAIL PROTECTED] wrote:

 Hello, Kristen.
 
 Kristen Carlson Accardi wrote:
  On Tue, 31 Jul 2007 23:45:25 +0900
  Tejun Heo [EMAIL PROTECTED] wrote:
  
  Anyways, I don't really think this attribute belongs to SCSI sysfs
  hierarchy.  There currently isn't any alternative but sysfs is part of
  userland visible interface and putting something into SCSI sysfs
  hierarchy just because libata doesn't have one doesn't look like a good
  idea.
 
  sysfs isn't far from being detached from kobject and driver model.  I
  think it would be best to wait a bit and build proper libata sysfs
  hierarchy which won't have to be changed later when libata departs from
  SCSI.  Well, it isn't really a good way but IMHO it's better than
  sticking ATA power saving node into SCSI sysfs hierarchy.
  
  Wait a bit could be a very long time.  Who is working on building this
  new libata sysfs support now?
 
 I happen to be working on sysfs these days and guess why I started
 working on sysfs. :-)
 
 Disassociating sysfs from driver model is probably one or two patchsets
 away.  It could have happened earlier but I wanted to pace things a bit
 so that the changes receive some testing through release cycles.  Check
 how many sysfs related changes went through .23-rc1 merge window and
 expect about the same influx during the next cycle; with some luck, it
 can be complete before .24-rc1 window.

So at current rate of development and kernel release schedule, the best
possible scenario is still 6 months away - whereas this patchset is already 
tested and ready for merging now.

 
  If the answer is no one, which I think it 
  may be, do you want to hold up a feature that actually helps many people
  for possibly 6 months or more just because we have to go through scsi
  right now for our sysfs interface?
 
 I don't necessarily want to but delaying it might be the right thing to
 do.  Anyways, if we're going to do an interim solution, I don't think
 mainline SCSI sysfs hierarchy is the right place.  Do it with module
 parameter which carries large to be deprecated sign or maintain out of
 tree patches.

Out of tree patches don't work.  Nobody tests them.  A module parameter
will not work - we need to be able to expose the sysfs interface so that
users may chose to turn the feature off and on at will - mainly according
to their usage.  For example, at boot time - you want ALPM off to maximize
performance.  Lets say when you are plugged into the wall socket, you leave
it off, but then when you go on battery power you would want to enable it.


 
  on top of that, the last mail I 
  got from James on this subject indicated that if we kept our granularity
  large with the power savings levels, SCSI can actually take advantage of
  this as well.  Sure, we may have to tweak things around later, but isn't
  this what we do routinely?  Holding up valuable features from the kernel 
  because things aren't perfect yet seems really broken.
 
  As far as your complaints about broken hardware go, keep in mind that
  the patch set does provide a method of adding these disks to a blacklist,
  so I don't see that as a problem.  And, the default for this feature is
  off, and user space would have to explicitly enable it.
 
 This is gonna be a bit long.  Please be patient.
 
 Due to the generosity of the ATA committee, we have, by default, at
 least two ways to achieve link PS - HIPS and DIPS.  I dunno why but
 someone thought we needed two.  And then, ahci people thought automatic

They thought we needed two because sometimes the device knows when it
will be idle faster than the host can. this is why ALPM can determine
idleness faster than any software algorithm on the host cpu can.  You
can use ALPM without having HIPM.  You can also use it without having 
DIPM.

 HIPS would be nice, which I fully agree, and added ALPM.  Unfortunately,
 they mandated ALPM to kick in the moment command engine becomes idle, so
 most current implementations suffer unnecessary performance hit when
 ALPM is active.

unnecessary is subjective and at the moment, untrue.  You 
have to make performance/power tradeoffs with anything, whether it's 
CPU or your AHCI controller.  It's the current cost of getting out of these 
sleep states even if you aren't using ALPM but just doing HIPM or DIPM 
manually.  But, this is why it's so critical to allow the user to 
control when ALPM is enabled dynamically - which this patchset does.
The medium power setting is provided for users who wish to not go to
SLUMBER and get the higher latency cost but still have some power savings.

 
 We have this crazy number of combinations.  HIPS, DIPS,
 not-so-hot-looking ALPM and probably some number of broken devices which
 may be happy with some method but not with others.  Some might be happy
 with PARTIAL but vomit on SLUMBER.  I might be being too pessimistic but
 my last two years in IDE/ATA land taught me to be pessimistic about
 hardware 

Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Kristen Carlson Accardi wrote:
 So at current rate of development and kernel release schedule, the best
 possible scenario is still 6 months away - whereas this patchset is already 
 tested and ready for merging now.

The best possible scenario is .24-rc1 merge window with or without
waiting.  With waiting, the best possible scenario is harder to achieve tho.

 Out of tree patches don't work.  Nobody tests them.  A module parameter
 will not work - we need to be able to expose the sysfs interface so that
 users may chose to turn the feature off and on at will - mainly according
 to their usage.  For example, at boot time - you want ALPM off to maximize
 performance.  Lets say when you are plugged into the wall socket, you leave
 it off, but then when you go on battery power you would want to enable it.

You can turn on and off dynamically using a module parameter.  Although
it's not a pretty interface, it should work as an interim solution if we
absolutely must merge ALPM now.

 Due to the generosity of the ATA committee, we have, by default, at
 least two ways to achieve link PS - HIPS and DIPS.  I dunno why but
 someone thought we needed two.  And then, ahci people thought automatic
 
 They thought we needed two because sometimes the device knows when it
 will be idle faster than the host can. this is why ALPM can determine
 idleness faster than any software algorithm on the host cpu can.  You
 can use ALPM without having HIPM.  You can also use it without having 
 DIPM.

I see.  I get that one way is better than another in some way.  I'm just
not convinced whether the advantage is substantial enough to justify the
complexity.

 HIPS would be nice, which I fully agree, and added ALPM.  Unfortunately,
 they mandated ALPM to kick in the moment command engine becomes idle, so
 most current implementations suffer unnecessary performance hit when
 ALPM is active.
 
 unnecessary is subjective and at the moment, untrue.  You 
 have to make performance/power tradeoffs with anything, whether it's 
 CPU or your AHCI controller.  It's the current cost of getting out of these 
 sleep states even if you aren't using ALPM but just doing HIPM or DIPM 
 manually.

Having short cool-down time would remove most of performance degradation
and I'm pretty sure power consumption would stay about the same.

 But, this is why it's so critical to allow the user to 
 control when ALPM is enabled dynamically - which this patchset does.
 The medium power setting is provided for users who wish to not go to
 SLUMBER and get the higher latency cost but still have some power savings.

Note that PARTIAL also incurs noticeable performance degradation.

 I understand you are being cautious based on your prior experience, but
 again we still have no data to show that we are going to have a lot of
 problems.  Perhaps we should proceed optimistically and deal with problems
 when they actually occur rather than block something on a bet.

I would agree with you, merge it and see the fireworks in -mm if it
didn't involve user visible API.  We have an API decision to make here
and it hasn't been sufficiently considered yet.

 The alternate way is to export flexible interface to userland and let
 userland decide and if we're gonna do that.  SCSI sysfs just isn't the
 place.
 
 How about a patch? I'd love to have a flexible interface to userland,
 it was my goal to provide this with the sysfs files.  The requirement
 is that the users be able to turn ALPM off and on dynamically, and to
 be able to chose the power savings level you want - i.e. SLUMBER vs.
 PARTIAL - perferrably not using those terms since users really shouldn't
 need to know AHCI terminology just to chose a power management level.

As I wrote before, which level of interface to export to userland is
related with where to put the knowledge about working and broken
combinations.  Unfortunately, we don't have data to support one way or
the other at the moment.  All I'm saying is that we should be cautious
before introducing user-land visible interface which lives in SCSI sysfs
as it's unknown whether it would fit the reality or not.

Sorry that I don't have an alternative patch now, but something which
can relieve the situation is being worked on, so let's give it some time
and see how things turn out.  Things have to wait till the next -rc1
window anyway.

Thanks.

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-31 Thread Tejun Heo
Kristen Carlson Accardi wrote:
 I don't think the interface you're suggesting is a good one.  Do you?
 
 I think if it's applicable to SCSI at all it is fine.  If it is not, then
 I think we need to make do with the interface we are given.  I do not think
 we should hold up a feature for libata sysfs integration.

Well, I guess we'll have to agree to disagree here and leave the
decision to James and Jeff.

 I can assert that I think ALPM is a good idea,
 because I've never had a report of it causing problems.  Windows has 
 been using this feature for a very long time - and you have to admit that
 they have a pretty large market share.  Nobody is complaining about ALPM
 increasing device malfunction, so unless you have proof it seems insane
 to nak due to this. 
 Is ALPM enabled by default?  How do they deal with the performance
 degradation?
 
 I believe so, but I'm obviously not privvy to their implementation details.

It would be *really* great if we can find more about how they do it.
How and when it's enabled and on which systems.  Is it possible to find
this out?

-- 
tejun
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-30 Thread Jeff Garzik

Kristen Carlson Accardi wrote:

@@ -42,6 +42,16 @@ enum scsi_eh_timer_return {
EH_RESET_TIMER,
 };
 
+/*

+ * shost pm policy: If you alter this, you also need to alter scsi_sysfs.c
+ * (for the ascii descriptions)
+ */
+enum scsi_host_link_pm {
+   SHOST_NOT_AVAILABLE,
+   SHOST_MIN_POWER,
+   SHOST_MAX_PERFORMANCE,
+   SHOST_MEDIUM_POWER,
+};
 
 struct scsi_host_template {

struct module *module;
@@ -345,6 +355,12 @@ struct scsi_host_template {
int (*suspend)(struct scsi_device *, pm_message_t state);
 
 	/*

+* link power management support
+*/
+   int (*set_link_pm_policy)(struct Scsi_Host *, enum scsi_host_link_pm);
+   enum scsi_host_link_pm default_link_pm_policy;
+
+   /*
 * Name of proc directory
 */
char *proc_name;
@@ -642,6 +658,7 @@ struct Scsi_Host {

 
 	enum scsi_host_state shost_state;

+   enum scsi_host_link_pm shost_link_pm_policy;
 
 	/* ldm bits */

struct device   shost_gendev;



Any chance the SCSI peeps could ACK this, and then let me include it in 
the ALPM patchset in the libata tree?


Jeff


-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-11 Thread Pavel Machek
Hi!

 This patch will modify the scsi subsystem to allow
 users to set a power management policy for the link.
 
 The scsi subsystem will create a new sysfs file for each
 host in /sys/class/scsi_host called link_power_management_policy.
 This file can have 3 possible values:
 
 Value Meaning
 ---
 min_power User wishes the link to conserve power as much as
   possible, even at the cost of some performance
 
 max_performance User wants priority to be on performance, not power
   savings
 
 medium_power  User wants power savings, with less performance cost
   than min_power (but less power savings as well).

Has that anything to do with HIPM vs. DIPM? 

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/4] Expose Power Management Policy option to users

2007-07-11 Thread Kristen Carlson Accardi
On Mon, 9 Jul 2007 19:36:04 +
Pavel Machek [EMAIL PROTECTED] wrote:

 Hi!
 
  This patch will modify the scsi subsystem to allow
  users to set a power management policy for the link.
  
  The scsi subsystem will create a new sysfs file for each
  host in /sys/class/scsi_host called link_power_management_policy.
  This file can have 3 possible values:
  
  Value   Meaning
  ---
  min_power   User wishes the link to conserve power as much as
  possible, even at the cost of some performance
  
  max_performance User wants priority to be on performance, not power
  savings
  
  medium_powerUser wants power savings, with less performance cost
  than min_power (but less power savings as well).
 
 Has that anything to do with HIPM vs. DIPM? 
 
   Pavel
Hi Pavel,
I'm not sure I'm understanding your question, but if you mean the different
levels of power savings you would get, no.  With ALPM you have the option of
instructing the link to either go into slumber or partial mode when it 
determines the link is quiet.  Slumber uses less power, but has more latency
to come out of.  So, for a SATA device, setting the link_power_managment
file to medium_power will set up the link to only go into Partial mode,
which has less power savings (about half by my informal measurement), but 
less latency, and therefore less of a performance hit.

Hopefully this answers your question.
Kristen
-
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/4] Expose Power Management Policy option to users

2007-07-05 Thread Kristen Carlson Accardi
This patch will modify the scsi subsystem to allow
users to set a power management policy for the link.

The scsi subsystem will create a new sysfs file for each
host in /sys/class/scsi_host called link_power_management_policy.
This file can have 3 possible values:

Value   Meaning
---
min_power   User wishes the link to conserve power as much as
possible, even at the cost of some performance

max_performance User wants priority to be on performance, not power
savings

medium_powerUser wants power savings, with less performance cost
than min_power (but less power savings as well).


Signed-off-by:  Kristen Carlson Accardi [EMAIL PROTECTED]
Index: 2.6-git/drivers/scsi/hosts.c
===
--- 2.6-git.orig/drivers/scsi/hosts.c
+++ 2.6-git/drivers/scsi/hosts.c
@@ -54,6 +54,25 @@ static struct class shost_class = {
 };
 
 /**
+ * scsi_host_set_link_pm - Change the link power management policy
+ * @shost: scsi host to change the policy of.
+ * @policy:policy to change to.
+ *
+ * Returns zero if successful or an error if the requested
+ * transition is illegal.
+ **/
+int scsi_host_set_link_pm(struct Scsi_Host *shost,
+   enum scsi_host_link_pm policy)
+{
+   struct scsi_host_template *sht = shost-hostt;
+   if (sht-set_link_pm_policy)
+   sht-set_link_pm_policy(shost, policy);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(scsi_host_set_link_pm);
+
+/**
  * scsi_host_set_state - Take the given host through the host
  * state model.
  * @shost: scsi host to change the state of.
Index: 2.6-git/drivers/scsi/scsi_sysfs.c
===
--- 2.6-git.orig/drivers/scsi/scsi_sysfs.c
+++ 2.6-git/drivers/scsi/scsi_sysfs.c
@@ -189,6 +189,74 @@ show_shost_state(struct class_device *cl
 
 static CLASS_DEVICE_ATTR(state, S_IRUGO | S_IWUSR, show_shost_state, 
store_shost_state);
 
+static const struct {
+   enum scsi_host_link_pm  value;
+   char*name;
+} shost_link_pm_policy[] = {
+   { SHOST_NOT_AVAILABLE, max_performance },
+   { SHOST_MIN_POWER, min_power },
+   { SHOST_MAX_PERFORMANCE, max_performance },
+   { SHOST_MEDIUM_POWER, medium_power },
+};
+
+const char *scsi_host_link_pm_policy(enum scsi_host_link_pm policy)
+{
+   int i;
+   char *name = NULL;
+
+   for (i = 0; i  ARRAY_SIZE(shost_link_pm_policy); i++) {
+   if (shost_link_pm_policy[i].value == policy) {
+   name = shost_link_pm_policy[i].name;
+   break;
+   }
+   }
+   return name;
+}
+
+static ssize_t store_link_pm_policy(struct class_device *class_dev,
+   const char *buf, size_t count)
+{
+   struct Scsi_Host *shost = class_to_shost(class_dev);
+   enum scsi_host_link_pm policy = 0;
+   int i;
+
+   /*
+* we are skipping array location 0 on purpose - this
+* is because a value of SHOST_NOT_AVAILABLE is displayed
+* to the user as max_performance, but when the user
+* writes max_performance, they actually want the
+* value to match SHOST_MAX_PERFORMANCE.
+*/
+   for (i = 1; i  ARRAY_SIZE(shost_link_pm_policy); i++) {
+   const int len = strlen(shost_link_pm_policy[i].name);
+   if (strncmp(shost_link_pm_policy[i].name, buf, len) == 0 
+  buf[len] == '\n') {
+   policy = shost_link_pm_policy[i].value;
+   break;
+   }
+   }
+   if (!policy)
+   return -EINVAL;
+
+   if (scsi_host_set_link_pm(shost, policy))
+   return -EINVAL;
+   return count;
+}
+static ssize_t
+show_link_pm_policy(struct class_device *class_dev, char *buf)
+{
+   struct Scsi_Host *shost = class_to_shost(class_dev);
+   const char *policy =
+   scsi_host_link_pm_policy(shost-shost_link_pm_policy);
+
+   if (!policy)
+   return -EINVAL;
+
+   return snprintf(buf, 23, %s\n, policy);
+}
+static CLASS_DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
+   show_link_pm_policy, store_link_pm_policy);
+
 shost_rd_attr(unique_id, %u\n);
 shost_rd_attr(host_busy, %hu\n);
 shost_rd_attr(cmd_per_lun, %hd\n);
@@ -207,6 +275,7 @@ static struct class_device_attribute *sc
class_device_attr_proc_name,
class_device_attr_scan,
class_device_attr_state,
+   class_device_attr_link_power_management_policy,
NULL
 };
 
Index: 2.6-git/include/scsi/scsi_host.h
===
--- 2.6-git.orig/include/scsi/scsi_host.h
+++ 2.6-git/include/scsi/scsi_host.h
@@ -42,6 +42,16 @@ enum scsi_eh_timer_return {
EH_RESET_TIMER,
 };
 
+/*
+