Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 1 Aug 2007 14:16:51 -0700 Kristen Carlson Accardi [EMAIL PROTECTED] wrote: I was able to duplicate Tejun's problem on an ATAPI device I had here. this updated patch fixed my problem. These devices are just setting PhyReady (N) and CommWake (W) in Serror on power state changes, ignoring them seems to be fine, and fixed the problem for me. Hi Tejun, Were you able to test out this patch and see if it solved your ATAPI device/ALPM issues? Thanks, Kristen Enable Aggressive Link Power management for AHCI controllers. This patch will set the correct bits to turn on Aggressive Link Power Management (ALPM) for the ahci driver. This will cause the controller and disk to negotiate a lower power state for the link when there is no activity (see the AHCI 1.x spec for details). This feature is mutually exclusive with Hot Plug, so when ALPM is enabled, Hot Plug is disabled. ALPM will be enabled by default, but it is settable via the scsi host syfs interface. Possible settings for this feature are: Setting Effect -- min_power ALPM is enabled, and link set to enter lowest power state (SLUMBER) when idle Hot plug not allowed. max_performance ALPM is disabled, Hot Plug is allowed medium_power ALPM is enabled, and link set to enter second lowest power state (PARTIAL) when idle. Hot plug not allowed. Signed-off-by: Kristen Carlson Accardi [EMAIL PROTECTED] Index: 2.6-git/drivers/ata/ahci.c === --- 2.6-git.orig/drivers/ata/ahci.c +++ 2.6-git/drivers/ata/ahci.c @@ -48,6 +48,9 @@ #define DRV_NAME ahci #define DRV_VERSION 2.3 +static int ahci_enable_alpm(struct ata_port *ap, + enum link_pm policy); +static int ahci_disable_alpm(struct ata_port *ap); enum { AHCI_PCI_BAR= 5, @@ -98,6 +101,7 @@ enum { /* HOST_CAP bits */ HOST_CAP_SSC= (1 14), /* Slumber capable */ HOST_CAP_CLO= (1 24), /* Command List Override support */ + HOST_CAP_ALPM = (1 26), /* Aggressive Link PM support */ HOST_CAP_SSS= (1 27), /* Staggered Spin-up */ HOST_CAP_SNTF = (1 29), /* SNotification register */ HOST_CAP_NCQ= (1 30), /* Native Command Queueing */ @@ -153,6 +157,8 @@ enum { PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS, /* PORT_CMD bits */ + PORT_CMD_ASP= (1 27), /* Aggressive Slumber/Partial */ + PORT_CMD_ALPE = (1 26), /* Aggressive Link PM enable */ PORT_CMD_ATAPI = (1 24), /* Device is ATAPI */ PORT_CMD_LIST_ON= (1 15), /* cmd list DMA engine running */ PORT_CMD_FIS_ON = (1 14), /* FIS DMA engine running */ @@ -244,6 +250,11 @@ static int ahci_pci_device_suspend(struc static int ahci_pci_device_resume(struct pci_dev *pdev); #endif +static struct class_device_attribute *ahci_shost_attrs[] = { + class_device_attr_link_power_management_policy, + NULL +}; + static struct scsi_host_template ahci_sht = { .module = THIS_MODULE, .name = DRV_NAME, @@ -261,6 +272,7 @@ static struct scsi_host_template ahci_sh .slave_configure= ata_scsi_slave_config, .slave_destroy = ata_scsi_slave_destroy, .bios_param = ata_std_bios_param, + .shost_attrs= ahci_shost_attrs, }; static const struct ata_port_operations ahci_ops = { @@ -292,6 +304,8 @@ static const struct ata_port_operations .port_suspend = ahci_port_suspend, .port_resume= ahci_port_resume, #endif + .enable_pm = ahci_enable_alpm, + .disable_pm = ahci_disable_alpm, .port_start = ahci_port_start, .port_stop = ahci_port_stop, @@ -778,6 +792,156 @@ static void ahci_power_up(struct ata_por writel(cmd | PORT_CMD_ICC_ACTIVE, port_mmio + PORT_CMD); } +static int ahci_disable_alpm(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 cmd, scontrol; + struct ahci_port_priv *pp = ap-private_data; + + /* + * disable Interface Power Management State Transitions + * This is accomplished by setting bits 8:11 of the + * SATA Control register + */ + scontrol = readl(port_mmio + PORT_SCR_CTL); + scontrol |= (0x3 8); + writel(scontrol, port_mmio + PORT_SCR_CTL); + + /* get the existing command bits */ + cmd = readl(port_mmio + PORT_CMD); + + /* disable ALPM and ASP */ + cmd = ~PORT_CMD_ASP; + cmd = ~PORT_CMD_ALPE; + + /* force the interface back to active */ + cmd |= PORT_CMD_ICC_ACTIVE;
Re: [patch 2/4] Expose Power Management Policy option to users
Tejun Heo wrote: Arjan van de Ven wrote: They were hardware problems. I don't think any amount of proper implementation can fix them. I have one DVD RAM somewhere in my pile of hardware which locks up solidly if any link PS mode is used and had a and the AHCI ALPM code decides to use power savings on this device? if so, please give us the idents so that we can add it to the blacklist as the first entry... (or can buy it to check it in detail first) Yeap, lemme check. It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01. Wanna check ID capability bits but 'hdparm -I /dev/sr0' is still broken and it's already past 3 am here. I'll report back tomorrow. Oops, that was the wrong one. Locking up one was HL-DT-STDVD-RAM GSA-H30N and it correctly reports that it doesn't support IPM. Here are some test results. Controller is ICH9. 00:1f.2 SATA controller [Class 0106]: Intel Corporation Unknown device [8086:2922] (rev 02) 1. HL-DT-STDVD-RAM GSA-H30N ATAPI CD-ROM, with removable media Model Number: HL-DT-STDVD-RAM GSA-H30N Serial Number: Firmware Revision: 1.00 Standards: Likely used CD-ROM ATAPI-1 Configuration: DRQ response: 50us. Packet size: 12 bytes Capabilities: LBA, IORDY(can be disabled) DMA: sdma0 sdma1 sdma2 mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 *udma5 Cycle time: min=120ns recommended=120ns PIO: pio0 pio1 pio2 pio3 pio4 Cycle time: no flow control=120ns IORDY flow control=120 This device doesn't claim to support HIPM nor DIPM. # echo min_power link_power_management_policy [ 752.761751] ata1.00: Unable to set Link PM policy [ 784.510218] ata1.00: exception Emask 0x50 SAct 0x0 SErr 0xd0800 action 0x6 frozen [ 784.530266] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 0x43 data 12 in [ 784.530270] res 40/00:03:00:00:20/00:00:00:00:00/a0 Emask 0x54 (ATA bus error) [ 784.571175] ata1: hard resetting port [ 790.489486] ata1: port is slow to respond, please be patient (Status 0x80) [ 794.594247] ata1: COMRESET failed (errno=-16) [ 794.611174] ata1: hard resetting port [ 800.541562] ata1: port is slow to respond, please be patient (Status 0x80) [ 804.646316] ata1: COMRESET failed (errno=-16) [ 804.663284] ata1: hard resetting port [ 810.593623] ata1: port is slow to respond, please be patient (Status 0x80) [ 839.654644] ata1: COMRESET failed (errno=-16) [ 839.672576] ata1: limiting SATA link speed to 1.5 Gbps [ 839.691024] ata1: hard resetting port [ 844.726659] ata1: COMRESET failed (errno=-16) [ 844.744064] ata1: reset failed, giving up [ 844.761614] ata1.00: disabled [ 844.761639] ata1: EH complete The device doesn't respond till power is physically removed and restored. It seems something in ahci_disable_alpm() path upsets the device. 2. TSSTcorpCD/DVDW SH-S183A ATAPI CD-ROM, with removable media Model Number: TSSTcorpCD/DVDW SH-S183A Serial Number: Firmware Revision: SB01 Standards: Likely used CD-ROM ATAPI-1 Configuration: DRQ response: 50us. Packet size: 12 bytes Capabilities: LBA, IORDY(can be disabled) DMA: mdma0 mdma1 mdma2 udma0 udma1 *udma2 Cycle time: min=120ns recommended=120ns PIO: pio0 pio1 pio2 pio3 pio4 Cycle time: no flow control=120ns IORDY flow control=120ns Commands/features: Enabled Supported: *SATA-I signaling speed (1.5Gb/s) *Host-initiated interface power management *Phy event counters Device-initiated interface power management unknown 78[5] *Software settings preservation This one claims to support HIPS. # echo min_power link_power_management_policy [ 1301.917248] ata1.00: exception Emask 0x10 SAct 0x0 SErr 0x5 action 0x2 [ 1301.938338] ata1.00: irq_stat 0x4001 [ 1301.956955] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 0x43 data 12 in [ 1301.956959] res 51/20:03:00:00:00/00:00:00:00:00/a0 Emask 0x10 (ATA bus error) [ 1304.189565] ata1: soft resetting port [ 1304.207745] ata1: SATA link down (SStatus 611 SControl 300) [ 1304.228076] ata1: failed to recover some devices, retrying in 5 secs [ 1309.245599] ata1: hard resetting port [ 1314.773227] ata1: port is slow to respond, please be patient (Status 0x80) [ 1319.269677] ata1: COMRESET failed (errno=-16) [ 1319.289639] ata1: hard resetting port [ 1319.781285] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300) [ 1320.115569] ata1.00: configured for UDMA/33 [ 1320.134780] ata1: EH complete And hotplug works fine after EH is done with itself. Dunno why. 3. PLEXTOR DVDR PX-716A ATAPI CD-ROM, with removable media
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 01 Aug 2007 12:24:44 +0900 Tejun Heo [EMAIL PROTECTED] wrote: It would be *really* great if we can find more about how they do it. How and when it's enabled and on which systems. Is it possible to find this out? No - it's really not a good idea for us to go and ask other OS's how they implement things in this level of detail. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 01 Aug 2007 18:23:21 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Tejun Heo wrote: Arjan van de Ven wrote: They were hardware problems. I don't think any amount of proper implementation can fix them. I have one DVD RAM somewhere in my pile of hardware which locks up solidly if any link PS mode is used and had a and the AHCI ALPM code decides to use power savings on this device? if so, please give us the idents so that we can add it to the blacklist as the first entry... (or can buy it to check it in detail first) Yeap, lemme check. It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01. Wanna check ID capability bits but 'hdparm -I /dev/sr0' is still broken and it's already past 3 am here. I'll report back tomorrow. Oops, that was the wrong one. Locking up one was HL-DT-STDVD-RAM GSA-H30N and it correctly reports that it doesn't support IPM. Here are some test results. Controller is ICH9. 00:1f.2 SATA controller [Class 0106]: Intel Corporation Unknown device [8086:2922] (rev 02) 1. HL-DT-STDVD-RAM GSA-H30N ATAPI CD-ROM, with removable media Model Number: HL-DT-STDVD-RAM GSA-H30N Serial Number: Firmware Revision: 1.00 Standards: Likely used CD-ROM ATAPI-1 Configuration: DRQ response: 50us. Packet size: 12 bytes Capabilities: LBA, IORDY(can be disabled) DMA: sdma0 sdma1 sdma2 mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 *udma5 Cycle time: min=120ns recommended=120ns PIO: pio0 pio1 pio2 pio3 pio4 Cycle time: no flow control=120ns IORDY flow control=120 This device doesn't claim to support HIPM nor DIPM. Are you sure you are using the latest version of the patch? This seems like a bug, if the device doesn't support HIPM or DIPM it shouldn't attempt to use ALPM. There was a check for this put into the patch on about the second or third version - maybe I'm not doing it right. (from the patch located here: http://www.kernel.org/pub/linux/kernel/people/kristen/patches/SATA/alpm/libata-enable-pm if (ata_id_has_hipm(dev-id) || ata_id_has_dipm(dev-id)) + dev-flags |= ATA_DFLAG_IPM; + snip 2. TSSTcorpCD/DVDW SH-S183A ATAPI CD-ROM, with removable media Model Number: TSSTcorpCD/DVDW SH-S183A Serial Number: Firmware Revision: SB01 Standards: Likely used CD-ROM ATAPI-1 Configuration: DRQ response: 50us. Packet size: 12 bytes Capabilities: LBA, IORDY(can be disabled) DMA: mdma0 mdma1 mdma2 udma0 udma1 *udma2 Cycle time: min=120ns recommended=120ns PIO: pio0 pio1 pio2 pio3 pio4 Cycle time: no flow control=120ns IORDY flow control=120ns Commands/features: Enabled Supported: *SATA-I signaling speed (1.5Gb/s) *Host-initiated interface power management *Phy event counters Device-initiated interface power management unknown 78[5] *Software settings preservation This one claims to support HIPS. # echo min_power link_power_management_policy [ 1301.917248] ata1.00: exception Emask 0x10 SAct 0x0 SErr 0x5 action 0x2 [ 1301.938338] ata1.00: irq_stat 0x4001 [ 1301.956955] ata1.00: cmd a0/00:00:00:00:20/00:00:00:00:00/a0 tag 0 cdb 0x43 data 12 in [ 1301.956959] res 51/20:03:00:00:00/00:00:00:00:00/a0 Emask 0x10 (ATA bus error) [ 1304.189565] ata1: soft resetting port [ 1304.207745] ata1: SATA link down (SStatus 611 SControl 300) [ 1304.228076] ata1: failed to recover some devices, retrying in 5 secs [ 1309.245599] ata1: hard resetting port [ 1314.773227] ata1: port is slow to respond, please be patient (Status 0x80) [ 1319.269677] ata1: COMRESET failed (errno=-16) [ 1319.289639] ata1: hard resetting port [ 1319.781285] ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300) [ 1320.115569] ata1.00: configured for UDMA/33 [ 1320.134780] ata1: EH complete And hotplug works fine after EH is done with itself. Dunno why. It works because after EH you've reset the port, and when you reset the port you turn off ipm (note the value of SControl). I left this the way it was without attempting to renable link pm after a reset because I figured if there was something bad happening and we needed to reset we should leave ipm off. As far as the failure you are seeing goes - this may not actually be a hardware problem but just a case of us not understanding which bits we need to clear out of the Interrupt status register once we enable ALPM. The value of SErr indicates that this device has gotten PhyRdy - which is normal for power state changes, and the interrupt status indicates that a d2h FIS was received which is also normal - the TFES bit seems to be set - it may be that we
Re: [patch 2/4] Expose Power Management Policy option to users
On Tue, Jul 31, 2007 at 09:18:08AM -0500, James Bottomley wrote: The other comment is that power saving seems to be a property of the transport rather than the host. If you do it in the transport classes, then you can expose all the knobs the actual transport possesses (which is, unfortunately, none for quite a few SCSI transports). Would it save any power to negotiate down to, say, FAST-20 for the SPI transport? Or to negotiate narrow instead of wide, so fewer cables have to be powered? -- Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 2007-08-01 at 10:53 -0600, Matthew Wilcox wrote: On Tue, Jul 31, 2007 at 09:18:08AM -0500, James Bottomley wrote: The other comment is that power saving seems to be a property of the transport rather than the host. If you do it in the transport classes, then you can expose all the knobs the actual transport possesses (which is, unfortunately, none for quite a few SCSI transports). Would it save any power to negotiate down to, say, FAST-20 for the SPI transport? Or to negotiate narrow instead of wide, so fewer cables have to be powered? Mechanically, I don't believe so. I'm not sure I can find any reports on it; however, I believe the power wastage SPI comes from the transcievers and terminators. There's the passive power from simply powering the resistive bus and then the RMS power from sending commands over an impedance circuit. simple physics on a 5v bus tells me that the former is likely to be far greater than the latter. Finally, if you think about what the bus is doing during signalling, the power drain is independent of the frequency so I think the faster you squirt your data, the lower the energy drain (hence high speed busses are actually lower energy than low speed ones if the number of bits you have to transfer remains constant). James - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
I was able to duplicate Tejun's problem on an ATAPI device I had here. this updated patch fixed my problem. These devices are just setting PhyReady (N) and CommWake (W) in Serror on power state changes, ignoring them seems to be fine, and fixed the problem for me. Enable Aggressive Link Power management for AHCI controllers. This patch will set the correct bits to turn on Aggressive Link Power Management (ALPM) for the ahci driver. This will cause the controller and disk to negotiate a lower power state for the link when there is no activity (see the AHCI 1.x spec for details). This feature is mutually exclusive with Hot Plug, so when ALPM is enabled, Hot Plug is disabled. ALPM will be enabled by default, but it is settable via the scsi host syfs interface. Possible settings for this feature are: Setting Effect -- min_power ALPM is enabled, and link set to enter lowest power state (SLUMBER) when idle Hot plug not allowed. max_performance ALPM is disabled, Hot Plug is allowed medium_powerALPM is enabled, and link set to enter second lowest power state (PARTIAL) when idle. Hot plug not allowed. Signed-off-by: Kristen Carlson Accardi [EMAIL PROTECTED] Index: 2.6-git/drivers/ata/ahci.c === --- 2.6-git.orig/drivers/ata/ahci.c +++ 2.6-git/drivers/ata/ahci.c @@ -48,6 +48,9 @@ #define DRV_NAME ahci #define DRV_VERSION2.3 +static int ahci_enable_alpm(struct ata_port *ap, + enum link_pm policy); +static int ahci_disable_alpm(struct ata_port *ap); enum { AHCI_PCI_BAR= 5, @@ -98,6 +101,7 @@ enum { /* HOST_CAP bits */ HOST_CAP_SSC= (1 14), /* Slumber capable */ HOST_CAP_CLO= (1 24), /* Command List Override support */ + HOST_CAP_ALPM = (1 26), /* Aggressive Link PM support */ HOST_CAP_SSS= (1 27), /* Staggered Spin-up */ HOST_CAP_SNTF = (1 29), /* SNotification register */ HOST_CAP_NCQ= (1 30), /* Native Command Queueing */ @@ -153,6 +157,8 @@ enum { PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS, /* PORT_CMD bits */ + PORT_CMD_ASP= (1 27), /* Aggressive Slumber/Partial */ + PORT_CMD_ALPE = (1 26), /* Aggressive Link PM enable */ PORT_CMD_ATAPI = (1 24), /* Device is ATAPI */ PORT_CMD_LIST_ON= (1 15), /* cmd list DMA engine running */ PORT_CMD_FIS_ON = (1 14), /* FIS DMA engine running */ @@ -244,6 +250,11 @@ static int ahci_pci_device_suspend(struc static int ahci_pci_device_resume(struct pci_dev *pdev); #endif +static struct class_device_attribute *ahci_shost_attrs[] = { + class_device_attr_link_power_management_policy, + NULL +}; + static struct scsi_host_template ahci_sht = { .module = THIS_MODULE, .name = DRV_NAME, @@ -261,6 +272,7 @@ static struct scsi_host_template ahci_sh .slave_configure= ata_scsi_slave_config, .slave_destroy = ata_scsi_slave_destroy, .bios_param = ata_std_bios_param, + .shost_attrs= ahci_shost_attrs, }; static const struct ata_port_operations ahci_ops = { @@ -292,6 +304,8 @@ static const struct ata_port_operations .port_suspend = ahci_port_suspend, .port_resume= ahci_port_resume, #endif + .enable_pm = ahci_enable_alpm, + .disable_pm = ahci_disable_alpm, .port_start = ahci_port_start, .port_stop = ahci_port_stop, @@ -778,6 +792,156 @@ static void ahci_power_up(struct ata_por writel(cmd | PORT_CMD_ICC_ACTIVE, port_mmio + PORT_CMD); } +static int ahci_disable_alpm(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 cmd, scontrol; + struct ahci_port_priv *pp = ap-private_data; + + /* +* disable Interface Power Management State Transitions +* This is accomplished by setting bits 8:11 of the +* SATA Control register +*/ + scontrol = readl(port_mmio + PORT_SCR_CTL); + scontrol |= (0x3 8); + writel(scontrol, port_mmio + PORT_SCR_CTL); + + /* get the existing command bits */ + cmd = readl(port_mmio + PORT_CMD); + + /* disable ALPM and ASP */ + cmd = ~PORT_CMD_ASP; + cmd = ~PORT_CMD_ALPE; + + /* force the interface back to active */ + cmd |= PORT_CMD_ICC_ACTIVE; + + /* write out new cmd value */ + writel(cmd, port_mmio + PORT_CMD); + cmd = readl(port_mmio + PORT_CMD); + + /* wait 10ms to be sure we've come out of any low power state */ + msleep(10);
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 01 Aug 2007 12:24:44 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Kristen Carlson Accardi wrote: I don't think the interface you're suggesting is a good one. Do you? I think if it's applicable to SCSI at all it is fine. If it is not, then I think we need to make do with the interface we are given. I do not think we should hold up a feature for libata sysfs integration. Well, I guess we'll have to agree to disagree here and leave the decision to James and Jeff. A compromise to avoiding having this in SCSI is to use the shost_attrs array in the scsi host template. This will not change the way the interface will look, it will just remove any link power management stuff from the scsi subsystem directly and have it be an ATA only attribute. I did go ahead and leave the Documentation file in the scsi directory because that is where the attribute winds up being visible to the user. Here's the patch, and this series is also available at http://www.kernel.org/pub/linux/kernel/people/kristen/patches/SATA/alpm: Enable link power management for ata drivers libata drivers can define a function (enable_pm) that will perform hardware specific actions to enable whatever power management policy the user set up from the scsi sysfs interface if the driver supports it. This power management policy will be activated after all disks have been enumerated and intialized. Drivers should also define disable_pm, which will turn off link power management, but not change link power management policy. Signed-off-by: Kristen Carlson Accardi [EMAIL PROTECTED] Index: 2.6-git/drivers/ata/libata-scsi.c === --- 2.6-git.orig/drivers/ata/libata-scsi.c +++ 2.6-git/drivers/ata/libata-scsi.c @@ -110,6 +110,78 @@ static struct scsi_transport_template at .user_scan = ata_scsi_user_scan, }; +static const struct { + enum link_pmvalue; + char*name; +} link_pm_policy[] = { + { NOT_AVAILABLE, max_performance }, + { MIN_POWER, min_power }, + { MAX_PERFORMANCE, max_performance }, + { MEDIUM_POWER, medium_power }, +}; + +const char *ata_scsi_link_pm_policy(enum link_pm policy) +{ + int i; + char *name = NULL; + + for (i = 0; i ARRAY_SIZE(link_pm_policy); i++) { + if (link_pm_policy[i].value == policy) { + name = link_pm_policy[i].name; + break; + } + } + return name; +} + +static ssize_t store_link_pm_policy(struct class_device *class_dev, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(class_dev); + struct ata_port *ap = ata_shost_to_port(shost); + enum link_pm policy = 0; + int i; + + /* +* we are skipping array location 0 on purpose - this +* is because a value of NOT_AVAILABLE is displayed +* to the user as max_performance, but when the user +* writes max_performance, they actually want the +* value to match MAX_PERFORMANCE. +*/ + for (i = 1; i ARRAY_SIZE(link_pm_policy); i++) { + const int len = strlen(link_pm_policy[i].name); + if (strncmp(link_pm_policy[i].name, buf, len) == 0 + buf[len] == '\n') { + policy = link_pm_policy[i].value; + break; + } + } + if (!policy) + return -EINVAL; + + if (ata_scsi_set_link_pm_policy(ap, policy)) + return -EINVAL; + return count; +} + +static ssize_t +show_link_pm_policy(struct class_device *class_dev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(class_dev); + struct ata_port *ap = ata_shost_to_port(shost); + const char *policy = + ata_scsi_link_pm_policy(ap-pm_policy); + + if (!policy) + return -EINVAL; + + return snprintf(buf, 23, %s\n, policy); +} +CLASS_DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, + show_link_pm_policy, store_link_pm_policy); +EXPORT_SYMBOL_GPL(class_device_attr_link_power_management_policy); + static void ata_scsi_invalid_field(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { @@ -2904,6 +2976,47 @@ void ata_scsi_simulate(struct ata_device } } +int ata_scsi_set_link_pm_policy(struct ata_port *ap, + enum link_pm policy) +{ + int rc = -EINVAL; + int i; + + /* +* make sure no broken devices are on this port, +* and that all devices support interface power +* management +*/ + for (i = 0; i ATA_MAX_DEVICES; i++) { + struct ata_device *dev = ap-device[i]; + + /* only check drives which exist */ + if (!ata_dev_enabled(dev)) + continue; + + /* +
Re: [patch 2/4] Expose Power Management Policy option to users
Jeff Garzik wrote: Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM. I'm not sure whether this three level knob would be sufficient. It might be good enough if we're gonna develop extensive in-kernel black/white list specifying which method works on which combination but my gut tells me that it's best left to userland (probably in the form of per-notebook PS profile). Adding to the fun, there are quite a few broken devices out there which act weirdly when link PS actions are taken. Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state which unnecessarily hampers performance and might increase chance of device malfunction. So, mild NACK from me. -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote: Jeff Garzik wrote: Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM. I'm not sure whether this three level knob would be sufficient. adding more levels later is easy. It might be good enough if we're gonna develop extensive in-kernel black/white list specifying which method works on which combination but my gut tells me that it's best left to userland (probably in the form of per-notebook PS profile). either sucks. AHCI ALPM ought to work if it's supported; it's what other operating systems also use... Adding to the fun, there are quite a few broken devices out there which act weirdly when link PS actions are taken. do you have any specific examples that act funny with the patch in question here? (the patch tries to be careful, previous patches weren't always so please test this patch before claiming the concept as a whole is broken) Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state that's a chipset implementation decision not part of the spec/technology per se. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote: Jeff Garzik wrote: Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM. I'm not sure whether this three level knob would be sufficient. It might be good enough if we're gonna develop extensive in-kernel black/white list specifying which method works on which combination but my gut tells me that it's best left to userland (probably in the form of per-notebook PS profile). Adding to the fun, there are quite a few broken devices out there which act weirdly when link PS actions are taken. Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state which unnecessarily hampers performance and might increase chance of device malfunction. So, mild NACK from me. The other comment is that power saving seems to be a property of the transport rather than the host. If you do it in the transport classes, then you can expose all the knobs the actual transport possesses (which is, unfortunately, none for quite a few SCSI transports). James - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Arjan van de Ven wrote: On Tue, 2007-07-31 at 15:27 +0900, Tejun Heo wrote: Jeff Garzik wrote: Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM. I'm not sure whether this three level knob would be sufficient. adding more levels later is easy. Dunno whether they would be linear levels and putting HIPM, DIPM, ALPM selection into SCSI sysfs knob doesn't look so appealing. It might be good enough if we're gonna develop extensive in-kernel black/white list specifying which method works on which combination but my gut tells me that it's best left to userland (probably in the form of per-notebook PS profile). either sucks. AHCI ALPM ought to work if it's supported; it's what other operating systems also use... Adding to the fun, there are quite a few broken devices out there which act weirdly when link PS actions are taken. do you have any specific examples that act funny with the patch in question here? (the patch tries to be careful, previous patches weren't always so please test this patch before claiming the concept as a whole is broken) They were hardware problems. I don't think any amount of proper implementation can fix them. I have one DVD RAM somewhere in my pile of hardware which locks up solidly if any link PS mode is used and had a report of a HDD which had problem with link PS. Can't remember the details tho. Also, IIRC one of my wendies spins down on SLUMBER. Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state that's a chipset implementation decision not part of the spec/technology per se. That's actually something AHCI spec specifically states. From section 8.3.1.3. When PxCMD.ALPE is set to ‘1’, if the HBA recognizes that there are no commands to process, the HBA shall initiate a transition to Partial or Slumber interface power management state based upon the setting of PxCMD.ASP. The HBA recognizes no commands to transmit as either: • PxSACT is set to 0h, and the HBA updates PxCI from a non-zero value to 0h. • PxCI is set to 0h, and a Set Device Bits FIS is received that updates PxSACT from a non-zero value to 0h. Have no idea why it's specified this way. Adding 100ms or 1s cool down time wouldn't burn noticeably more power. Maybe AHCI expects the host driver to queue commands even for non-NCQ drives but libata currently doesn't do that. Anyways, I don't really think this attribute belongs to SCSI sysfs hierarchy. There currently isn't any alternative but sysfs is part of userland visible interface and putting something into SCSI sysfs hierarchy just because libata doesn't have one doesn't look like a good idea. sysfs isn't far from being detached from kobject and driver model. I think it would be best to wait a bit and build proper libata sysfs hierarchy which won't have to be changed later when libata departs from SCSI. Well, it isn't really a good way but IMHO it's better than sticking ATA power saving node into SCSI sysfs hierarchy. Thanks. -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Arjan van de Ven wrote: either sucks. AHCI ALPM ought to work if it's supported; it's what other operating systems also use... A question. Does the other OS enable ALPM without checking against white/black list? Or is it enabled only on certain configurations - e.g. specific notebooks, etc? -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
They were hardware problems. I don't think any amount of proper implementation can fix them. I have one DVD RAM somewhere in my pile of hardware which locks up solidly if any link PS mode is used and had a and the AHCI ALPM code decides to use power savings on this device? if so, please give us the idents so that we can add it to the blacklist as the first entry... (or can buy it to check it in detail first) -- if you want to mail me at work (you don't), use arjan (at) linux.intel.com Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Tue, 31 Jul 2007 23:45:25 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Anyways, I don't really think this attribute belongs to SCSI sysfs hierarchy. There currently isn't any alternative but sysfs is part of userland visible interface and putting something into SCSI sysfs hierarchy just because libata doesn't have one doesn't look like a good idea. sysfs isn't far from being detached from kobject and driver model. I think it would be best to wait a bit and build proper libata sysfs hierarchy which won't have to be changed later when libata departs from SCSI. Well, it isn't really a good way but IMHO it's better than sticking ATA power saving node into SCSI sysfs hierarchy. Wait a bit could be a very long time. Who is working on building this new libata sysfs support now? If the answer is no one, which I think it may be, do you want to hold up a feature that actually helps many people for possibly 6 months or more just because we have to go through scsi right now for our sysfs interface? on top of that, the last mail I got from James on this subject indicated that if we kept our granularity large with the power savings levels, SCSI can actually take advantage of this as well. Sure, we may have to tweak things around later, but isn't this what we do routinely? Holding up valuable features from the kernel because things aren't perfect yet seems really broken. As far as your complaints about broken hardware go, keep in mind that the patch set does provide a method of adding these disks to a blacklist, so I don't see that as a problem. And, the default for this feature is off, and user space would have to explicitly enable it. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Tue, 31 Jul 2007 15:27:34 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Jeff Garzik wrote: Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? ATA link PS is pretty complex with HIPM, DIPM and AHCI ALPM. I'm not sure whether this three level knob would be sufficient. It might be good enough if we're gonna develop extensive in-kernel black/white list specifying which method works on which combination but my gut tells me that it's best left to userland (probably in the form of per-notebook PS profile). I think what you are saying is that you'd like a way to use your HIPM and DIPM without ALPM on the AHCI driver. Fine - it's really easy to add these levels later - if they don't make sense at the sysfs interface we can add module params to specify the definition of min_power as being performed via HIPM and DIPM instead of ALPM - although as of yet we have no evidence what so ever that this method actually adds value over ALPM. Adding to the fun, there are quite a few broken devices out there which act weirdly when link PS actions are taken. OK - this is why I added the blacklist for this feature. Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state which unnecessarily hampers performance and might increase chance of device malfunction. might increase? How about some actual examples of where you've shown this to be a problem? I can assert that I think ALPM is a good idea, because I've never had a report of it causing problems. Windows has been using this feature for a very long time - and you have to admit that they have a pretty large market share. Nobody is complaining about ALPM increasing device malfunction, so unless you have proof it seems insane to nak due to this. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Hello, Kristen. Kristen Carlson Accardi wrote: On Tue, 31 Jul 2007 23:45:25 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Anyways, I don't really think this attribute belongs to SCSI sysfs hierarchy. There currently isn't any alternative but sysfs is part of userland visible interface and putting something into SCSI sysfs hierarchy just because libata doesn't have one doesn't look like a good idea. sysfs isn't far from being detached from kobject and driver model. I think it would be best to wait a bit and build proper libata sysfs hierarchy which won't have to be changed later when libata departs from SCSI. Well, it isn't really a good way but IMHO it's better than sticking ATA power saving node into SCSI sysfs hierarchy. Wait a bit could be a very long time. Who is working on building this new libata sysfs support now? I happen to be working on sysfs these days and guess why I started working on sysfs. :-) Disassociating sysfs from driver model is probably one or two patchsets away. It could have happened earlier but I wanted to pace things a bit so that the changes receive some testing through release cycles. Check how many sysfs related changes went through .23-rc1 merge window and expect about the same influx during the next cycle; with some luck, it can be complete before .24-rc1 window. If the answer is no one, which I think it may be, do you want to hold up a feature that actually helps many people for possibly 6 months or more just because we have to go through scsi right now for our sysfs interface? I don't necessarily want to but delaying it might be the right thing to do. Anyways, if we're going to do an interim solution, I don't think mainline SCSI sysfs hierarchy is the right place. Do it with module parameter which carries large to be deprecated sign or maintain out of tree patches. on top of that, the last mail I got from James on this subject indicated that if we kept our granularity large with the power savings levels, SCSI can actually take advantage of this as well. Sure, we may have to tweak things around later, but isn't this what we do routinely? Holding up valuable features from the kernel because things aren't perfect yet seems really broken. As far as your complaints about broken hardware go, keep in mind that the patch set does provide a method of adding these disks to a blacklist, so I don't see that as a problem. And, the default for this feature is off, and user space would have to explicitly enable it. This is gonna be a bit long. Please be patient. Due to the generosity of the ATA committee, we have, by default, at least two ways to achieve link PS - HIPS and DIPS. I dunno why but someone thought we needed two. And then, ahci people thought automatic HIPS would be nice, which I fully agree, and added ALPM. Unfortunately, they mandated ALPM to kick in the moment command engine becomes idle, so most current implementations suffer unnecessary performance hit when ALPM is active. We have this crazy number of combinations. HIPS, DIPS, not-so-hot-looking ALPM and probably some number of broken devices which may be happy with some method but not with others. Some might be happy with PARTIAL but vomit on SLUMBER. I might be being too pessimistic but my last two years in IDE/ATA land taught me to be pessimistic about hardware quality. Heck, I bet you some of non-intel ahci implementations which claim to support ALPM will crap themselves when actually told to do so. If we're gonna do this like NCQ and gonna put knowledge about all the combinations into the driver. The suggested interface is good enough. Heck, we probably don't even need three levels. On and off should be enough if things are done right as link PS doesn't have to incur noticeable performance degradation. But to do that, we'll need to test a lot of combinations and it's gonna be much harder than NCQ (some ahci implementations don't seem to actually enter PS mode when instructed to do so via SControl but turning off the controller saves a lot of power. Maybe ALPM works better on such cases) and the blacklist will probably be longer. The alternate way is to export flexible interface to userland and let userland decide and if we're gonna do that. SCSI sysfs just isn't the place. Thanks. -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Kristen Carlson Accardi wrote: I think what you are saying is that you'd like a way to use your HIPM and DIPM without ALPM on the AHCI driver. Fine - it's really easy to add these levels later - if they don't make sense at the sysfs interface we can add module params to specify the definition of min_power as being performed via HIPM and DIPM instead of ALPM - although as of yet we have no evidence what so ever that this method actually adds value over ALPM. I don't really care whose PS implementation goes in. Believe me. I try to stay away from that. I don't even like my previous implementation. ALPM has unnecessary performance penalty is not applicable to non-ahci controller. Have you tested ALPM on non-intel ahcis? There are a lot out there these days. I don't think the interface you're suggesting is a good one. Do you? Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state which unnecessarily hampers performance and might increase chance of device malfunction. might increase? How about some actual examples of where you've shown this to be a problem? I wouldn't have used might if I had actual examples. Well, feel free to disregard anything following the might. I just feel uneasy about jumping back and forth between PS and active states between consecutive commands. I can assert that I think ALPM is a good idea, because I've never had a report of it causing problems. Windows has been using this feature for a very long time - and you have to admit that they have a pretty large market share. Nobody is complaining about ALPM increasing device malfunction, so unless you have proof it seems insane to nak due to this. Is ALPM enabled by default? How do they deal with the performance degradation? -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Arjan van de Ven wrote: They were hardware problems. I don't think any amount of proper implementation can fix them. I have one DVD RAM somewhere in my pile of hardware which locks up solidly if any link PS mode is used and had a and the AHCI ALPM code decides to use power savings on this device? if so, please give us the idents so that we can add it to the blacklist as the first entry... (or can buy it to check it in detail first) Yeap, lemme check. It's TSSTcorpCD/DVDW SH-S183A with firmware revision SB01. Wanna check ID capability bits but 'hdparm -I /dev/sr0' is still broken and it's already past 3 am here. I'll report back tomorrow. Thanks. -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 01 Aug 2007 03:02:55 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Kristen Carlson Accardi wrote: I think what you are saying is that you'd like a way to use your HIPM and DIPM without ALPM on the AHCI driver. Fine - it's really easy to add these levels later - if they don't make sense at the sysfs interface we can add module params to specify the definition of min_power as being performed via HIPM and DIPM instead of ALPM - although as of yet we have no evidence what so ever that this method actually adds value over ALPM. I don't really care whose PS implementation goes in. Believe me. I try to stay away from that. I don't even like my previous implementation. ALPM has unnecessary performance penalty is not applicable to non-ahci controller. Have you tested ALPM on non-intel ahcis? There are a lot out there these days. I have not personally, however there has been a lot of testing of this hardware feature both on other OS and for this particular implementation by the powertop community, which is composed of community members running all sorts of hardware. So far I've not received any bug reports wrt non-intel AHCI. As I mentioned several times, the default for ALPM is to be off anyway. I don't think the interface you're suggesting is a good one. Do you? I think if it's applicable to SCSI at all it is fine. If it is not, then I think we need to make do with the interface we are given. I do not think we should hold up a feature for libata sysfs integration. Also, I generally don't think AHCI ALPM is a good idea. It doesn't have 'cool down' period before entering PS state which unnecessarily hampers performance and might increase chance of device malfunction. might increase? How about some actual examples of where you've shown this to be a problem? I wouldn't have used might if I had actual examples. Well, feel free to disregard anything following the might. I just feel uneasy about jumping back and forth between PS and active states between consecutive commands. I want us to be careful about spreading a lot of unease without data to back it up. I can assert that I think ALPM is a good idea, because I've never had a report of it causing problems. Windows has been using this feature for a very long time - and you have to admit that they have a pretty large market share. Nobody is complaining about ALPM increasing device malfunction, so unless you have proof it seems insane to nak due to this. Is ALPM enabled by default? How do they deal with the performance degradation? I believe so, but I'm obviously not privvy to their implementation details. - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Wed, 01 Aug 2007 02:48:42 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Hello, Kristen. Kristen Carlson Accardi wrote: On Tue, 31 Jul 2007 23:45:25 +0900 Tejun Heo [EMAIL PROTECTED] wrote: Anyways, I don't really think this attribute belongs to SCSI sysfs hierarchy. There currently isn't any alternative but sysfs is part of userland visible interface and putting something into SCSI sysfs hierarchy just because libata doesn't have one doesn't look like a good idea. sysfs isn't far from being detached from kobject and driver model. I think it would be best to wait a bit and build proper libata sysfs hierarchy which won't have to be changed later when libata departs from SCSI. Well, it isn't really a good way but IMHO it's better than sticking ATA power saving node into SCSI sysfs hierarchy. Wait a bit could be a very long time. Who is working on building this new libata sysfs support now? I happen to be working on sysfs these days and guess why I started working on sysfs. :-) Disassociating sysfs from driver model is probably one or two patchsets away. It could have happened earlier but I wanted to pace things a bit so that the changes receive some testing through release cycles. Check how many sysfs related changes went through .23-rc1 merge window and expect about the same influx during the next cycle; with some luck, it can be complete before .24-rc1 window. So at current rate of development and kernel release schedule, the best possible scenario is still 6 months away - whereas this patchset is already tested and ready for merging now. If the answer is no one, which I think it may be, do you want to hold up a feature that actually helps many people for possibly 6 months or more just because we have to go through scsi right now for our sysfs interface? I don't necessarily want to but delaying it might be the right thing to do. Anyways, if we're going to do an interim solution, I don't think mainline SCSI sysfs hierarchy is the right place. Do it with module parameter which carries large to be deprecated sign or maintain out of tree patches. Out of tree patches don't work. Nobody tests them. A module parameter will not work - we need to be able to expose the sysfs interface so that users may chose to turn the feature off and on at will - mainly according to their usage. For example, at boot time - you want ALPM off to maximize performance. Lets say when you are plugged into the wall socket, you leave it off, but then when you go on battery power you would want to enable it. on top of that, the last mail I got from James on this subject indicated that if we kept our granularity large with the power savings levels, SCSI can actually take advantage of this as well. Sure, we may have to tweak things around later, but isn't this what we do routinely? Holding up valuable features from the kernel because things aren't perfect yet seems really broken. As far as your complaints about broken hardware go, keep in mind that the patch set does provide a method of adding these disks to a blacklist, so I don't see that as a problem. And, the default for this feature is off, and user space would have to explicitly enable it. This is gonna be a bit long. Please be patient. Due to the generosity of the ATA committee, we have, by default, at least two ways to achieve link PS - HIPS and DIPS. I dunno why but someone thought we needed two. And then, ahci people thought automatic They thought we needed two because sometimes the device knows when it will be idle faster than the host can. this is why ALPM can determine idleness faster than any software algorithm on the host cpu can. You can use ALPM without having HIPM. You can also use it without having DIPM. HIPS would be nice, which I fully agree, and added ALPM. Unfortunately, they mandated ALPM to kick in the moment command engine becomes idle, so most current implementations suffer unnecessary performance hit when ALPM is active. unnecessary is subjective and at the moment, untrue. You have to make performance/power tradeoffs with anything, whether it's CPU or your AHCI controller. It's the current cost of getting out of these sleep states even if you aren't using ALPM but just doing HIPM or DIPM manually. But, this is why it's so critical to allow the user to control when ALPM is enabled dynamically - which this patchset does. The medium power setting is provided for users who wish to not go to SLUMBER and get the higher latency cost but still have some power savings. We have this crazy number of combinations. HIPS, DIPS, not-so-hot-looking ALPM and probably some number of broken devices which may be happy with some method but not with others. Some might be happy with PARTIAL but vomit on SLUMBER. I might be being too pessimistic but my last two years in IDE/ATA land taught me to be pessimistic about hardware
Re: [patch 2/4] Expose Power Management Policy option to users
Kristen Carlson Accardi wrote: So at current rate of development and kernel release schedule, the best possible scenario is still 6 months away - whereas this patchset is already tested and ready for merging now. The best possible scenario is .24-rc1 merge window with or without waiting. With waiting, the best possible scenario is harder to achieve tho. Out of tree patches don't work. Nobody tests them. A module parameter will not work - we need to be able to expose the sysfs interface so that users may chose to turn the feature off and on at will - mainly according to their usage. For example, at boot time - you want ALPM off to maximize performance. Lets say when you are plugged into the wall socket, you leave it off, but then when you go on battery power you would want to enable it. You can turn on and off dynamically using a module parameter. Although it's not a pretty interface, it should work as an interim solution if we absolutely must merge ALPM now. Due to the generosity of the ATA committee, we have, by default, at least two ways to achieve link PS - HIPS and DIPS. I dunno why but someone thought we needed two. And then, ahci people thought automatic They thought we needed two because sometimes the device knows when it will be idle faster than the host can. this is why ALPM can determine idleness faster than any software algorithm on the host cpu can. You can use ALPM without having HIPM. You can also use it without having DIPM. I see. I get that one way is better than another in some way. I'm just not convinced whether the advantage is substantial enough to justify the complexity. HIPS would be nice, which I fully agree, and added ALPM. Unfortunately, they mandated ALPM to kick in the moment command engine becomes idle, so most current implementations suffer unnecessary performance hit when ALPM is active. unnecessary is subjective and at the moment, untrue. You have to make performance/power tradeoffs with anything, whether it's CPU or your AHCI controller. It's the current cost of getting out of these sleep states even if you aren't using ALPM but just doing HIPM or DIPM manually. Having short cool-down time would remove most of performance degradation and I'm pretty sure power consumption would stay about the same. But, this is why it's so critical to allow the user to control when ALPM is enabled dynamically - which this patchset does. The medium power setting is provided for users who wish to not go to SLUMBER and get the higher latency cost but still have some power savings. Note that PARTIAL also incurs noticeable performance degradation. I understand you are being cautious based on your prior experience, but again we still have no data to show that we are going to have a lot of problems. Perhaps we should proceed optimistically and deal with problems when they actually occur rather than block something on a bet. I would agree with you, merge it and see the fireworks in -mm if it didn't involve user visible API. We have an API decision to make here and it hasn't been sufficiently considered yet. The alternate way is to export flexible interface to userland and let userland decide and if we're gonna do that. SCSI sysfs just isn't the place. How about a patch? I'd love to have a flexible interface to userland, it was my goal to provide this with the sysfs files. The requirement is that the users be able to turn ALPM off and on dynamically, and to be able to chose the power savings level you want - i.e. SLUMBER vs. PARTIAL - perferrably not using those terms since users really shouldn't need to know AHCI terminology just to chose a power management level. As I wrote before, which level of interface to export to userland is related with where to put the knowledge about working and broken combinations. Unfortunately, we don't have data to support one way or the other at the moment. All I'm saying is that we should be cautious before introducing user-land visible interface which lives in SCSI sysfs as it's unknown whether it would fit the reality or not. Sorry that I don't have an alternative patch now, but something which can relieve the situation is being worked on, so let's give it some time and see how things turn out. Things have to wait till the next -rc1 window anyway. Thanks. -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Kristen Carlson Accardi wrote: I don't think the interface you're suggesting is a good one. Do you? I think if it's applicable to SCSI at all it is fine. If it is not, then I think we need to make do with the interface we are given. I do not think we should hold up a feature for libata sysfs integration. Well, I guess we'll have to agree to disagree here and leave the decision to James and Jeff. I can assert that I think ALPM is a good idea, because I've never had a report of it causing problems. Windows has been using this feature for a very long time - and you have to admit that they have a pretty large market share. Nobody is complaining about ALPM increasing device malfunction, so unless you have proof it seems insane to nak due to this. Is ALPM enabled by default? How do they deal with the performance degradation? I believe so, but I'm obviously not privvy to their implementation details. It would be *really* great if we can find more about how they do it. How and when it's enabled and on which systems. Is it possible to find this out? -- tejun - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Kristen Carlson Accardi wrote: @@ -42,6 +42,16 @@ enum scsi_eh_timer_return { EH_RESET_TIMER, }; +/* + * shost pm policy: If you alter this, you also need to alter scsi_sysfs.c + * (for the ascii descriptions) + */ +enum scsi_host_link_pm { + SHOST_NOT_AVAILABLE, + SHOST_MIN_POWER, + SHOST_MAX_PERFORMANCE, + SHOST_MEDIUM_POWER, +}; struct scsi_host_template { struct module *module; @@ -345,6 +355,12 @@ struct scsi_host_template { int (*suspend)(struct scsi_device *, pm_message_t state); /* +* link power management support +*/ + int (*set_link_pm_policy)(struct Scsi_Host *, enum scsi_host_link_pm); + enum scsi_host_link_pm default_link_pm_policy; + + /* * Name of proc directory */ char *proc_name; @@ -642,6 +658,7 @@ struct Scsi_Host { enum scsi_host_state shost_state; + enum scsi_host_link_pm shost_link_pm_policy; /* ldm bits */ struct device shost_gendev; Any chance the SCSI peeps could ACK this, and then let me include it in the ALPM patchset in the libata tree? Jeff - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
Hi! This patch will modify the scsi subsystem to allow users to set a power management policy for the link. The scsi subsystem will create a new sysfs file for each host in /sys/class/scsi_host called link_power_management_policy. This file can have 3 possible values: Value Meaning --- min_power User wishes the link to conserve power as much as possible, even at the cost of some performance max_performance User wants priority to be on performance, not power savings medium_power User wants power savings, with less performance cost than min_power (but less power savings as well). Has that anything to do with HIPM vs. DIPM? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/4] Expose Power Management Policy option to users
On Mon, 9 Jul 2007 19:36:04 + Pavel Machek [EMAIL PROTECTED] wrote: Hi! This patch will modify the scsi subsystem to allow users to set a power management policy for the link. The scsi subsystem will create a new sysfs file for each host in /sys/class/scsi_host called link_power_management_policy. This file can have 3 possible values: Value Meaning --- min_power User wishes the link to conserve power as much as possible, even at the cost of some performance max_performance User wants priority to be on performance, not power savings medium_powerUser wants power savings, with less performance cost than min_power (but less power savings as well). Has that anything to do with HIPM vs. DIPM? Pavel Hi Pavel, I'm not sure I'm understanding your question, but if you mean the different levels of power savings you would get, no. With ALPM you have the option of instructing the link to either go into slumber or partial mode when it determines the link is quiet. Slumber uses less power, but has more latency to come out of. So, for a SATA device, setting the link_power_managment file to medium_power will set up the link to only go into Partial mode, which has less power savings (about half by my informal measurement), but less latency, and therefore less of a performance hit. Hopefully this answers your question. Kristen - To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/4] Expose Power Management Policy option to users
This patch will modify the scsi subsystem to allow users to set a power management policy for the link. The scsi subsystem will create a new sysfs file for each host in /sys/class/scsi_host called link_power_management_policy. This file can have 3 possible values: Value Meaning --- min_power User wishes the link to conserve power as much as possible, even at the cost of some performance max_performance User wants priority to be on performance, not power savings medium_powerUser wants power savings, with less performance cost than min_power (but less power savings as well). Signed-off-by: Kristen Carlson Accardi [EMAIL PROTECTED] Index: 2.6-git/drivers/scsi/hosts.c === --- 2.6-git.orig/drivers/scsi/hosts.c +++ 2.6-git/drivers/scsi/hosts.c @@ -54,6 +54,25 @@ static struct class shost_class = { }; /** + * scsi_host_set_link_pm - Change the link power management policy + * @shost: scsi host to change the policy of. + * @policy:policy to change to. + * + * Returns zero if successful or an error if the requested + * transition is illegal. + **/ +int scsi_host_set_link_pm(struct Scsi_Host *shost, + enum scsi_host_link_pm policy) +{ + struct scsi_host_template *sht = shost-hostt; + if (sht-set_link_pm_policy) + sht-set_link_pm_policy(shost, policy); + + return 0; +} +EXPORT_SYMBOL_GPL(scsi_host_set_link_pm); + +/** * scsi_host_set_state - Take the given host through the host * state model. * @shost: scsi host to change the state of. Index: 2.6-git/drivers/scsi/scsi_sysfs.c === --- 2.6-git.orig/drivers/scsi/scsi_sysfs.c +++ 2.6-git/drivers/scsi/scsi_sysfs.c @@ -189,6 +189,74 @@ show_shost_state(struct class_device *cl static CLASS_DEVICE_ATTR(state, S_IRUGO | S_IWUSR, show_shost_state, store_shost_state); +static const struct { + enum scsi_host_link_pm value; + char*name; +} shost_link_pm_policy[] = { + { SHOST_NOT_AVAILABLE, max_performance }, + { SHOST_MIN_POWER, min_power }, + { SHOST_MAX_PERFORMANCE, max_performance }, + { SHOST_MEDIUM_POWER, medium_power }, +}; + +const char *scsi_host_link_pm_policy(enum scsi_host_link_pm policy) +{ + int i; + char *name = NULL; + + for (i = 0; i ARRAY_SIZE(shost_link_pm_policy); i++) { + if (shost_link_pm_policy[i].value == policy) { + name = shost_link_pm_policy[i].name; + break; + } + } + return name; +} + +static ssize_t store_link_pm_policy(struct class_device *class_dev, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(class_dev); + enum scsi_host_link_pm policy = 0; + int i; + + /* +* we are skipping array location 0 on purpose - this +* is because a value of SHOST_NOT_AVAILABLE is displayed +* to the user as max_performance, but when the user +* writes max_performance, they actually want the +* value to match SHOST_MAX_PERFORMANCE. +*/ + for (i = 1; i ARRAY_SIZE(shost_link_pm_policy); i++) { + const int len = strlen(shost_link_pm_policy[i].name); + if (strncmp(shost_link_pm_policy[i].name, buf, len) == 0 + buf[len] == '\n') { + policy = shost_link_pm_policy[i].value; + break; + } + } + if (!policy) + return -EINVAL; + + if (scsi_host_set_link_pm(shost, policy)) + return -EINVAL; + return count; +} +static ssize_t +show_link_pm_policy(struct class_device *class_dev, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(class_dev); + const char *policy = + scsi_host_link_pm_policy(shost-shost_link_pm_policy); + + if (!policy) + return -EINVAL; + + return snprintf(buf, 23, %s\n, policy); +} +static CLASS_DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, + show_link_pm_policy, store_link_pm_policy); + shost_rd_attr(unique_id, %u\n); shost_rd_attr(host_busy, %hu\n); shost_rd_attr(cmd_per_lun, %hd\n); @@ -207,6 +275,7 @@ static struct class_device_attribute *sc class_device_attr_proc_name, class_device_attr_scan, class_device_attr_state, + class_device_attr_link_power_management_policy, NULL }; Index: 2.6-git/include/scsi/scsi_host.h === --- 2.6-git.orig/include/scsi/scsi_host.h +++ 2.6-git/include/scsi/scsi_host.h @@ -42,6 +42,16 @@ enum scsi_eh_timer_return { EH_RESET_TIMER, }; +/* +