[PATCH]iova-lockdep-false-alarm-fix.

2008-02-20 Thread mark gross
lockdep goes off on the iova copy_reserved_iova because it and a
function it calls grabs locks in the from, and the to of the copy
operation.

This patch gives the reserved_ioval_list locks special lockdep classes.


--mgross

Signed-off-by: [EMAIL PROTECTED]


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-20 
15:52:23.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-20 16:08:27.0 
-0800
@@ -1127,6 +1127,8 @@
 }
 
 static struct iova_domain reserved_iova_list;
+static struct lock_class_key reserved_alloc_key;
+static struct lock_class_key reserved_rbtree_key;
 
 static void dmar_init_reserved_ranges(void)
 {
@@ -1137,6 +1139,11 @@
 
init_iova_domain(reserved_iova_list, DMA_32BIT_PFN);
 
+   lockdep_set_class(reserved_iova_list.iova_alloc_lock,
+   reserved_alloc_key);
+   lockdep_set_class(reserved_iova_list.iova_rbtree_lock,
+   reserved_rbtree_key);
+
/* IOAPIC ranges shouldn't be accessed by DMA */
iova = reserve_iova(reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
IOVA_PFN(IOAPIC_RANGE_END));
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Wed, Feb 13, 2008 at 10:23:34AM -0800, Randy Dunlap wrote:
>> Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt
>> ===
>> --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt
>> 2008-02-12 
>> 07:12:06.0 -0800
>> +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 
>> 11:36:07.0 -0800
>> @@ -822,6 +822,10 @@
>>  than 32 bit addressing. The default is to look
>>  for translation below 32 bit and if not available
>>  then look in the higher range.
>> +strict [Default Off]
>> +With this option on every umap_single operation will
>
> so I'll ask one more time :(
> Shouldn't this be "unmap_single" ?
>
>> +result in a hardware IOTLB flush operation as opposed
>> +to batching them for performance.
>>  io_delay=   [X86-32,X86-64] I/O delay method
>>  0x80
>
>

crap I was focused on the single.

Signed-off-by: <[EMAIL PROTECTED]>


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "iova.h"
 #include "intel-iommu.h"
 #include  /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+   } else if (!strncmp(str, "strict", 8)) {
+   printk(KERN_INFO
+   "Intel-IOMMU: disable batched IOTLB flush\n");
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ",");
@@ -965,17 +999,13 @@
set_bit(0, iommu->domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index < cnt; index ++)
+   for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,30 @@
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
-   iommu = alloc_iommu(drhd);
+   g_num_of_iommus++;
+   }
+
+   nlongs = BITS_TO_LONGS(g_num_of_iommus);
+   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+   if (!g_iommus_to_flush) {
+   printk(KERN_ERR "Intel-IOMMU: "
+   "Allocating 

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Tue, Feb 12, 2008 at 07:54:48AM -0800, David Miller wrote:
> > Something could be done:
> > we could enable drivers to have DMA-pools they manage that get mapped
> > and are re-used.
> > 
> > I would rather the DMA-pools be tied to PID's that way any bad behavior
> > would be limited to the address space of the process using the device.
> > I haven't thought about how hard this would be to do but it would be
> > nice.  I think this could be tricky.
> 
> Yes, this is a good idea especially for networking.
> 
> For transmit on 10GB links the IOMMU setup is near the top
> of the profiles.

true.
 
> What a driver could do is determine the maximum number of
> IOMMU pages it could need to map one maximally sized packet.
> So then it allocates enough space for all such entries in
> it's TX ring.
> 
> This eliminates the range allocation from the transmit path.
> All that's left is "remap DMA range X to scatterlist Y"
> 
> And yes it would be nice to have dma_map_skb() type interfaces
> so that we don't walk into the IOMMU code N times per packet.


/me starts looking more closely at how this could be done...

--mgross
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Tue, Feb 12, 2008 at 12:21:08PM -0800, Randy Dunlap wrote:
>> Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt
>> ===
>> --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt
>> 2008-02-12 
>> 07:12:06.0 -0800
>> +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 
>> 11:36:07.0 -0800
>> @@ -822,6 +822,10 @@
>>  than 32 bit addressing. The default is to look
>>  for translation below 32 bit and if not available
>>  then look in the higher range.
>> +strict [Default Off]
>> +With this option on every umap_signle operation will
>
>   umap_signle ???  (again)
>
>> +result in a hardware IOTLB flush operation as opposed
>> +to batching them for performance.
>>  io_delay=   [X86-32,X86-64] I/O delay method
>>  0x80
>
>
> -- 
> ~Randy
>

Signed-off-by: <[EMAIL PROTECTED]>


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "iova.h"
 #include "intel-iommu.h"
 #include  /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+   } else if (!strncmp(str, "strict", 8)) {
+   printk(KERN_INFO
+   "Intel-IOMMU: disable batched IOTLB flush\n");
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ",");
@@ -965,17 +999,13 @@
set_bit(0, iommu->domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index < cnt; index ++)
+   for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,30 @@
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
-   iommu = alloc_iommu(drhd);
+   g_num_of_iommus++;
+   }
+
+   nlongs = BITS_TO_LONGS(g_num_of_iommus);
+   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+   if (!g_iommus_to_flush) {
+   printk(KERN_ERR "Intel-IOMMU: "
+   "Allocating bitmap array failed\n");
+   return -ENOMEM;

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Tue, Feb 12, 2008 at 12:21:08PM -0800, Randy Dunlap wrote:
 Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt
 ===
 --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt
 2008-02-12 
 07:12:06.0 -0800
 +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 
 11:36:07.0 -0800
 @@ -822,6 +822,10 @@
  than 32 bit addressing. The default is to look
  for translation below 32 bit and if not available
  then look in the higher range.
 +strict [Default Off]
 +With this option on every umap_signle operation will

   umap_signle ???  (again)

 +result in a hardware IOTLB flush operation as opposed
 +to batching them for performance.
  io_delay=   [X86-32,X86-64] I/O delay method
  0x80


 -- 
 ~Randy


Signed-off-by: [EMAIL PROTECTED]


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include linux/init.h
 #include linux/bitmap.h
+#include linux/debugfs.h
 #include linux/slab.h
 #include linux/irq.h
 #include linux/interrupt.h
@@ -30,6 +31,7 @@
 #include linux/dmar.h
 #include linux/dma-mapping.h
 #include linux/mempool.h
+#include linux/timer.h
 #include iova.h
 #include intel-iommu.h
 #include asm/proto.h /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
Intel-IOMMU: disable GFX device mapping\n);
} else if (!strncmp(str, forcedac, 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
Intel-IOMMU: Forcing DAC for PCI devices\n);
dmar_forcedac = 1;
+   } else if (!strncmp(str, strict, 8)) {
+   printk(KERN_INFO
+   Intel-IOMMU: disable batched IOTLB flush\n);
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ,);
@@ -965,17 +999,13 @@
set_bit(0, iommu-domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K);
if (!iommu-reg) {
printk(KERN_ERR IOMMU: can't map the region\n);
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index  cnt; index ++)
+   for (index = 0; index  cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,30 @@
for_each_drhd_unit(drhd) {
if (drhd-ignored)
continue;
-   iommu = alloc_iommu(drhd);
+   g_num_of_iommus++;
+   }
+
+   nlongs = BITS_TO_LONGS(g_num_of_iommus);
+   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+   if (!g_iommus_to_flush) {
+   printk(KERN_ERR Intel-IOMMU: 
+  

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Tue, Feb 12, 2008 at 07:54:48AM -0800, David Miller wrote:
  Something could be done:
  we could enable drivers to have DMA-pools they manage that get mapped
  and are re-used.
  
  I would rather the DMA-pools be tied to PID's that way any bad behavior
  would be limited to the address space of the process using the device.
  I haven't thought about how hard this would be to do but it would be
  nice.  I think this could be tricky.
 
 Yes, this is a good idea especially for networking.
 
 For transmit on 10GB links the IOMMU setup is near the top
 of the profiles.

true.
 
 What a driver could do is determine the maximum number of
 IOMMU pages it could need to map one maximally sized packet.
 So then it allocates enough space for all such entries in
 it's TX ring.
 
 This eliminates the range allocation from the transmit path.
 All that's left is remap DMA range X to scatterlist Y
 
 And yes it would be nice to have dma_map_skb() type interfaces
 so that we don't walk into the IOMMU code N times per packet.


/me starts looking more closely at how this could be done...

--mgross
 
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-13 Thread mark gross
On Wed, Feb 13, 2008 at 10:23:34AM -0800, Randy Dunlap wrote:
 Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt
 ===
 --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt
 2008-02-12 
 07:12:06.0 -0800
 +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 
 11:36:07.0 -0800
 @@ -822,6 +822,10 @@
  than 32 bit addressing. The default is to look
  for translation below 32 bit and if not available
  then look in the higher range.
 +strict [Default Off]
 +With this option on every umap_single operation will

 so I'll ask one more time :(
 Shouldn't this be unmap_single ?

 +result in a hardware IOTLB flush operation as opposed
 +to batching them for performance.
  io_delay=   [X86-32,X86-64] I/O delay method
  0x80



crap I was focused on the single.

Signed-off-by: [EMAIL PROTECTED]


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include linux/init.h
 #include linux/bitmap.h
+#include linux/debugfs.h
 #include linux/slab.h
 #include linux/irq.h
 #include linux/interrupt.h
@@ -30,6 +31,7 @@
 #include linux/dmar.h
 #include linux/dma-mapping.h
 #include linux/mempool.h
+#include linux/timer.h
 #include iova.h
 #include intel-iommu.h
 #include asm/proto.h /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
Intel-IOMMU: disable GFX device mapping\n);
} else if (!strncmp(str, forcedac, 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
Intel-IOMMU: Forcing DAC for PCI devices\n);
dmar_forcedac = 1;
+   } else if (!strncmp(str, strict, 8)) {
+   printk(KERN_INFO
+   Intel-IOMMU: disable batched IOTLB flush\n);
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ,);
@@ -965,17 +999,13 @@
set_bit(0, iommu-domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K);
if (!iommu-reg) {
printk(KERN_ERR IOMMU: can't map the region\n);
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index  cnt; index ++)
+   for (index = 0; index  cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,30 @@
for_each_drhd_unit(drhd) {
if (drhd-ignored)
continue;
-   iommu = alloc_iommu(drhd);
+   g_num_of_iommus++;
+   }
+
+   nlongs = BITS_TO_LONGS(g_num_of_iommus);
+   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+   if (!g_iommus_to_flush) {
+   

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 08:34:39AM -0800, Randy Dunlap wrote:
> mark gross wrote:
>>  Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
>> ===
>> --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c  2008-02-12 
>> 07:12:06.0 -0800
>> +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c   2008-02-12 
>> 07:47:04.0 -0800
>
>> @@ -1672,7 +1702,30 @@
>>  for_each_drhd_unit(drhd) {
>>  if (drhd->ignored)
>>  continue;
>> -iommu = alloc_iommu(drhd);
>> +g_num_of_iommus++;
>> +}
>> +
>> +nlongs = BITS_TO_LONGS(g_num_of_iommus);
>> +g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
>> +if (!g_iommus_to_flush) {
>> +printk(KERN_ERR "Intel-IOMMU: \
>> +Allocating bitmap array failed\n");
>
> I think that will make a string like below:
> Intel-IOMMU:  Allocating bitmap array failed
>

you are right.

>> +return -ENOMEM;
>> +}
>> +strict [Default Off]
>> +With this option on every umap_signal opperation will
>
> unmap_single operation
>
>> +result in a hardware IOTLB flush opperation as opposed

Signed-off-by: <[EMAIL PROTECTED]>


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "iova.h"
 #include "intel-iommu.h"
 #include  /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+   } else if (!strncmp(str, "strict", 8)) {
+   printk(KERN_INFO
+   "Intel-IOMMU: disable batched IOTLB flush\n");
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ",");
@@ -965,17 +999,13 @@
set_bit(0, iommu->domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index < cnt; index ++)
+   for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Mon, Feb 11, 2008 at 03:27:16PM -0800, Randy Dunlap wrote:
> On Mon, 11 Feb 2008 14:41:05 -0800 mark gross wrote:
> 
> > The hole is the following scenarios: 
> > do many map_signal operations, do some unmap_signals, reuse a recently
> > unmapped page,  > memory>
> > 
> > Or: you have rouge hardware using DMA's to look at pages: do many
> 
>   or rogue hardware?

yes bad-guy hardware.

> 
> > map_signal's, do many unmap_singles, reuse some unmapped pages : 
> 
>   signal  single
> 
> > 
> 
>why rouge?

because I'm a dumb ass.

> 
> > Note : these holes are very hard to get too, as the IOTLB is small and
> > only the PTE's still in the IOTLB can be accessed through this
> > mechanism.
> > 
> > Its recommended that strict is used when developing drivers that do DMA
> > operations to catch bugs early.  For production code where performance
> > is desired running with the batched IOTLB flushing is a good way to
> > go.
> > 
> > 
> > --mgross
> > 
> > 
> > Signed-off-by: <[EMAIL PROTECTED]>
> > 
> > 
> > 
> > Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
> > ===
> > --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 
> > 13:03:10.0 -0800
> > +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-11 
> > 10:38:49.0 -0800
> 
> > @@ -50,11 +52,39 @@
> >  
> >  #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1)
> >  
> > +
> > +static void flush_unmaps_timeout(unsigned long data);
> > +
> > +static struct timer_list unmap_timer =
> > +   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
> > +
> > +struct unmap_list {
> > +   struct list_head list;
> > +   struct dmar_domain *domain;
> > +   struct iova *iova;
> > +};
> > +
> > +static struct intel_iommu *g_iommus;
> > +/* bitmap for indexing intel_iommus */
> > +static unsigned long   *g_iommus_to_flush;
> > +static int g_num_of_iommus;
> > +
> > +static DEFINE_SPINLOCK(async_umap_flush_lock);
> > +static LIST_HEAD(unmaps_to_do);
> > +
> > +static int timer_on;
> > +static long list_size;
> > +static int high_watermark;
> > +
> > +static struct dentry *intel_iommu_debug, *debug;
> > +
> > +
> >  static void domain_remove_dev_info(struct dmar_domain *domain);
> >  
> >  static int dmar_disabled;
> >  static int __initdata dmar_map_gfx = 1;
> >  static int dmar_forcedac;
> > +static int intel_iommu_strict;
> >  
> >  #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
> >  static DEFINE_SPINLOCK(device_domain_lock);
> > @@ -73,9 +103,13 @@
> > printk(KERN_INFO
> > "Intel-IOMMU: disable GFX device mapping\n");
> > } else if (!strncmp(str, "forcedac", 8)) {
> > -   printk (KERN_INFO
> > +   printk(KERN_INFO
> > "Intel-IOMMU: Forcing DAC for PCI devices\n");
> > dmar_forcedac = 1;
> > +   } else if (!strncmp(str, "strict", 8)) {
> > +   printk(KERN_INFO
> > +   "Intel-IOMMU: disable bached IOTLB flush\n");
> 
>   batched

fixed.

> 
> > +   intel_iommu_strict = 1;
> > }
> >  
> > str += strcspn(str, ",");
> 
> > @@ -1672,7 +1702,29 @@
> > for_each_drhd_unit(drhd) {
> > if (drhd->ignored)
> > continue;
> > -   iommu = alloc_iommu(drhd);
> > +   g_num_of_iommus++;
> > +   }
> > +
> > +   nlongs = BITS_TO_LONGS(g_num_of_iommus);
> > +   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
> > +   if (!g_iommus_to_flush) {
> > +   printk(KERN_ERR "Allocating bitmap array failed\n");
> 
> identify:   "IOMMU:
> 
> > +   return -ENOMEM;
> > +   }
> > +
> > +   g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
> > +   if (!g_iommus) {
> > +   kfree(g_iommus_to_flush);
> > +   ret = -ENOMEM;
> > +   goto error;
> > +   }
> > +
> > +   i = 0;
> > +   for_each_drhd_unit(drhd) {
> > +   i

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 01:00:06AM -0800, David Miller wrote:
> From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
> Date: Tue, 12 Feb 2008 10:52:56 +0200
> 
> > The streaming DMA-API was designed to conserve IOMMU mappings for
> > machines where IOMMU mappings are a scarce resource, and is a poor
> > fit for a modern IOMMU such as VT-d with a 64-bit IO address space
> > (or even an IOMMU with a 32-bit address space such as Calgary) where
> > there are plenty of IOMMU mappings available.
> 
> For the 64-bit case what you are suggesting eventually amounts
> to mapping all available RAM in the IOMMU.

Something could be done:
we could enable drivers to have DMA-pools they manage that get mapped
and are re-used.

I would rather the DMA-pools be tied to PID's that way any bad behavior
would be limited to the address space of the process using the device.
I haven't thought about how hard this would be to do but it would be
nice.  I think this could be tricky.

Application sets up ring buffer of device DMA memory, passes this to
driver/stack.  Need to handle hitting high water marks and application
exit clean up sanely... 

--mgross

> 
> Although an extreme version of your suggestion, it would be the
> most efficient as it would require zero IOMMU flush operations.
> 
> But we'd lose things like protection and other benefits.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 10:52:56AM +0200, Muli Ben-Yehuda wrote:
> On Mon, Feb 11, 2008 at 02:41:05PM -0800, mark gross wrote:
> 
> > The intel-iommu hardware requires a polling operation to flush IOTLB
> > PTE's after an unmap operation.  Through some TSC instrumentation of
> > a netperf UDP stream with small packets test case it was seen that
> > the flush operations where sucking up to 16% of the CPU time doing
> > iommu_flush_iotlb's
> > 
> > The following patch batches the IOTLB flushes removing most of the
> > overhead in flushing the IOTLB's.  It works by building a list of to
> > be released IOVA's that is iterated over when a timer goes off or
> > when a high water mark is reached.
> > 
> > The wrinkle this has is that the memory protection and page fault
> > warnings from errant DMA operations is somewhat reduced, hence a kernel
> > parameter is added to revert back to the "strict" page flush / unmap
> > behavior. 
> > 
> > The hole is the following scenarios: 
> > do many map_signal operations, do some unmap_signals, reuse a recently
> > unmapped page,  > memory>
> > 
> > Or: you have rouge hardware using DMA's to look at pages: do many
> > map_signal's, do many unmap_singles, reuse some unmapped pages : 
> > 
> > 
> > Note : these holes are very hard to get too, as the IOTLB is small
> > and only the PTE's still in the IOTLB can be accessed through this
> > mechanism.
> > 
> > Its recommended that strict is used when developing drivers that do
> > DMA operations to catch bugs early.  For production code where
> > performance is desired running with the batched IOTLB flushing is a
> > good way to go.
> 
> While I don't disagree with this patch in principle (Calgary does the
> same thing due to expensive IOTLB flushes) the right way to fix it
> IMHO is to fix the drivers to batch mapping and unmapping operations
> or map up-front and unmap when done. The streaming DMA-API was
> designed to conserve IOMMU mappings for machines where IOMMU mappings
> are a scarce resource, and is a poor fit for a modern IOMMU such as
> VT-d with a 64-bit IO address space (or even an IOMMU with a 32-bit
> address space such as Calgary) where there are plenty of IOMMU
> mappings available.

Yes, have a DMA pool of DMA addresses to use and re-use in the stack
instead of setting up and tearing down the PTE's is something we need to
look at closely for network and other high DMA traffic stacks. 

--mgross


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: iova RB tree setup tweak.

2008-02-12 Thread mark gross
On Mon, Feb 11, 2008 at 02:29:46PM -0800, Andrew Morton wrote:
> On Mon, 11 Feb 2008 13:56:51 -0800
> mark gross <[EMAIL PROTECTED]> wrote:
> 
> > The following patch merges two functions into one allowing for a 3%
> > reduction in overhead in locating, allocating and inserting pages for
> > use in IOMMU operations.
> > 
> > Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take
> > a look.  It works by re-using some of the information gathered in the
> > search for the pages to use in setting up the IOTLB's in the insertion
> > of the iova structure into the RB tree.
> > 
> 
> I guess this is a PCI patch hence I'd be tagging is as
> to-be-merged-via-greg's-pci-tree.
> 
> > 
> > Singed-off-by: <[EMAIL PROTECTED]>
> > 
> 
> ow, that musta hurt.

Not at first but ever since I posted the patch its been a bit.  

> 
> > 
> > Index: linux-2.6.24-mm1/drivers/pci/iova.c
> > ===
> > --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 
> > 11:05:44.0 -0800
> > +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 
> > -0800
> > @@ -72,20 +72,25 @@
> > return pad_size;
> >  }
> >  
> > -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long 
> > size,
> > -   unsigned long limit_pfn, struct iova *new, bool size_aligned)
> > +static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
> > +   unsigned long size, unsigned long limit_pfn,
> > +   struct iova *new, bool size_aligned)
> >  {
> > -   struct rb_node *curr = NULL;
> > +   struct rb_node *prev, *curr = NULL;
> > unsigned long flags;
> > unsigned long saved_pfn;
> > unsigned int pad_size = 0;
> >  
> > /* Walk the tree backwards */
> > spin_lock_irqsave(>iova_rbtree_lock, flags);
> > saved_pfn = limit_pfn;
> > curr = __get_cached_rbnode(iovad, _pfn);
> > +   prev = curr;
> > while (curr) {
> > struct iova *curr_iova = container_of(curr, struct iova, node);
> > +
> > if (limit_pfn < curr_iova->pfn_lo)
> > goto move_left;
> > else if (limit_pfn < curr_iova->pfn_hi)
> > @@ -99,6 +104,7 @@
> 
> For some reason patch(1) claims that the patch is corrupted at this line.
> 

My bad.  Try the following:

--mgross

Signed-off-by : <[EMAIL PROTECTED]> fa-la-la


Index: linux-2.6.24-mm1/drivers/pci/iova.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 
-0800
+++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-12 07:12:47.0 -0800
@@ -72,10 +72,11 @@
return pad_size;
 }
 
-static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
-   unsigned long limit_pfn, struct iova *new, bool size_aligned)
+static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
+   unsigned long size, unsigned long limit_pfn,
+   struct iova *new, bool size_aligned)
 {
-   struct rb_node *curr = NULL;
+   struct rb_node *prev, *curr = NULL;
unsigned long flags;
unsigned long saved_pfn;
unsigned int pad_size = 0;
@@ -84,8 +85,10 @@
spin_lock_irqsave(>iova_rbtree_lock, flags);
saved_pfn = limit_pfn;
curr = __get_cached_rbnode(iovad, _pfn);
+   prev = curr;
while (curr) {
struct iova *curr_iova = container_of(curr, struct iova, node);
+
if (limit_pfn < curr_iova->pfn_lo)
goto move_left;
else if (limit_pfn < curr_iova->pfn_hi)
@@ -99,6 +102,7 @@
 adjust_limit_pfn:
limit_pfn = curr_iova->pfn_lo - 1;
 move_left:
+   prev = curr;
curr = rb_prev(curr);
}
 
@@ -115,7 +119,33 @@
new->pfn_lo = limit_pfn - (size + pad_size) + 1;
new->pfn_hi = new->pfn_lo + size - 1;
 
+   /* Insert the new_iova into domain rbtree by holding writer lock */
+   /* Add new node and rebalance tree. */
+   {
+   struct rb_node **entry = &((prev)), *parent = NULL;
+   /* Figure out where to put new node */
+   while (*entry) {
+   struct iova *this = container_of(*entry,
+   struct iova, node);
+   parent = *entry;
+
+   if (new->pfn_lo < this->pfn_lo)
+   entry = &((*entry)-&

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 08:34:39AM -0800, Randy Dunlap wrote:
 mark gross wrote:
  Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
 ===
 --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c  2008-02-12 
 07:12:06.0 -0800
 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c   2008-02-12 
 07:47:04.0 -0800

 @@ -1672,7 +1702,30 @@
  for_each_drhd_unit(drhd) {
  if (drhd-ignored)
  continue;
 -iommu = alloc_iommu(drhd);
 +g_num_of_iommus++;
 +}
 +
 +nlongs = BITS_TO_LONGS(g_num_of_iommus);
 +g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
 +if (!g_iommus_to_flush) {
 +printk(KERN_ERR Intel-IOMMU: \
 +Allocating bitmap array failed\n);

 I think that will make a string like below:
 Intel-IOMMU:  Allocating bitmap array failed


you are right.

 +return -ENOMEM;
 +}
 +strict [Default Off]
 +With this option on every umap_signal opperation will

 unmap_single operation

 +result in a hardware IOTLB flush opperation as opposed

Signed-off-by: [EMAIL PROTECTED]


Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 
07:12:06.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-12 11:35:42.0 
-0800
@@ -21,6 +21,7 @@
 
 #include linux/init.h
 #include linux/bitmap.h
+#include linux/debugfs.h
 #include linux/slab.h
 #include linux/irq.h
 #include linux/interrupt.h
@@ -30,6 +31,7 @@
 #include linux/dmar.h
 #include linux/dma-mapping.h
 #include linux/mempool.h
+#include linux/timer.h
 #include iova.h
 #include intel-iommu.h
 #include asm/proto.h /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
Intel-IOMMU: disable GFX device mapping\n);
} else if (!strncmp(str, forcedac, 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
Intel-IOMMU: Forcing DAC for PCI devices\n);
dmar_forcedac = 1;
+   } else if (!strncmp(str, strict, 8)) {
+   printk(KERN_INFO
+   Intel-IOMMU: disable batched IOTLB flush\n);
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ,);
@@ -965,17 +999,13 @@
set_bit(0, iommu-domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K);
if (!iommu-reg) {
printk(KERN_ERR IOMMU: can't map the region\n);
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index  cnt; index ++)
+   for (index = 0; index  cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,30 @@
for_each_drhd_unit(drhd) {
if (drhd-ignored)
continue

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 01:00:06AM -0800, David Miller wrote:
 From: Muli Ben-Yehuda [EMAIL PROTECTED]
 Date: Tue, 12 Feb 2008 10:52:56 +0200
 
  The streaming DMA-API was designed to conserve IOMMU mappings for
  machines where IOMMU mappings are a scarce resource, and is a poor
  fit for a modern IOMMU such as VT-d with a 64-bit IO address space
  (or even an IOMMU with a 32-bit address space such as Calgary) where
  there are plenty of IOMMU mappings available.
 
 For the 64-bit case what you are suggesting eventually amounts
 to mapping all available RAM in the IOMMU.

Something could be done:
we could enable drivers to have DMA-pools they manage that get mapped
and are re-used.

I would rather the DMA-pools be tied to PID's that way any bad behavior
would be limited to the address space of the process using the device.
I haven't thought about how hard this would be to do but it would be
nice.  I think this could be tricky.

Application sets up ring buffer of device DMA memory, passes this to
driver/stack.  Need to handle hitting high water marks and application
exit clean up sanely... 

--mgross

 
 Although an extreme version of your suggestion, it would be the
 most efficient as it would require zero IOMMU flush operations.
 
 But we'd lose things like protection and other benefits.
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Tue, Feb 12, 2008 at 10:52:56AM +0200, Muli Ben-Yehuda wrote:
 On Mon, Feb 11, 2008 at 02:41:05PM -0800, mark gross wrote:
 
  The intel-iommu hardware requires a polling operation to flush IOTLB
  PTE's after an unmap operation.  Through some TSC instrumentation of
  a netperf UDP stream with small packets test case it was seen that
  the flush operations where sucking up to 16% of the CPU time doing
  iommu_flush_iotlb's
  
  The following patch batches the IOTLB flushes removing most of the
  overhead in flushing the IOTLB's.  It works by building a list of to
  be released IOVA's that is iterated over when a timer goes off or
  when a high water mark is reached.
  
  The wrinkle this has is that the memory protection and page fault
  warnings from errant DMA operations is somewhat reduced, hence a kernel
  parameter is added to revert back to the strict page flush / unmap
  behavior. 
  
  The hole is the following scenarios: 
  do many map_signal operations, do some unmap_signals, reuse a recently
  unmapped page, errant DMA hardware sneaks through and steps on reused
  memory
  
  Or: you have rouge hardware using DMA's to look at pages: do many
  map_signal's, do many unmap_singles, reuse some unmapped pages : 
  rouge hardware looks at reused page
  
  Note : these holes are very hard to get too, as the IOTLB is small
  and only the PTE's still in the IOTLB can be accessed through this
  mechanism.
  
  Its recommended that strict is used when developing drivers that do
  DMA operations to catch bugs early.  For production code where
  performance is desired running with the batched IOTLB flushing is a
  good way to go.
 
 While I don't disagree with this patch in principle (Calgary does the
 same thing due to expensive IOTLB flushes) the right way to fix it
 IMHO is to fix the drivers to batch mapping and unmapping operations
 or map up-front and unmap when done. The streaming DMA-API was
 designed to conserve IOMMU mappings for machines where IOMMU mappings
 are a scarce resource, and is a poor fit for a modern IOMMU such as
 VT-d with a 64-bit IO address space (or even an IOMMU with a 32-bit
 address space such as Calgary) where there are plenty of IOMMU
 mappings available.

Yes, have a DMA pool of DMA addresses to use and re-use in the stack
instead of setting up and tearing down the PTE's is something we need to
look at closely for network and other high DMA traffic stacks. 

--mgross


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: iova RB tree setup tweak.

2008-02-12 Thread mark gross
On Mon, Feb 11, 2008 at 02:29:46PM -0800, Andrew Morton wrote:
 On Mon, 11 Feb 2008 13:56:51 -0800
 mark gross [EMAIL PROTECTED] wrote:
 
  The following patch merges two functions into one allowing for a 3%
  reduction in overhead in locating, allocating and inserting pages for
  use in IOMMU operations.
  
  Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take
  a look.  It works by re-using some of the information gathered in the
  search for the pages to use in setting up the IOTLB's in the insertion
  of the iova structure into the RB tree.
  
 
 I guess this is a PCI patch hence I'd be tagging is as
 to-be-merged-via-greg's-pci-tree.
 
  
  Singed-off-by: [EMAIL PROTECTED]
  
 
 ow, that musta hurt.

Not at first but ever since I posted the patch its been a bit.  

 
  
  Index: linux-2.6.24-mm1/drivers/pci/iova.c
  ===
  --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 
  11:05:44.0 -0800
  +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 
  -0800
  @@ -72,20 +72,25 @@
  return pad_size;
   }
   
  -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long 
  size,
  -   unsigned long limit_pfn, struct iova *new, bool size_aligned)
  +static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
  +   unsigned long size, unsigned long limit_pfn,
  +   struct iova *new, bool size_aligned)
   {
  -   struct rb_node *curr = NULL;
  +   struct rb_node *prev, *curr = NULL;
  unsigned long flags;
  unsigned long saved_pfn;
  unsigned int pad_size = 0;
   
  /* Walk the tree backwards */
  spin_lock_irqsave(iovad-iova_rbtree_lock, flags);
  saved_pfn = limit_pfn;
  curr = __get_cached_rbnode(iovad, limit_pfn);
  +   prev = curr;
  while (curr) {
  struct iova *curr_iova = container_of(curr, struct iova, node);
  +
  if (limit_pfn  curr_iova-pfn_lo)
  goto move_left;
  else if (limit_pfn  curr_iova-pfn_hi)
  @@ -99,6 +104,7 @@
 
 For some reason patch(1) claims that the patch is corrupted at this line.
 

My bad.  Try the following:

--mgross

Signed-off-by : [EMAIL PROTECTED] fa-la-la


Index: linux-2.6.24-mm1/drivers/pci/iova.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 
-0800
+++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-12 07:12:47.0 -0800
@@ -72,10 +72,11 @@
return pad_size;
 }
 
-static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
-   unsigned long limit_pfn, struct iova *new, bool size_aligned)
+static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
+   unsigned long size, unsigned long limit_pfn,
+   struct iova *new, bool size_aligned)
 {
-   struct rb_node *curr = NULL;
+   struct rb_node *prev, *curr = NULL;
unsigned long flags;
unsigned long saved_pfn;
unsigned int pad_size = 0;
@@ -84,8 +85,10 @@
spin_lock_irqsave(iovad-iova_rbtree_lock, flags);
saved_pfn = limit_pfn;
curr = __get_cached_rbnode(iovad, limit_pfn);
+   prev = curr;
while (curr) {
struct iova *curr_iova = container_of(curr, struct iova, node);
+
if (limit_pfn  curr_iova-pfn_lo)
goto move_left;
else if (limit_pfn  curr_iova-pfn_hi)
@@ -99,6 +102,7 @@
 adjust_limit_pfn:
limit_pfn = curr_iova-pfn_lo - 1;
 move_left:
+   prev = curr;
curr = rb_prev(curr);
}
 
@@ -115,7 +119,33 @@
new-pfn_lo = limit_pfn - (size + pad_size) + 1;
new-pfn_hi = new-pfn_lo + size - 1;
 
+   /* Insert the new_iova into domain rbtree by holding writer lock */
+   /* Add new node and rebalance tree. */
+   {
+   struct rb_node **entry = ((prev)), *parent = NULL;
+   /* Figure out where to put new node */
+   while (*entry) {
+   struct iova *this = container_of(*entry,
+   struct iova, node);
+   parent = *entry;
+
+   if (new-pfn_lo  this-pfn_lo)
+   entry = ((*entry)-rb_left);
+   else if (new-pfn_lo  this-pfn_lo)
+   entry = ((*entry)-rb_right);
+   else
+   BUG(); /* this should not happen */
+   }
+
+   /* Add new node and rebalance tree. */
+   rb_link_node(new-node, parent, entry);
+   rb_insert_color(new-node, iovad-rbroot);
+   }
+   __cached_rbnode_insert_update(iovad, saved_pfn, new);
+
spin_unlock_irqrestore(iovad-iova_rbtree_lock

Re: [PATCH]intel-iommu batched iotlb flushes

2008-02-12 Thread mark gross
On Mon, Feb 11, 2008 at 03:27:16PM -0800, Randy Dunlap wrote:
 On Mon, 11 Feb 2008 14:41:05 -0800 mark gross wrote:
 
  The hole is the following scenarios: 
  do many map_signal operations, do some unmap_signals, reuse a recently
  unmapped page, errant DMA hardware sneaks through and steps on reused
  memory
  
  Or: you have rouge hardware using DMA's to look at pages: do many
 
   or rogue hardware?

yes bad-guy hardware.

 
  map_signal's, do many unmap_singles, reuse some unmapped pages : 
 
   signal  single
 
  rouge hardware looks at reused page
 
why rouge?

because I'm a dumb ass.

 
  Note : these holes are very hard to get too, as the IOTLB is small and
  only the PTE's still in the IOTLB can be accessed through this
  mechanism.
  
  Its recommended that strict is used when developing drivers that do DMA
  operations to catch bugs early.  For production code where performance
  is desired running with the batched IOTLB flushing is a good way to
  go.
  
  
  --mgross
  
  
  Signed-off-by: [EMAIL PROTECTED]
  
  
  
  Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
  ===
  --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 
  13:03:10.0 -0800
  +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-11 
  10:38:49.0 -0800
 
  @@ -50,11 +52,39 @@
   
   #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
   
  +
  +static void flush_unmaps_timeout(unsigned long data);
  +
  +static struct timer_list unmap_timer =
  +   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
  +
  +struct unmap_list {
  +   struct list_head list;
  +   struct dmar_domain *domain;
  +   struct iova *iova;
  +};
  +
  +static struct intel_iommu *g_iommus;
  +/* bitmap for indexing intel_iommus */
  +static unsigned long   *g_iommus_to_flush;
  +static int g_num_of_iommus;
  +
  +static DEFINE_SPINLOCK(async_umap_flush_lock);
  +static LIST_HEAD(unmaps_to_do);
  +
  +static int timer_on;
  +static long list_size;
  +static int high_watermark;
  +
  +static struct dentry *intel_iommu_debug, *debug;
  +
  +
   static void domain_remove_dev_info(struct dmar_domain *domain);
   
   static int dmar_disabled;
   static int __initdata dmar_map_gfx = 1;
   static int dmar_forcedac;
  +static int intel_iommu_strict;
   
   #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
   static DEFINE_SPINLOCK(device_domain_lock);
  @@ -73,9 +103,13 @@
  printk(KERN_INFO
  Intel-IOMMU: disable GFX device mapping\n);
  } else if (!strncmp(str, forcedac, 8)) {
  -   printk (KERN_INFO
  +   printk(KERN_INFO
  Intel-IOMMU: Forcing DAC for PCI devices\n);
  dmar_forcedac = 1;
  +   } else if (!strncmp(str, strict, 8)) {
  +   printk(KERN_INFO
  +   Intel-IOMMU: disable bached IOTLB flush\n);
 
   batched

fixed.

 
  +   intel_iommu_strict = 1;
  }
   
  str += strcspn(str, ,);
 
  @@ -1672,7 +1702,29 @@
  for_each_drhd_unit(drhd) {
  if (drhd-ignored)
  continue;
  -   iommu = alloc_iommu(drhd);
  +   g_num_of_iommus++;
  +   }
  +
  +   nlongs = BITS_TO_LONGS(g_num_of_iommus);
  +   g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
  +   if (!g_iommus_to_flush) {
  +   printk(KERN_ERR Allocating bitmap array failed\n);
 
 identify:   IOMMU:
 
  +   return -ENOMEM;
  +   }
  +
  +   g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
  +   if (!g_iommus) {
  +   kfree(g_iommus_to_flush);
  +   ret = -ENOMEM;
  +   goto error;
  +   }
  +
  +   i = 0;
  +   for_each_drhd_unit(drhd) {
  +   if (drhd-ignored)
  +   continue;
  +   iommu = alloc_iommu(g_iommus[i], drhd);
  +   i++;
  if (!iommu) {
  ret = -ENOMEM;
  goto error;
 
  Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt
  ===
  --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt   
  2008-02-11 13:44:23.0 -0800
  +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt2008-02-11 
  14:23:37.0 -0800
  @@ -822,6 +822,10 @@
  than 32 bit addressing. The default is to look
  for translation below 32 bit and if not available
  then look in the higher range.
  +   strict [Default Off]
  +   With this option on every umap_signle will
 
  on, every unmap_si{ngle,gnal} ??


fixed.

 
  +   result

[PATCH]intel-iommu batched iotlb flushes

2008-02-11 Thread mark gross
The intel-iommu hardware requires a polling operation to flush IOTLB
PTE's after an unmap operation.  Through some TSC instrumentation of a
netperf UDP stream with small packets test case it was seen that the
flush operations where sucking up to 16% of the CPU time doing
iommu_flush_iotlb's

The following patch batches the IOTLB flushes removing most of the
overhead in flushing the IOTLB's.  It works by building a list of to be
released IOVA's that is iterated over when a timer goes off or when a
high water mark is reached.

The wrinkle this has is that the memory protection and page fault
warnings from errant DMA operations is somewhat reduced, hence a kernel
parameter is added to revert back to the "strict" page flush / unmap
behavior. 

The hole is the following scenarios: 
do many map_signal operations, do some unmap_signals, reuse a recently
unmapped page, 

Or: you have rouge hardware using DMA's to look at pages: do many
map_signal's, do many unmap_singles, reuse some unmapped pages : 


Note : these holes are very hard to get too, as the IOTLB is small and
only the PTE's still in the IOTLB can be accessed through this
mechanism.

Its recommended that strict is used when developing drivers that do DMA
operations to catch bugs early.  For production code where performance
is desired running with the batched IOTLB flushing is a good way to
go.


--mgross


Signed-off-by: <[EMAIL PROTECTED]>



Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 
13:03:10.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-11 10:38:49.0 
-0800
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "iova.h"
 #include "intel-iommu.h"
 #include  /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
"Intel-IOMMU: disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+   } else if (!strncmp(str, "strict", 8)) {
+   printk(KERN_INFO
+   "Intel-IOMMU: disable bached IOTLB flush\n");
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ",");
@@ -965,17 +999,13 @@
set_bit(0, iommu->domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
if (!iommu->reg) {
printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index < cnt; index ++)
+   for (index = 0; index < cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, ret, unit = 0;
 
/*
 * for each drhd
@@ -1672,7 +1702,29 @@
for_each_drhd_unit(drhd) {
if (drhd->ignored)
continue;
-   iommu = 

iova RB tree setup tweak.

2008-02-11 Thread mark gross
The following patch merges two functions into one allowing for a 3%
reduction in overhead in locating, allocating and inserting pages for
use in IOMMU operations.

Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take
a look.  It works by re-using some of the information gathered in the
search for the pages to use in setting up the IOTLB's in the insertion
of the iova structure into the RB tree.

--mgross

Singed-off-by: <[EMAIL PROTECTED]>


Index: linux-2.6.24-mm1/drivers/pci/iova.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 
-0800
+++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 -0800
@@ -72,20 +72,25 @@
return pad_size;
 }
 
-static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
-   unsigned long limit_pfn, struct iova *new, bool size_aligned)
+static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
+   unsigned long size, unsigned long limit_pfn,
+   struct iova *new, bool size_aligned)
 {
-   struct rb_node *curr = NULL;
+   struct rb_node *prev, *curr = NULL;
unsigned long flags;
unsigned long saved_pfn;
unsigned int pad_size = 0;
 
/* Walk the tree backwards */
spin_lock_irqsave(>iova_rbtree_lock, flags);
saved_pfn = limit_pfn;
curr = __get_cached_rbnode(iovad, _pfn);
+   prev = curr;
while (curr) {
struct iova *curr_iova = container_of(curr, struct iova, node);
+
if (limit_pfn < curr_iova->pfn_lo)
goto move_left;
else if (limit_pfn < curr_iova->pfn_hi)
@@ -99,6 +104,7 @@
 adjust_limit_pfn:
limit_pfn = curr_iova->pfn_lo - 1;
 move_left:
+   prev = curr;
curr = rb_prev(curr);
}
 
@@ -115,7 +121,33 @@
new->pfn_lo = limit_pfn - (size + pad_size) + 1;
new->pfn_hi = new->pfn_lo + size - 1;
 
+   /* Insert the new_iova into domain rbtree by holding writer lock */
+   /* Add new node and rebalance tree. */
+   {
+   struct rb_node **entry = &((prev)), *parent = NULL;
+   /* Figure out where to put new node */
+   while (*entry) {
+   struct iova *this = container_of(*entry,
+   struct iova, node);
+   parent = *entry;
+
+   if (new->pfn_lo < this->pfn_lo)
+   entry = &((*entry)->rb_left);
+   else if (new->pfn_lo > this->pfn_lo)
+   entry = &((*entry)->rb_right);
+   else
+   BUG(); /* this should not happen */
+   }
+
+   /* Add new node and rebalance tree. */
+   rb_link_node(>node, parent, entry);
+   rb_insert_color(>node, >rbroot);
+   }
+   __cached_rbnode_insert_update(iovad, saved_pfn, new);
+
spin_unlock_irqrestore(>iova_rbtree_lock, flags);
+
+
return 0;
 }
 
@@ -171,23 +203,15 @@
size = __roundup_pow_of_two(size);
 
spin_lock_irqsave(>iova_alloc_lock, flags);
-   ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
-   size_aligned);
+   ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
+   new_iova, size_aligned);
 
+   spin_unlock_irqrestore(>iova_alloc_lock, flags);
if (ret) {
-   spin_unlock_irqrestore(>iova_alloc_lock, flags);
free_iova_mem(new_iova);
return NULL;
}
 
-   /* Insert the new_iova into domain rbtree by holding writer lock */
-   spin_lock(>iova_rbtree_lock);
-   iova_insert_rbtree(>rbroot, new_iova);
-   __cached_rbnode_insert_update(iovad, limit_pfn, new_iova);
-   spin_unlock(>iova_rbtree_lock);
-
-   spin_unlock_irqrestore(>iova_alloc_lock, flags);
-
return new_iova;
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH]intel-iommu batched iotlb flushes

2008-02-11 Thread mark gross
The intel-iommu hardware requires a polling operation to flush IOTLB
PTE's after an unmap operation.  Through some TSC instrumentation of a
netperf UDP stream with small packets test case it was seen that the
flush operations where sucking up to 16% of the CPU time doing
iommu_flush_iotlb's

The following patch batches the IOTLB flushes removing most of the
overhead in flushing the IOTLB's.  It works by building a list of to be
released IOVA's that is iterated over when a timer goes off or when a
high water mark is reached.

The wrinkle this has is that the memory protection and page fault
warnings from errant DMA operations is somewhat reduced, hence a kernel
parameter is added to revert back to the strict page flush / unmap
behavior. 

The hole is the following scenarios: 
do many map_signal operations, do some unmap_signals, reuse a recently
unmapped page, errant DMA hardware sneaks through and steps on reused
memory

Or: you have rouge hardware using DMA's to look at pages: do many
map_signal's, do many unmap_singles, reuse some unmapped pages : 
rouge hardware looks at reused page

Note : these holes are very hard to get too, as the IOTLB is small and
only the PTE's still in the IOTLB can be accessed through this
mechanism.

Its recommended that strict is used when developing drivers that do DMA
operations to catch bugs early.  For production code where performance
is desired running with the batched IOTLB flushing is a good way to
go.


--mgross


Signed-off-by: [EMAIL PROTECTED]



Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 
13:03:10.0 -0800
+++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c  2008-02-11 10:38:49.0 
-0800
@@ -21,6 +21,7 @@
 
 #include linux/init.h
 #include linux/bitmap.h
+#include linux/debugfs.h
 #include linux/slab.h
 #include linux/irq.h
 #include linux/interrupt.h
@@ -30,6 +31,7 @@
 #include linux/dmar.h
 #include linux/dma-mapping.h
 #include linux/mempool.h
+#include linux/timer.h
 #include iova.h
 #include intel-iommu.h
 #include asm/proto.h /* force_iommu in this header in x86-64*/
@@ -50,11 +52,39 @@
 
 #define DOMAIN_MAX_ADDR(gaw) u64)1)  gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+static struct timer_list unmap_timer =
+   TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0);
+
+struct unmap_list {
+   struct list_head list;
+   struct dmar_domain *domain;
+   struct iova *iova;
+};
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long   *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -73,9 +103,13 @@
printk(KERN_INFO
Intel-IOMMU: disable GFX device mapping\n);
} else if (!strncmp(str, forcedac, 8)) {
-   printk (KERN_INFO
+   printk(KERN_INFO
Intel-IOMMU: Forcing DAC for PCI devices\n);
dmar_forcedac = 1;
+   } else if (!strncmp(str, strict, 8)) {
+   printk(KERN_INFO
+   Intel-IOMMU: disable bached IOTLB flush\n);
+   intel_iommu_strict = 1;
}
 
str += strcspn(str, ,);
@@ -965,17 +999,13 @@
set_bit(0, iommu-domain_ids);
return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+   struct dmar_drhd_unit *drhd)
 {
-   struct intel_iommu *iommu;
int ret;
int map_size;
u32 ver;
 
-   iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-   if (!iommu)
-   return NULL;
iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K);
if (!iommu-reg) {
printk(KERN_ERR IOMMU: can't map the region\n);
@@ -1396,7 +1426,7 @@
int index;
 
while (dev) {
-   for (index = 0; index  cnt; index ++)
+   for (index = 0; index  cnt; index++)
if (dev == devices[index])
return 1;
 
@@ -1661,7 +1691,7 @@
struct dmar_rmrr_unit *rmrr;
struct pci_dev *pdev;
struct intel_iommu *iommu;
-   int ret, unit = 0;
+   int nlongs, i, 

Re: Pull request: DMA pool updates

2008-02-04 Thread mark gross
On Mon, Jan 28, 2008 at 05:11:47PM -0700, Matthew Wilcox wrote:
> G'day Linus, mate
> 
> Could you pull the dmapool branch of
> git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc.git please?
> 
> All the patches have been posted to linux-kernel before, and various
> comments (and acks) have been taken into account.
> 
> It's a fairly nice performance improvement, so would be good to get in.
> It's survived a few hours of *mumble* high-stress database benchmark,
> so I have high confidence in its stability.

I haven't looked at this yet but, I've been doing some work with the
IOMMU performance impacts on DMA operations.  DMApooling  could provide
a way to mitigate some of the spankage gotten when using IOMMU's.

--mgross

> 
> -- 
> Intel are signing my paycheques ... these opinions are still mine
> "Bill, look, we understand that you're interested in selling us this
> operating system, but compare it to ours.  We can't possibly take such
> a retrograde step."
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [EMAIL PROTECTED]  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"[EMAIL PROTECTED]"> [EMAIL PROTECTED] 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Pull request: DMA pool updates

2008-02-04 Thread mark gross
On Mon, Jan 28, 2008 at 05:11:47PM -0700, Matthew Wilcox wrote:
 G'day Linus, mate
 
 Could you pull the dmapool branch of
 git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc.git please?
 
 All the patches have been posted to linux-kernel before, and various
 comments (and acks) have been taken into account.
 
 It's a fairly nice performance improvement, so would be good to get in.
 It's survived a few hours of *mumble* high-stress database benchmark,
 so I have high confidence in its stability.

I haven't looked at this yet but, I've been doing some work with the
IOMMU performance impacts on DMA operations.  DMApooling  could provide
a way to mitigate some of the spankage gotten when using IOMMU's.

--mgross

 
 -- 
 Intel are signing my paycheques ... these opinions are still mine
 Bill, look, we understand that you're interested in selling us this
 operating system, but compare it to ours.  We can't possibly take such
 a retrograde step.
 
 --
 To unsubscribe, send a message with 'unsubscribe linux-mm' in
 the body to [EMAIL PROTECTED]  For more info on Linux MM,
 see: http://www.linux-mm.org/ .
 Don't email: a href=mailto:[EMAIL PROTECTED] [EMAIL PROTECTED] /a
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status

2008-01-25 Thread mark gross
On Fri, Jan 25, 2008 at 11:02:55AM +0800, Zhenyu Wang wrote:
> 
> Mark, sorry for missing this for long time...
> 
> On 2008.01.08 12:44:20 -0800, mark gross wrote:
> > > > 
> > > > [agp-mm] [intel_iommu] explicit export current graphics dmar status
> > > > 
> > > > To make it possbile to tell other modules about curent
> > > > graphics dmar engine status, that could decide if graphics
> > > > driver should remap physical address to dma address.
> > > > 
> > > > Also this one trys to make dmar_disabled really present
> > > > current status of DMAR, which would be used for completely
> > > > express graphics dmar engine is active or not.
> > 
> > do you think this exporting will be needed?
> > If so why and when would someone use it?
> 
> This is used for our graphics driver module to know if we have to
> do dma remapping in iommu case, both in kernel config and kernel
> boot time param config. 

ok. How soon do you need this export?

> 
> The simplest case is that DMAR_GFX_WA is on, which no dma mapping
> will act in intel_agp.
> 
> If no DMAR_GFX_WA, we still have boot param to turn off whole intel
> iommu (intel_iommu=off), just turn off graphics remap engine
> (intel_iommu=igfx_off). So this exported interface is used to know
> that in runtime.
> 
> > 
> > > > 
> > > > Signed-off-by: Zhenyu Wang <[EMAIL PROTECTED]>
> > > > ---
> > > >  drivers/pci/intel-iommu.c |   18 --
> > > >  include/linux/dmar.h  |6 ++
> > > >  2 files changed, 22 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
> > > > index e079a52..81a0abc 100644
> > > > --- a/drivers/pci/intel-iommu.c
> > > > +++ b/drivers/pci/intel-iommu.c
> > > > @@ -53,7 +53,7 @@
> > > >  static void domain_remove_dev_info(struct dmar_domain *domain);
> > > >  
> > > >  static int dmar_disabled;
> > > > -static int __initdata dmar_map_gfx = 1;
> > > > +static int dmar_map_gfx = 1;
> > > >  static int dmar_forcedac;
> > > >  
> > > >  #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
> > > > @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str)
> > > >  }
> > > >  __setup("intel_iommu=", intel_iommu_setup);
> > > >  
> > > > +int intel_iommu_gfx_remapping(void)
> > > > +{
> > > > +#ifndef CONFIG_DMAR_GFX_WA
> > > > +   if (!dmar_disabled && iommu_detected && dmar_map_gfx)
> > > > +   return 1;
> > > > +#endif
> > > > +   return 0;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping);
> > > > +
> > > >  static struct kmem_cache *iommu_domain_cache;
> > > >  static struct kmem_cache *iommu_devinfo_cache;
> > > >  static struct kmem_cache *iommu_iova_cache;
> > > > @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void)
> > > >  
> > > >  void __init detect_intel_iommu(void)
> > > >  {
> > > > -   if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
> > > > +   if (dmar_disabled)
> > > > +   return;
> > > > +   if (swiotlb || no_iommu || iommu_detected) {
> > > > +   dmar_disabled = 1;
> > > > return;
> > 
> > Why the bloat?  This block of 7 lines looks like it does the same as the
> > 2 you replaced.
> 
> No, dmar_disabled is not set in origin, which can't tell if it's turned
> off with (intel_iommu=off) later.
> 

oops, I missed that how about something like

if (swiotlb || no_iommu || iommu_detected || dmar_disabled) {
dmar_disabled = 1;
return;
}

I guess your way is ok too.

> > 
> > > > +   }
> > > > if (early_dmar_detect()) {
> > > > iommu_detected = 1;
> > > > }
> > > > diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> > > > index ffb6439..8ae435e 100644
> > > > --- a/include/linux/dmar.h
> > > > +++ b/include/linux/dmar.h
> > > > @@ -47,6 +47,8 @@ extern int intel_iommu_init(void);
> > > >  extern int dmar_table_init(void);
> > > >  extern int early_dmar_detect(void);
> > > >  
> > > > +extern int in

Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status

2008-01-25 Thread mark gross
On Fri, Jan 25, 2008 at 11:02:55AM +0800, Zhenyu Wang wrote:
 
 Mark, sorry for missing this for long time...
 
 On 2008.01.08 12:44:20 -0800, mark gross wrote:

[agp-mm] [intel_iommu] explicit export current graphics dmar status

To make it possbile to tell other modules about curent
graphics dmar engine status, that could decide if graphics
driver should remap physical address to dma address.

Also this one trys to make dmar_disabled really present
current status of DMAR, which would be used for completely
express graphics dmar engine is active or not.
  
  do you think this exporting will be needed?
  If so why and when would someone use it?
 
 This is used for our graphics driver module to know if we have to
 do dma remapping in iommu case, both in kernel config and kernel
 boot time param config. 

ok. How soon do you need this export?

 
 The simplest case is that DMAR_GFX_WA is on, which no dma mapping
 will act in intel_agp.
 
 If no DMAR_GFX_WA, we still have boot param to turn off whole intel
 iommu (intel_iommu=off), just turn off graphics remap engine
 (intel_iommu=igfx_off). So this exported interface is used to know
 that in runtime.
 
  

Signed-off-by: Zhenyu Wang [EMAIL PROTECTED]
---
 drivers/pci/intel-iommu.c |   18 --
 include/linux/dmar.h  |6 ++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e079a52..81a0abc 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,7 +53,7 @@
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
-static int __initdata dmar_map_gfx = 1;
+static int dmar_map_gfx = 1;
 static int dmar_forcedac;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
@@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str)
 }
 __setup(intel_iommu=, intel_iommu_setup);
 
+int intel_iommu_gfx_remapping(void)
+{
+#ifndef CONFIG_DMAR_GFX_WA
+   if (!dmar_disabled  iommu_detected  dmar_map_gfx)
+   return 1;
+#endif
+   return 0;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping);
+
 static struct kmem_cache *iommu_domain_cache;
 static struct kmem_cache *iommu_devinfo_cache;
 static struct kmem_cache *iommu_iova_cache;
@@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void)
 
 void __init detect_intel_iommu(void)
 {
-   if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
+   if (dmar_disabled)
+   return;
+   if (swiotlb || no_iommu || iommu_detected) {
+   dmar_disabled = 1;
return;
  
  Why the bloat?  This block of 7 lines looks like it does the same as the
  2 you replaced.
 
 No, dmar_disabled is not set in origin, which can't tell if it's turned
 off with (intel_iommu=off) later.
 

oops, I missed that how about something like

if (swiotlb || no_iommu || iommu_detected || dmar_disabled) {
dmar_disabled = 1;
return;
}

I guess your way is ok too.

  
+   }
if (early_dmar_detect()) {
iommu_detected = 1;
}
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index ffb6439..8ae435e 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -47,6 +47,8 @@ extern int intel_iommu_init(void);
 extern int dmar_table_init(void);
 extern int early_dmar_detect(void);
 
+extern int intel_iommu_gfx_remapping(void);
+
 extern struct list_head dmar_drhd_units;
 extern struct list_head dmar_rmrr_units;
 
@@ -81,6 +83,10 @@ static inline int intel_iommu_init(void)
 {
return -ENODEV;
 }
+static inline int intel_iommu_gfx_remapping(void)
+{
+   return 0;
+}
  
  Um this function is silly lets not post it.
  
 
 #endif /* !CONFIG_DMAR */
 #endif /* __DMAR_H__ */
-- 
1.5.3.4

   
   Any comments to this one? Is it ok to push this iommu patch with
   agp dma remapping patches to next -mm?
   
  
  Its not ready for posting.

send me your current patch and eta for the graphics module patches so I
can better coordinate with you.

--mgross
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status

2008-01-08 Thread mark gross
Sorry for the late reply.
comments below...


On Fri, Jan 04, 2008 at 09:53:38AM +0800, Zhenyu Wang wrote:
> On 2007.12.19 13:26:08 +, Zhenyu Wang wrote:
> > 
> > [agp-mm] [intel_iommu] explicit export current graphics dmar status
> > 
> > To make it possbile to tell other modules about curent
> > graphics dmar engine status, that could decide if graphics
> > driver should remap physical address to dma address.
> > 
> > Also this one trys to make dmar_disabled really present
> > current status of DMAR, which would be used for completely
> > express graphics dmar engine is active or not.

do you think this exporting will be needed?
If so why and when would someone use it?

> > 
> > Signed-off-by: Zhenyu Wang <[EMAIL PROTECTED]>
> > ---
> >  drivers/pci/intel-iommu.c |   18 --
> >  include/linux/dmar.h  |6 ++
> >  2 files changed, 22 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
> > index e079a52..81a0abc 100644
> > --- a/drivers/pci/intel-iommu.c
> > +++ b/drivers/pci/intel-iommu.c
> > @@ -53,7 +53,7 @@
> >  static void domain_remove_dev_info(struct dmar_domain *domain);
> >  
> >  static int dmar_disabled;
> > -static int __initdata dmar_map_gfx = 1;
> > +static int dmar_map_gfx = 1;
> >  static int dmar_forcedac;
> >  
> >  #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
> > @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str)
> >  }
> >  __setup("intel_iommu=", intel_iommu_setup);
> >  
> > +int intel_iommu_gfx_remapping(void)
> > +{
> > +#ifndef CONFIG_DMAR_GFX_WA
> > +   if (!dmar_disabled && iommu_detected && dmar_map_gfx)
> > +   return 1;
> > +#endif
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping);
> > +
> >  static struct kmem_cache *iommu_domain_cache;
> >  static struct kmem_cache *iommu_devinfo_cache;
> >  static struct kmem_cache *iommu_iova_cache;
> > @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void)
> >  
> >  void __init detect_intel_iommu(void)
> >  {
> > -   if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
> > +   if (dmar_disabled)
> > +   return;
> > +   if (swiotlb || no_iommu || iommu_detected) {
> > +   dmar_disabled = 1;
> > return;

Why the bloat?  This block of 7 lines looks like it does the same as the
2 you replaced.

> > +   }
> > if (early_dmar_detect()) {
> > iommu_detected = 1;
> > }
> > diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> > index ffb6439..8ae435e 100644
> > --- a/include/linux/dmar.h
> > +++ b/include/linux/dmar.h
> > @@ -47,6 +47,8 @@ extern int intel_iommu_init(void);
> >  extern int dmar_table_init(void);
> >  extern int early_dmar_detect(void);
> >  
> > +extern int intel_iommu_gfx_remapping(void);
> > +
> >  extern struct list_head dmar_drhd_units;
> >  extern struct list_head dmar_rmrr_units;
> >  
> > @@ -81,6 +83,10 @@ static inline int intel_iommu_init(void)
> >  {
> > return -ENODEV;
> >  }
> > +static inline int intel_iommu_gfx_remapping(void)
> > +{
> > +   return 0;
> > +}

Um this function is silly lets not post it.

> >  
> >  #endif /* !CONFIG_DMAR */
> >  #endif /* __DMAR_H__ */
> > -- 
> > 1.5.3.4
> > 
> 
> Any comments to this one? Is it ok to push this iommu patch with
> agp dma remapping patches to next -mm?
> 

Its not ready for posting.

--mgross

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status

2008-01-08 Thread mark gross
Sorry for the late reply.
comments below...


On Fri, Jan 04, 2008 at 09:53:38AM +0800, Zhenyu Wang wrote:
 On 2007.12.19 13:26:08 +, Zhenyu Wang wrote:
  
  [agp-mm] [intel_iommu] explicit export current graphics dmar status
  
  To make it possbile to tell other modules about curent
  graphics dmar engine status, that could decide if graphics
  driver should remap physical address to dma address.
  
  Also this one trys to make dmar_disabled really present
  current status of DMAR, which would be used for completely
  express graphics dmar engine is active or not.

do you think this exporting will be needed?
If so why and when would someone use it?

  
  Signed-off-by: Zhenyu Wang [EMAIL PROTECTED]
  ---
   drivers/pci/intel-iommu.c |   18 --
   include/linux/dmar.h  |6 ++
   2 files changed, 22 insertions(+), 2 deletions(-)
  
  diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
  index e079a52..81a0abc 100644
  --- a/drivers/pci/intel-iommu.c
  +++ b/drivers/pci/intel-iommu.c
  @@ -53,7 +53,7 @@
   static void domain_remove_dev_info(struct dmar_domain *domain);
   
   static int dmar_disabled;
  -static int __initdata dmar_map_gfx = 1;
  +static int dmar_map_gfx = 1;
   static int dmar_forcedac;
   
   #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
  @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str)
   }
   __setup(intel_iommu=, intel_iommu_setup);
   
  +int intel_iommu_gfx_remapping(void)
  +{
  +#ifndef CONFIG_DMAR_GFX_WA
  +   if (!dmar_disabled  iommu_detected  dmar_map_gfx)
  +   return 1;
  +#endif
  +   return 0;
  +}
  +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping);
  +
   static struct kmem_cache *iommu_domain_cache;
   static struct kmem_cache *iommu_devinfo_cache;
   static struct kmem_cache *iommu_iova_cache;
  @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void)
   
   void __init detect_intel_iommu(void)
   {
  -   if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
  +   if (dmar_disabled)
  +   return;
  +   if (swiotlb || no_iommu || iommu_detected) {
  +   dmar_disabled = 1;
  return;

Why the bloat?  This block of 7 lines looks like it does the same as the
2 you replaced.

  +   }
  if (early_dmar_detect()) {
  iommu_detected = 1;
  }
  diff --git a/include/linux/dmar.h b/include/linux/dmar.h
  index ffb6439..8ae435e 100644
  --- a/include/linux/dmar.h
  +++ b/include/linux/dmar.h
  @@ -47,6 +47,8 @@ extern int intel_iommu_init(void);
   extern int dmar_table_init(void);
   extern int early_dmar_detect(void);
   
  +extern int intel_iommu_gfx_remapping(void);
  +
   extern struct list_head dmar_drhd_units;
   extern struct list_head dmar_rmrr_units;
   
  @@ -81,6 +83,10 @@ static inline int intel_iommu_init(void)
   {
  return -ENODEV;
   }
  +static inline int intel_iommu_gfx_remapping(void)
  +{
  +   return 0;
  +}

Um this function is silly lets not post it.

   
   #endif /* !CONFIG_DMAR */
   #endif /* __DMAR_H__ */
  -- 
  1.5.3.4
  
 
 Any comments to this one? Is it ok to push this iommu patch with
 agp dma remapping patches to next -mm?
 

Its not ready for posting.

--mgross

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[EMAIL PROTECTED]: intel-iommu-PMEN-think-oh patch.]

2007-11-27 Thread mark gross
I forgot to cc the list.

--mgross

- Forwarded message from mark gross <[EMAIL PROTECTED]> -

Date: Tue, 27 Nov 2007 15:46:09 -0800
From: mark gross <[EMAIL PROTECTED]>
To: Andrew Morton <[EMAIL PROTECTED]>
Reply-To: [EMAIL PROTECTED]
Subject: intel-iommu-PMEN-think-oh patch.

I screwed up with my earlier patch to enable the portected memroy.  The
macro IOMMU_WAIT, exits when the condition goes true.  Without this
patch the code will hang at boot and some ( all?) vtd enabled systems.


--mgross

Signed-off-by: mark gross <[EMAIL PROTECTED]>


Index: linux-2.6.24-rc2+/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-rc2+.orig/drivers/pci/intel-iommu.c2007-11-27 
15:29:38.0 -0800
+++ linux-2.6.24-rc2+/drivers/pci/intel-iommu.c 2007-11-27 15:30:06.0 
-0800
@@ -704,7 +704,7 @@
 
/* wait for the protected region status bit to clear */
IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
-   readl, (pmen & DMA_PMEN_PRS), pmen);
+   readl, !(pmen & DMA_PMEN_PRS), pmen);
 
spin_unlock_irqrestore(>register_lock, flags);
 }

- End forwarded message -
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[EMAIL PROTECTED]: intel-iommu-PMEN-think-oh patch.]

2007-11-27 Thread mark gross
I forgot to cc the list.

--mgross

- Forwarded message from mark gross [EMAIL PROTECTED] -

Date: Tue, 27 Nov 2007 15:46:09 -0800
From: mark gross [EMAIL PROTECTED]
To: Andrew Morton [EMAIL PROTECTED]
Reply-To: [EMAIL PROTECTED]
Subject: intel-iommu-PMEN-think-oh patch.

I screwed up with my earlier patch to enable the portected memroy.  The
macro IOMMU_WAIT, exits when the condition goes true.  Without this
patch the code will hang at boot and some ( all?) vtd enabled systems.


--mgross

Signed-off-by: mark gross [EMAIL PROTECTED]


Index: linux-2.6.24-rc2+/drivers/pci/intel-iommu.c
===
--- linux-2.6.24-rc2+.orig/drivers/pci/intel-iommu.c2007-11-27 
15:29:38.0 -0800
+++ linux-2.6.24-rc2+/drivers/pci/intel-iommu.c 2007-11-27 15:30:06.0 
-0800
@@ -704,7 +704,7 @@
 
/* wait for the protected region status bit to clear */
IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
-   readl, (pmen  DMA_PMEN_PRS), pmen);
+   readl, !(pmen  DMA_PMEN_PRS), pmen);
 
spin_unlock_irqrestore(iommu-register_lock, flags);
 }

- End forwarded message -
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] More Sanity checks for DMAR

2007-11-20 Thread mark gross
On Mon, Nov 19, 2007 at 03:03:48PM -0800, Fenghua Yu wrote:
> 
> This patch adds and changes a few sanity checks in dmar.c.
> 
> 1. The haw field in ACPI DMAR table in VT-d spec doesn't describe the range of
> haw. But since DMA page size is 4KB in DMA remapping, haw should be at least
> 4KB. The current VT-d code in dmar.c returns failure when haw==0. This sanity
> check is not accurate and execution can pass when haw is less than one page
> size 4KB. This patch changes the haw sanity check to validate if haw is less
> than 4KB.
> 2. Add dmar_rmrr_units verification.
> 3. Add parse_dmar_table() verification.
> 
> Thanks.
> 
> -Fenghua
> 
> Signed-off-by: Fenghua Yu <[EMAIL PROTECTED]>
> 
> ---
> 
>  dmar.c |   20 +---
>  1 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
> index 5dfdfda..a5b6ed0 100644
> --- a/drivers/pci/dmar.c
> +++ b/drivers/pci/dmar.c
> @@ -25,6 +25,7 @@
>  
>  #include 
>  #include 
> +#include "iova.h"
>  
>  #undef PREFIX
>  #define PREFIX "DMAR:"
> @@ -263,8 +264,8 @@ parse_dmar_table(void)
>   if (!dmar)
>   return -ENODEV;
>  
> - if (!dmar->width) {
> - printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n");
> + if (dmar->width < PAGE_SHIFT_4K - 1) {
> + printk (KERN_WARNING PREFIX "Invalid DMAR haw\n");
>   return -EINVAL;
>   }
>  
> @@ -301,11 +302,24 @@ parse_dmar_table(void)
>  int __init dmar_table_init(void)
>  {
>  
> - parse_dmar_table();
> + int ret;
> + 
> + ret = parse_dmar_table();
> + if (ret) {
> + printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
> + return ret;
> + }
> +
>   if (list_empty(_drhd_units)) {
>   printk(KERN_INFO PREFIX "No DMAR devices found\n");
>   return -ENODEV;
>   }
> +
> + if (list_empty(_rmrr_units)) {
> + printk(KERN_INFO PREFIX "No RMRR found\n");
> + return -ENODEV;
> + }
> +
>   return 0;
>  }
>  
> -
Ack : mark gross <[EMAIL PROTECTED]>

Thanks,

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] intel-iommu-fault_reason_index_cleanup.patch

2007-11-20 Thread mark gross
The following patch fixes an off by one bug in the fault reason string
reporting function, and cleans up some of the code around this buglet.


please apply.

--mgross

Signed-off-by: mark gross <[EMAIL PROTECTED]>


Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c   2007-11-20 
10:04:39.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-20 
10:10:48.0 -0800
@@ -745,7 +745,7 @@
 
 /* iommu interrupt handling. Most stuff are MSI-like. */
 
-static char *fault_reason_strings[] =
+static const char *fault_reason_strings[] =
 {
"Software",
"Present bit in root entry is clear",
@@ -759,15 +759,14 @@
"Context table ptr is invalid",
"non-zero reserved fields in RTP",
"non-zero reserved fields in CTP",
-   "non-zero reserved fields in PTE",
-   "Unknown"
+   "non-zero reserved fields in PTE"
 };
 #define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
 
-char *dmar_get_fault_reason(u8 fault_reason)
+const char *dmar_get_fault_reason(u8 fault_reason)
 {
-   if (fault_reason >= MAX_FAULT_REASON_IDX)
-   return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
+   if (fault_reason > MAX_FAULT_REASON_IDX)
+   return "Unknown";
else
return fault_reason_strings[fault_reason];
 }
@@ -825,7 +824,7 @@
 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u16 source_id, u64 addr)
 {
-   char *reason;
+   const char *reason;
 
reason = dmar_get_fault_reason(fault_reason);
 
Index: linux-2.6.23-rc2-iommu/include/linux/dmar.h
===
--- linux-2.6.23-rc2-iommu.orig/include/linux/dmar.h2007-11-20 
10:12:06.0 -0800
+++ linux-2.6.23-rc2-iommu/include/linux/dmar.h 2007-11-20 10:13:43.0 
-0800
@@ -28,7 +28,7 @@
 #ifdef CONFIG_DMAR
 struct intel_iommu;
 
-extern char *dmar_get_fault_reason(u8 fault_reason);
+extern const char *dmar_get_fault_reason(u8 fault_reason);
 
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu-PMEN support

2007-11-20 Thread mark gross
On Mon, Nov 19, 2007 at 04:38:02PM -0800, Andrew Morton wrote:
> On Fri, 16 Nov 2007 14:39:57 -0800
> mark gross <[EMAIL PROTECTED]> wrote:
> 
> > -#define MAX_FAULT_REASON_IDX   ARRAY_SIZE(fault_reason_strings) - 1
> > +#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
> 
> hm.  The logic in there looks screwy.

I didn't like it when they added the -1 to the above, but after looking
at it sideways for a while I convinced myself that it was ok.  so I
thought it be good to add the parentheses.

> 
> 
> static char *fault_reason_strings[] =
> {
>   "Software",
>   "Present bit in root entry is clear",
>   "Present bit in context entry is clear",
>   "Invalid context entry",
>   "Access beyond MGAW",
>   "PTE Write access is not set",
>   "PTE Read access is not set",
>   "Next page table ptr is invalid",
>   "Root table address invalid",
>   "Context table ptr is invalid",
>   "non-zero reserved fields in RTP",
>   "non-zero reserved fields in CTP",
>   "non-zero reserved fields in PTE",
>   "Unknown"
> };
> #define MAX_FAULT_REASON_IDX  (ARRAY_SIZE(fault_reason_strings) - 1)
> 
> char *dmar_get_fault_reason(u8 fault_reason)
> {
>   if (fault_reason >= MAX_FAULT_REASON_IDX)
>   return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
>   else
>   return fault_reason_strings[fault_reason];
> }
> 
> 
> so all invalid fault_reasons will cause us to display "non-zero reserved
> fields in PTE".

GAH!  Thats wrong.

I also don't like the use of >= here, its harder for me to parse in my
head than just >.


> 
> Why not just do
> 
>   if (fault_reason >= ARRAY_SIZE(fault_reason_strings))
>   return "Unknown";
>   return fault_reason_strings[fault_reason];
> 
> ?

Why not indeed.  :)

> 
> (might as well make fault_reason_strings[] const, too).

Thats a good thing too.

I'll put together a patch this morning to address these think-ohs.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH]intel-iommu-PMEN support

2007-11-20 Thread mark gross
On Mon, Nov 19, 2007 at 04:38:02PM -0800, Andrew Morton wrote:
 On Fri, 16 Nov 2007 14:39:57 -0800
 mark gross [EMAIL PROTECTED] wrote:
 
  -#define MAX_FAULT_REASON_IDX   ARRAY_SIZE(fault_reason_strings) - 1
  +#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
 
 hm.  The logic in there looks screwy.

I didn't like it when they added the -1 to the above, but after looking
at it sideways for a while I convinced myself that it was ok.  so I
thought it be good to add the parentheses.

 
 
 static char *fault_reason_strings[] =
 {
   Software,
   Present bit in root entry is clear,
   Present bit in context entry is clear,
   Invalid context entry,
   Access beyond MGAW,
   PTE Write access is not set,
   PTE Read access is not set,
   Next page table ptr is invalid,
   Root table address invalid,
   Context table ptr is invalid,
   non-zero reserved fields in RTP,
   non-zero reserved fields in CTP,
   non-zero reserved fields in PTE,
   Unknown
 };
 #define MAX_FAULT_REASON_IDX  (ARRAY_SIZE(fault_reason_strings) - 1)
 
 char *dmar_get_fault_reason(u8 fault_reason)
 {
   if (fault_reason = MAX_FAULT_REASON_IDX)
   return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
   else
   return fault_reason_strings[fault_reason];
 }
 
 
 so all invalid fault_reasons will cause us to display non-zero reserved
 fields in PTE.

GAH!  Thats wrong.

I also don't like the use of = here, its harder for me to parse in my
head than just .


 
 Why not just do
 
   if (fault_reason = ARRAY_SIZE(fault_reason_strings))
   return Unknown;
   return fault_reason_strings[fault_reason];
 
 ?

Why not indeed.  :)

 
 (might as well make fault_reason_strings[] const, too).

Thats a good thing too.

I'll put together a patch this morning to address these think-ohs.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] intel-iommu-fault_reason_index_cleanup.patch

2007-11-20 Thread mark gross
The following patch fixes an off by one bug in the fault reason string
reporting function, and cleans up some of the code around this buglet.


please apply.

--mgross

Signed-off-by: mark gross [EMAIL PROTECTED]


Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c   2007-11-20 
10:04:39.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-20 
10:10:48.0 -0800
@@ -745,7 +745,7 @@
 
 /* iommu interrupt handling. Most stuff are MSI-like. */
 
-static char *fault_reason_strings[] =
+static const char *fault_reason_strings[] =
 {
Software,
Present bit in root entry is clear,
@@ -759,15 +759,14 @@
Context table ptr is invalid,
non-zero reserved fields in RTP,
non-zero reserved fields in CTP,
-   non-zero reserved fields in PTE,
-   Unknown
+   non-zero reserved fields in PTE
 };
 #define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
 
-char *dmar_get_fault_reason(u8 fault_reason)
+const char *dmar_get_fault_reason(u8 fault_reason)
 {
-   if (fault_reason = MAX_FAULT_REASON_IDX)
-   return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];
+   if (fault_reason  MAX_FAULT_REASON_IDX)
+   return Unknown;
else
return fault_reason_strings[fault_reason];
 }
@@ -825,7 +824,7 @@
 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u16 source_id, u64 addr)
 {
-   char *reason;
+   const char *reason;
 
reason = dmar_get_fault_reason(fault_reason);
 
Index: linux-2.6.23-rc2-iommu/include/linux/dmar.h
===
--- linux-2.6.23-rc2-iommu.orig/include/linux/dmar.h2007-11-20 
10:12:06.0 -0800
+++ linux-2.6.23-rc2-iommu/include/linux/dmar.h 2007-11-20 10:13:43.0 
-0800
@@ -28,7 +28,7 @@
 #ifdef CONFIG_DMAR
 struct intel_iommu;
 
-extern char *dmar_get_fault_reason(u8 fault_reason);
+extern const char *dmar_get_fault_reason(u8 fault_reason);
 
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] More Sanity checks for DMAR

2007-11-20 Thread mark gross
On Mon, Nov 19, 2007 at 03:03:48PM -0800, Fenghua Yu wrote:
 
 This patch adds and changes a few sanity checks in dmar.c.
 
 1. The haw field in ACPI DMAR table in VT-d spec doesn't describe the range of
 haw. But since DMA page size is 4KB in DMA remapping, haw should be at least
 4KB. The current VT-d code in dmar.c returns failure when haw==0. This sanity
 check is not accurate and execution can pass when haw is less than one page
 size 4KB. This patch changes the haw sanity check to validate if haw is less
 than 4KB.
 2. Add dmar_rmrr_units verification.
 3. Add parse_dmar_table() verification.
 
 Thanks.
 
 -Fenghua
 
 Signed-off-by: Fenghua Yu [EMAIL PROTECTED]
 
 ---
 
  dmar.c |   20 +---
  1 files changed, 17 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
 index 5dfdfda..a5b6ed0 100644
 --- a/drivers/pci/dmar.c
 +++ b/drivers/pci/dmar.c
 @@ -25,6 +25,7 @@
  
  #include linux/pci.h
  #include linux/dmar.h
 +#include iova.h
  
  #undef PREFIX
  #define PREFIX DMAR:
 @@ -263,8 +264,8 @@ parse_dmar_table(void)
   if (!dmar)
   return -ENODEV;
  
 - if (!dmar-width) {
 - printk (KERN_WARNING PREFIX Zero: Invalid DMAR haw\n);
 + if (dmar-width  PAGE_SHIFT_4K - 1) {
 + printk (KERN_WARNING PREFIX Invalid DMAR haw\n);
   return -EINVAL;
   }
  
 @@ -301,11 +302,24 @@ parse_dmar_table(void)
  int __init dmar_table_init(void)
  {
  
 - parse_dmar_table();
 + int ret;
 + 
 + ret = parse_dmar_table();
 + if (ret) {
 + printk(KERN_INFO PREFIX parse DMAR table failure.\n);
 + return ret;
 + }
 +
   if (list_empty(dmar_drhd_units)) {
   printk(KERN_INFO PREFIX No DMAR devices found\n);
   return -ENODEV;
   }
 +
 + if (list_empty(dmar_rmrr_units)) {
 + printk(KERN_INFO PREFIX No RMRR found\n);
 + return -ENODEV;
 + }
 +
   return 0;
  }
  
 -
Ack : mark gross [EMAIL PROTECTED]

Thanks,

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH]intel-iommu-PMEN support

2007-11-16 Thread mark gross
The following patch adds support for protected memory enable bits by
clearing them if they are set at startup time.  Some future boot loaders
or firmware could have this bit set after it loads the kernel, and it
needs to be cleared if DMA's are going to happen effectively.

please apply

--mgross

Signed-off-by: mark gross <[EMAIL PROTECTED]>


Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c   2007-11-16 
13:25:14.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-16 
13:26:07.0 -0800
@@ -692,6 +692,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(>register_lock, flags);
+   pmen = readl(iommu->reg + DMAR_PMEN_REG);
+   pmen &= ~DMA_PMEN_EPM;
+   writel(pmen, iommu->reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen & DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(>register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -745,7 +762,7 @@
"non-zero reserved fields in PTE",
"Unknown"
 };
-#define MAX_FAULT_REASON_IDX   ARRAY_SIZE(fault_reason_strings) - 1
+#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
 
 char *dmar_get_fault_reason(u8 fault_reason)
 {
@@ -1730,6 +1747,8 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.h   2007-11-16 
13:26:01.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h2007-11-16 
13:26:36.0 -0800
@@ -71,7 +71,7 @@
hi = readl(dmar + reg + 4); \
(((u64) hi) << 32) + lo; })
 */
-static inline u64 dmar_readq(void *addr)
+static inline u64 dmar_readq(void __iomem *addr)
 {
u32 lo, hi;
lo = readl(addr);
@@ -139,6 +139,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1) << 31)
 #define DMA_GCMD_SRTP (((u32)1) << 30)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH]intel-iommu-PMEN support

2007-11-16 Thread mark gross
The following patch adds support for protected memory enable bits by
clearing them if they are set at startup time.  Some future boot loaders
or firmware could have this bit set after it loads the kernel, and it
needs to be cleared if DMA's are going to happen effectively.

please apply

--mgross

Signed-off-by: mark gross [EMAIL PROTECTED]


Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c   2007-11-16 
13:25:14.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-16 
13:26:07.0 -0800
@@ -692,6 +692,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(iommu-register_lock, flags);
+   pmen = readl(iommu-reg + DMAR_PMEN_REG);
+   pmen = ~DMA_PMEN_EPM;
+   writel(pmen, iommu-reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen  DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(iommu-register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -745,7 +762,7 @@
non-zero reserved fields in PTE,
Unknown
 };
-#define MAX_FAULT_REASON_IDX   ARRAY_SIZE(fault_reason_strings) - 1
+#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)
 
 char *dmar_get_fault_reason(u8 fault_reason)
 {
@@ -1730,6 +1747,8 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h
===
--- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.h   2007-11-16 
13:26:01.0 -0800
+++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h2007-11-16 
13:26:36.0 -0800
@@ -71,7 +71,7 @@
hi = readl(dmar + reg + 4); \
(((u64) hi)  32) + lo; })
 */
-static inline u64 dmar_readq(void *addr)
+static inline u64 dmar_readq(void __iomem *addr)
 {
u32 lo, hi;
lo = readl(addr);
@@ -139,6 +139,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1)  6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)31)
+#define DMA_PMEN_PRS (((u32)1)0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1)  31)
 #define DMA_GCMD_SRTP (((u32)1)  30)
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] pm-qos-remove-locks-around-blocking-notifier.patch ... was Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote:
> On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
> > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote:
> > 
> > > > > [  102.366932]  ===
> > > > > [  108.552031] printk: 31 messages suppressed.
> > > > 
> > > > 
> > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
> > > > patches makes the BUGs go away :
> > > > 
> > > > latencyc-use-qos-infrastructure.patch
> > > > pm-qos-infrastructure-and-interface.patch
> > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
> > > > 
> > > > 
> > > > Gabriel
> > > > 
> > > > 
> > > This looks like the same issue Rafael saw.
> > > 
> > > Try the patch in the following post:
> > > 
> > > http://marc.info/?l=linux-kernel=119265627228498=2
> > > 
> > 
> > Well that's not very good.  _I_ can go fishing in my lkml archives for 
> > random
> > patches but not everyone is set up to do that.  And the diff to which you
> > refer gets 100% rejects against rc2-mm1 anyway.
> > 
> > Please prepare a tested, changelogged patch against rc2-mm1 asap.  
> 
> 
pm-qos-remove-locks-around-blocking-notifier.patch

I have done some testing with this fix, I think it addresses all the
sleep within a held lock warnings reported.

please apply.

Changelog: 
Remove spin locking around all blocking notifier calls that can sleep.
I had to re-structure some of the code to avoid locking issues.

--mgross

Signed-off-by: mark gross <[EMAIL PROTECTED]>

 
Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
===
--- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 
11:09:27.0 -0800
+++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 14:10:09.0 
-0800
@@ -135,13 +135,14 @@
 }
 
 
-
-/* assumes pm_qos_lock is held */
 static void update_target(int target)
 {
s32 extreme_value;
struct requirement_list *node;
+   unsigned long flags;
+   int call_notifier = 0;
 
+   spin_lock_irqsave(_qos_lock, flags);
extreme_value = pm_qos_array[target]->default_value;
list_for_each_entry(node,
_qos_array[target]->requirements.list, list) {
@@ -149,13 +150,16 @@
extreme_value, node->value);
}
if (pm_qos_array[target]->target_value != extreme_value) {
+   call_notifier = 1;
pm_qos_array[target]->target_value = extreme_value;
pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
pm_qos_array[target]->target_value);
-   blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
-   (unsigned long) pm_qos_array[target]->target_value,
-   NULL);
}
+   spin_unlock_irqrestore(_qos_lock, flags);
+
+   if (call_notifier)
+   blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
+   (unsigned long) extreme_value, NULL);
 }
 
 static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -227,8 +231,8 @@
spin_lock_irqsave(_qos_lock, flags);
list_add(>list,
_qos_array[pm_qos_class]->requirements.list);
-   update_target(pm_qos_class);
spin_unlock_irqrestore(_qos_lock, flags);
+   update_target(pm_qos_class);
 
return 0;
}
@@ -269,11 +273,10 @@
break;
}
}
+   spin_unlock_irqrestore(_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
 
-   spin_unlock_irqrestore(_qos_lock, flags);
-
return 0;
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
@@ -303,9 +306,9 @@
break;
}
}
+   spin_unlock_irqrestore(_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
-   spin_unlock_irqrestore(_qos_lock, flags);
 }
 EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
 
@@ -319,13 +322,10 @@
  */
  int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(_qos_lock, flags);
retval = blocking_notifier_chain_register(
pm_qos_array[pm_qos_class]->notifiers, notifier);
-   spin_unlock_irqrestore(_qos_lock, flags);
 
return retval;
 }
@@ -341,13 +341,10

Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote:
> On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
> > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote:
> > 
> > > > > [  102.366932]  ===
> > > > > [  108.552031] printk: 31 messages suppressed.
> > > > 
> > > > 
> > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
> > > > patches makes the BUGs go away :
> > > > 
> > > > latencyc-use-qos-infrastructure.patch
> > > > pm-qos-infrastructure-and-interface.patch
> > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
> > > > 
> > > > 
> > > > Gabriel
> > > > 
> > > > 
> > > This looks like the same issue Rafael saw.
> > > 
> > > Try the patch in the following post:
> > > 
> > > http://marc.info/?l=linux-kernel=119265627228498=2
> > > 
> > 
> > Well that's not very good.  _I_ can go fishing in my lkml archives for 
> > random
> > patches but not everyone is set up to do that.  And the diff to which you
> > refer gets 100% rejects against rc2-mm1 anyway.
> > 
> > Please prepare a tested, changelogged patch against rc2-mm1 asap.  
> 
> I'm having difficulty coming up with a .config that boots, I'll continue
> working on this but the following is what I'm pretty confident will fix
> the warnings.  
> 
> You should hold off until I get a system booting 2.6.24-rc2-mm1 before
> taking this.

I'm able to boot and have done a bit of testing and this patch wasn't
good enough.  I'm sending a new one in a min.

--mgross

> 
> pm-qos-remove-locks-around-blocking-notifier-registration.patch
> 
> Changelog: 
> Remove spin locking around blocking notifier calls that can sleep.
> 
> --mgross
> 
> Signed-off-by: mark gross <[EMAIL PROTECTED]>
> 
> 
> 
> 
> Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
> ===
> --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c  2007-11-15 
> 11:09:27.0 -0800
> +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c   2007-11-15 
> 11:10:08.0 -0800
> @@ -319,13 +319,10 @@
>   */
>   int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
>  {
> - unsigned long flags;
>   int retval;
>  
> - spin_lock_irqsave(_qos_lock, flags);
>   retval = blocking_notifier_chain_register(
>   pm_qos_array[pm_qos_class]->notifiers, notifier);
> - spin_unlock_irqrestore(_qos_lock, flags);
>  
>   return retval;
>  }
> @@ -341,13 +338,10 @@
>   */
>  int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
>  {
> - unsigned long flags;
>   int retval;
>  
> - spin_lock_irqsave(_qos_lock, flags);
>   retval = blocking_notifier_chain_unregister(
>   pm_qos_array[pm_qos_class]->notifiers, notifier);
> - spin_unlock_irqrestore(_qos_lock, flags);
>  
>   return retval;
>  }
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
> On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote:
> 
> > > > [  102.366932]  ===
> > > > [  108.552031] printk: 31 messages suppressed.
> > > 
> > > 
> > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
> > > patches makes the BUGs go away :
> > > 
> > > latencyc-use-qos-infrastructure.patch
> > > pm-qos-infrastructure-and-interface.patch
> > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
> > > 
> > > 
> > > Gabriel
> > > 
> > > 
> > This looks like the same issue Rafael saw.
> > 
> > Try the patch in the following post:
> > 
> > http://marc.info/?l=linux-kernel=119265627228498=2
> > 
> 
> Well that's not very good.  _I_ can go fishing in my lkml archives for random
> patches but not everyone is set up to do that.  And the diff to which you
> refer gets 100% rejects against rc2-mm1 anyway.
> 
> Please prepare a tested, changelogged patch against rc2-mm1 asap.  

I'm having difficulty coming up with a .config that boots, I'll continue
working on this but the following is what I'm pretty confident will fix
the warnings.  

You should hold off until I get a system booting 2.6.24-rc2-mm1 before
taking this.

pm-qos-remove-locks-around-blocking-notifier-registration.patch

Changelog: 
Remove spin locking around blocking notifier calls that can sleep.

--mgross

Signed-off-by: mark gross <[EMAIL PROTECTED]>




Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
===
--- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 
11:09:27.0 -0800
+++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 11:10:08.0 
-0800
@@ -319,13 +319,10 @@
  */
  int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(_qos_lock, flags);
retval = blocking_notifier_chain_register(
pm_qos_array[pm_qos_class]->notifiers, notifier);
-   spin_unlock_irqrestore(_qos_lock, flags);
 
return retval;
 }
@@ -341,13 +338,10 @@
  */
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(_qos_lock, flags);
retval = blocking_notifier_chain_unregister(
pm_qos_array[pm_qos_class]->notifiers, notifier);
-   spin_unlock_irqrestore(_qos_lock, flags);
 
return retval;
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
> On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote:
> 
> > > > [  102.366932]  ===
> > > > [  108.552031] printk: 31 messages suppressed.
> > > 
> > > 
> > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
> > > patches makes the BUGs go away :
> > > 
> > > latencyc-use-qos-infrastructure.patch
> > > pm-qos-infrastructure-and-interface.patch
> > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
> > > 
> > > 
> > > Gabriel
> > > 
> > > 
> > This looks like the same issue Rafael saw.
> > 
> > Try the patch in the following post:
> > 
> > http://marc.info/?l=linux-kernel=119265627228498=2
> > 
> 
> Well that's not very good.  _I_ can go fishing in my lkml archives for random
> patches but not everyone is set up to do that.  And the diff to which you
> refer gets 100% rejects against rc2-mm1 anyway.
> 
> Please prepare a tested, changelogged patch against rc2-mm1 asap.  

my bad.  I didn't catch the 24-rc2-mm1 baseline for this issue.  I'll
look at this right way.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
> On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote:
> 
> > > > [  102.366932]  ===
> > > > [  108.552031] printk: 31 messages suppressed.
> > > 
> > > 
> > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
> > > patches makes the BUGs go away :
> > > 
> > > latencyc-use-qos-infrastructure.patch
> > > pm-qos-infrastructure-and-interface.patch
> > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
> > > 
> > > 
> > > Gabriel
> > > 
> > > 
> > This looks like the same issue Rafael saw.
> > 
> > Try the patch in the following post:
> > 
> > http://marc.info/?l=linux-kernel=119265627228498=2
> > 
> 
> Well that's not very good.  _I_ can go fishing in my lkml archives for random
> patches but not everyone is set up to do that.  And the diff to which you
> refer gets 100% rejects against rc2-mm1 anyway.
> 
> Please prepare a tested, changelogged patch against rc2-mm1 asap.  

I'm sorry for not being clear, this patch is already in your tree from a
few weeks ago.  I was pointing Gabriel at the fix from back then.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
 On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote:
 
[  102.366932]  ===
[  108.552031] printk: 31 messages suppressed.
   
   
   All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
   patches makes the BUGs go away :
   
   latencyc-use-qos-infrastructure.patch
   pm-qos-infrastructure-and-interface.patch
   pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
   
   
   Gabriel
   
   
  This looks like the same issue Rafael saw.
  
  Try the patch in the following post:
  
  http://marc.info/?l=linux-kernelm=119265627228498w=2
  
 
 Well that's not very good.  _I_ can go fishing in my lkml archives for random
 patches but not everyone is set up to do that.  And the diff to which you
 refer gets 100% rejects against rc2-mm1 anyway.
 
 Please prepare a tested, changelogged patch against rc2-mm1 asap.  

I'm sorry for not being clear, this patch is already in your tree from a
few weeks ago.  I was pointing Gabriel at the fix from back then.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
 On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote:
 
[  102.366932]  ===
[  108.552031] printk: 31 messages suppressed.
   
   
   All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
   patches makes the BUGs go away :
   
   latencyc-use-qos-infrastructure.patch
   pm-qos-infrastructure-and-interface.patch
   pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
   
   
   Gabriel
   
   
  This looks like the same issue Rafael saw.
  
  Try the patch in the following post:
  
  http://marc.info/?l=linux-kernelm=119265627228498w=2
  
 
 Well that's not very good.  _I_ can go fishing in my lkml archives for random
 patches but not everyone is set up to do that.  And the diff to which you
 refer gets 100% rejects against rc2-mm1 anyway.
 
 Please prepare a tested, changelogged patch against rc2-mm1 asap.  

my bad.  I didn't catch the 24-rc2-mm1 baseline for this issue.  I'll
look at this right way.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
 On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote:
 
[  102.366932]  ===
[  108.552031] printk: 31 messages suppressed.
   
   
   All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
   patches makes the BUGs go away :
   
   latencyc-use-qos-infrastructure.patch
   pm-qos-infrastructure-and-interface.patch
   pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch
   
   
   Gabriel
   
   
  This looks like the same issue Rafael saw.
  
  Try the patch in the following post:
  
  http://marc.info/?l=linux-kernelm=119265627228498w=2
  
 
 Well that's not very good.  _I_ can go fishing in my lkml archives for random
 patches but not everyone is set up to do that.  And the diff to which you
 refer gets 100% rejects against rc2-mm1 anyway.
 
 Please prepare a tested, changelogged patch against rc2-mm1 asap.  

I'm having difficulty coming up with a .config that boots, I'll continue
working on this but the following is what I'm pretty confident will fix
the warnings.  

You should hold off until I get a system booting 2.6.24-rc2-mm1 before
taking this.

pm-qos-remove-locks-around-blocking-notifier-registration.patch

Changelog: 
Remove spin locking around blocking notifier calls that can sleep.

--mgross

Signed-off-by: mark gross [EMAIL PROTECTED]




Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
===
--- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 
11:09:27.0 -0800
+++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 11:10:08.0 
-0800
@@ -319,13 +319,10 @@
  */
  int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(pm_qos_lock, flags);
retval = blocking_notifier_chain_register(
pm_qos_array[pm_qos_class]-notifiers, notifier);
-   spin_unlock_irqrestore(pm_qos_lock, flags);
 
return retval;
 }
@@ -341,13 +338,10 @@
  */
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(pm_qos_lock, flags);
retval = blocking_notifier_chain_unregister(
pm_qos_array[pm_qos_class]-notifiers, notifier);
-   spin_unlock_irqrestore(pm_qos_lock, flags);
 
return retval;
 }
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote:
 On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
  On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote:
  
 [  102.366932]  ===
 [  108.552031] printk: 31 messages suppressed.


All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
patches makes the BUGs go away :

latencyc-use-qos-infrastructure.patch
pm-qos-infrastructure-and-interface.patch
pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch


Gabriel


   This looks like the same issue Rafael saw.
   
   Try the patch in the following post:
   
   http://marc.info/?l=linux-kernelm=119265627228498w=2
   
  
  Well that's not very good.  _I_ can go fishing in my lkml archives for 
  random
  patches but not everyone is set up to do that.  And the diff to which you
  refer gets 100% rejects against rc2-mm1 anyway.
  
  Please prepare a tested, changelogged patch against rc2-mm1 asap.  
 
 I'm having difficulty coming up with a .config that boots, I'll continue
 working on this but the following is what I'm pretty confident will fix
 the warnings.  
 
 You should hold off until I get a system booting 2.6.24-rc2-mm1 before
 taking this.

I'm able to boot and have done a bit of testing and this patch wasn't
good enough.  I'm sending a new one in a min.

--mgross

 
 pm-qos-remove-locks-around-blocking-notifier-registration.patch
 
 Changelog: 
 Remove spin locking around blocking notifier calls that can sleep.
 
 --mgross
 
 Signed-off-by: mark gross [EMAIL PROTECTED]
 
 
 
 
 Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
 ===
 --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c  2007-11-15 
 11:09:27.0 -0800
 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c   2007-11-15 
 11:10:08.0 -0800
 @@ -319,13 +319,10 @@
   */
   int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
  {
 - unsigned long flags;
   int retval;
  
 - spin_lock_irqsave(pm_qos_lock, flags);
   retval = blocking_notifier_chain_register(
   pm_qos_array[pm_qos_class]-notifiers, notifier);
 - spin_unlock_irqrestore(pm_qos_lock, flags);
  
   return retval;
  }
 @@ -341,13 +338,10 @@
   */
  int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
  {
 - unsigned long flags;
   int retval;
  
 - spin_lock_irqsave(pm_qos_lock, flags);
   retval = blocking_notifier_chain_unregister(
   pm_qos_array[pm_qos_class]-notifiers, notifier);
 - spin_unlock_irqrestore(pm_qos_lock, flags);
  
   return retval;
  }
 -
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] pm-qos-remove-locks-around-blocking-notifier.patch ... was Re: 2.6.24-rc2-mm1

2007-11-15 Thread mark gross
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote:
 On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote:
  On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote:
  
 [  102.366932]  ===
 [  108.552031] printk: 31 messages suppressed.


All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 
patches makes the BUGs go away :

latencyc-use-qos-infrastructure.patch
pm-qos-infrastructure-and-interface.patch
pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch


Gabriel


   This looks like the same issue Rafael saw.
   
   Try the patch in the following post:
   
   http://marc.info/?l=linux-kernelm=119265627228498w=2
   
  
  Well that's not very good.  _I_ can go fishing in my lkml archives for 
  random
  patches but not everyone is set up to do that.  And the diff to which you
  refer gets 100% rejects against rc2-mm1 anyway.
  
  Please prepare a tested, changelogged patch against rc2-mm1 asap.  
 
 
pm-qos-remove-locks-around-blocking-notifier.patch

I have done some testing with this fix, I think it addresses all the
sleep within a held lock warnings reported.

please apply.

Changelog: 
Remove spin locking around all blocking notifier calls that can sleep.
I had to re-structure some of the code to avoid locking issues.

--mgross

Signed-off-by: mark gross [EMAIL PROTECTED]

 
Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c
===
--- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 
11:09:27.0 -0800
+++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 14:10:09.0 
-0800
@@ -135,13 +135,14 @@
 }
 
 
-
-/* assumes pm_qos_lock is held */
 static void update_target(int target)
 {
s32 extreme_value;
struct requirement_list *node;
+   unsigned long flags;
+   int call_notifier = 0;
 
+   spin_lock_irqsave(pm_qos_lock, flags);
extreme_value = pm_qos_array[target]-default_value;
list_for_each_entry(node,
pm_qos_array[target]-requirements.list, list) {
@@ -149,13 +150,16 @@
extreme_value, node-value);
}
if (pm_qos_array[target]-target_value != extreme_value) {
+   call_notifier = 1;
pm_qos_array[target]-target_value = extreme_value;
pr_debug(KERN_ERR new target for qos %d is %d\n, target,
pm_qos_array[target]-target_value);
-   blocking_notifier_call_chain(pm_qos_array[target]-notifiers,
-   (unsigned long) pm_qos_array[target]-target_value,
-   NULL);
}
+   spin_unlock_irqrestore(pm_qos_lock, flags);
+
+   if (call_notifier)
+   blocking_notifier_call_chain(pm_qos_array[target]-notifiers,
+   (unsigned long) extreme_value, NULL);
 }
 
 static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -227,8 +231,8 @@
spin_lock_irqsave(pm_qos_lock, flags);
list_add(dep-list,
pm_qos_array[pm_qos_class]-requirements.list);
-   update_target(pm_qos_class);
spin_unlock_irqrestore(pm_qos_lock, flags);
+   update_target(pm_qos_class);
 
return 0;
}
@@ -269,11 +273,10 @@
break;
}
}
+   spin_unlock_irqrestore(pm_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
 
-   spin_unlock_irqrestore(pm_qos_lock, flags);
-
return 0;
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
@@ -303,9 +306,9 @@
break;
}
}
+   spin_unlock_irqrestore(pm_qos_lock, flags);
if (pending_update)
update_target(pm_qos_class);
-   spin_unlock_irqrestore(pm_qos_lock, flags);
 }
 EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
 
@@ -319,13 +322,10 @@
  */
  int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(pm_qos_lock, flags);
retval = blocking_notifier_chain_register(
pm_qos_array[pm_qos_class]-notifiers, notifier);
-   spin_unlock_irqrestore(pm_qos_lock, flags);
 
return retval;
 }
@@ -341,13 +341,10 @@
  */
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-   unsigned long flags;
int retval;
 
-   spin_lock_irqsave(pm_qos_lock, flags);
retval = blocking_notifier_chain_unregister(
pm_qos_array[pm_qos_class]-notifiers, notifier);
-   spin_unlock_irqrestore(pm_qos_lock, flags);
 
return retval;
 }
-
To unsubscribe from this list: send the line unsubscribe

Re: 2.6.24-rc2-mm1

2007-11-14 Thread mark gross
On Wed, Nov 14, 2007 at 05:18:02AM +0100, Gabriel C wrote:
> Gabriel C wrote:
> > Andrew Morton wrote:
> >> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc2/2.6.24-rc2-mm1/
> > 
> > I got it to boot but .. 
> 
> > 
> > ...
> > [   45.030261] input: Power Button (CM) as /devices/virtual/input/input4
> > [   45.031331] BUG: sleeping function called from invalid context at 
> > kernel/rwsem.c:47
> > [   45.031560] in_atomic():0, irqs_disabled():1
> > [   45.031569] 1 lock held by modprobe/2105:
> > [   45.031574]  #0:  (pm_qos_lock){}, at: [] 
> > pm_qos_add_notifier+0x14/0x3c
> > [   45.031606] irq event stamp: 4036
> > [   45.031612] hardirqs last  enabled at (4035): [] 
> > debug_check_no_locks_freed+0xf9/0x105
> > [   45.031632] hardirqs last disabled at (4036): [] 
> > _spin_lock_irqsave+0x10/0x55
> > [   45.031653] softirqs last  enabled at (3710): [] 
> > __do_softirq+0xe9/0xf1
> > [   45.031670] softirqs last disabled at (3703): [] 
> > do_softirq+0x3a/0x52
> > [   45.031685]  [] show_trace_log_lvl+0x12/0x25
> > [   45.031702]  [] show_trace+0xd/0x10
> > [   45.031717]  [] dump_stack+0x16/0x18
> > [   45.031728]  [] __might_sleep+0xc2/0xc9
> > [   45.031740]  [] down_write+0x17/0x6f
> > [   45.031754]  [] blocking_notifier_chain_register+0x26/0x3f
> > [   45.031766]  [] pm_qos_add_notifier+0x27/0x3c
> > [   45.031778]  [] acpi_processor_power_init+0x4d/0x164 
> > [processor]
> > [   45.031802]  [] acpi_processor_start+0x503/0x556 [processor]
> > [   45.031820]  [] acpi_start_single_object+0x20/0x3d
> > [   45.031837]  [] acpi_device_probe+0x78/0x88
> > [   45.031850]  [] driver_probe_device+0xb2/0x12d
> > [   45.031866]  [] __driver_attach+0x76/0xaf
> > [   45.031878]  [] bus_for_each_dev+0x3e/0x60
> > [   45.031889]  [] driver_attach+0x14/0x16
> > [   45.031899]  [] bus_add_driver+0x7a/0x180
> > [   45.031909]  [] driver_register+0x57/0x5c
> > [   45.031918]  [] acpi_bus_register_driver+0x3a/0x3c
> > [   45.031929]  [] acpi_processor_init+0x73/0xc5 [processor]
> > [   45.031943]  [] sys_init_module+0x14e3/0x15ae
> > [   45.031958]  [] sysenter_past_esp+0x5f/0xa5
> > [   45.031969]  ===
> > [   45.032213] ACPI: Invalid PBLK length [0]
> > [   45.032465] ACPI: Invalid PBLK length [0]
> > [   45.032656] ACPI: Invalid PBLK length [0]
> >
> > 
> > ...
> > 
> > ...
> > [  102.331554] BUG: sleeping function called from invalid context at 
> > kernel/rwsem.c:20
> > [  102.331575] in_atomic():0, irqs_disabled():1
> > [  102.331583] 1 lock held by artsd/4385:
> > [  102.331589]  #0:  (pm_qos_lock){}, at: [] 
> > pm_qos_add_requirement+0x5a/0x98
> > [  102.331619] irq event stamp: 19022
> > [  102.331624] hardirqs last  enabled at (19021): [] 
> > _spin_unlock_irqrestore+0x36/0x3c
> > [  102.331641] hardirqs last disabled at (19022): [] 
> > _spin_lock_irqsave+0x10/0x55
> > [  102.331655] softirqs last  enabled at (18846): [] 
> > __do_softirq+0xe9/0xf1
> > [  102.331672] softirqs last disabled at (18839): [] 
> > do_softirq+0x3a/0x52
> > [  102.331688]  [] show_trace_log_lvl+0x12/0x25
> > [  102.331704]  [] show_trace+0xd/0x10
> > [  102.331715]  [] dump_stack+0x16/0x18
> > [  102.331727]  [] __might_sleep+0xc2/0xc9
> > [  102.331739]  [] down_read+0x16/0x6a
> > [  102.331750]  [] __blocking_notifier_call_chain+0x24/0x4c
> > [  102.331761]  [] blocking_notifier_call_chain+0xc/0xe
> > [  102.331773]  [] update_target+0x3e/0x43
> > [  102.331784]  [] pm_qos_add_requirement+0x76/0x98
> > [  102.331795]  [] snd_pcm_hw_params_user+0x2b1/0x302
> > [  102.331811]  [] snd_pcm_common_ioctl1+0x17a/0xda3
> > [  102.331825]  [] snd_pcm_playback_ioctl1+0x3ab/0x3c2
> > [  102.331840]  [] snd_pcm_playback_ioctl+0x27/0x35
> > [  102.331853]  [] vfs_ioctl+0x22/0x67
> > [  102.331867]  [] do_vfs_ioctl+0x25a/0x268
> > [  102.331878]  [] sys_ioctl+0x2c/0x45
> > [  102.331889]  [] sysenter_past_esp+0x5f/0xa5
> > [  102.331901]  ===
> > [  102.331911] WARNING: at arch/x86/kernel/smp_32.c:561 
> > native_smp_call_function_mask()
> > [  102.331920]  [] show_trace_log_lvl+0x12/0x25
> > [  102.331932]  [] show_trace+0xd/0x10
> > [  102.331944]  [] dump_stack+0x16/0x18
> > [  102.331955]  [] native_smp_call_function_mask+0x39/0x11d
> > [  102.331970]  [] smp_call_function+0x18/0x1d
> > [  102.331984]  [] acpi_processor_latency_notify+0x13/0x1a 
> > [processor]
> > [  102.332011]  [] notifier_call_chain+0x2b/0x4a
> > [  102.332023]  [] __blocking_notifier_call_chain+0x37/0x4c
> > [  102.332035]  [] blocking_notifier_call_chain+0xc/0xe
> > [  102.332047]  [] update_target+0x3e/0x43
> > [  102.332058]  [] pm_qos_add_requirement+0x76/0x98
> > [  102.332070]  [] snd_pcm_hw_params_user+0x2b1/0x302
> > [  102.332085]  [] snd_pcm_common_ioctl1+0x17a/0xda3
> > [  102.332097]  [] snd_pcm_playback_ioctl1+0x3ab/0x3c2
> > [  102.332111]  [] snd_pcm_playback_ioctl+0x27/0x35
> > [  

Re: 2.6.24-rc2-mm1

2007-11-14 Thread mark gross
On Wed, Nov 14, 2007 at 05:18:02AM +0100, Gabriel C wrote:
 Gabriel C wrote:
  Andrew Morton wrote:
  ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc2/2.6.24-rc2-mm1/
  
  I got it to boot but .. 
 
  
  ...
  [   45.030261] input: Power Button (CM) as /devices/virtual/input/input4
  [   45.031331] BUG: sleeping function called from invalid context at 
  kernel/rwsem.c:47
  [   45.031560] in_atomic():0, irqs_disabled():1
  [   45.031569] 1 lock held by modprobe/2105:
  [   45.031574]  #0:  (pm_qos_lock){}, at: [c0139b1c] 
  pm_qos_add_notifier+0x14/0x3c
  [   45.031606] irq event stamp: 4036
  [   45.031612] hardirqs last  enabled at (4035): [c0140cc3] 
  debug_check_no_locks_freed+0xf9/0x105
  [   45.031632] hardirqs last disabled at (4036): [c0333bf5] 
  _spin_lock_irqsave+0x10/0x55
  [   45.031653] softirqs last  enabled at (3710): [c0128e96] 
  __do_softirq+0xe9/0xf1
  [   45.031670] softirqs last disabled at (3703): [c0128ed8] 
  do_softirq+0x3a/0x52
  [   45.031685]  [c0104e01] show_trace_log_lvl+0x12/0x25
  [   45.031702]  [c010562d] show_trace+0xd/0x10
  [   45.031717]  [c0105704] dump_stack+0x16/0x18
  [   45.031728]  [c011db69] __might_sleep+0xc2/0xc9
  [   45.031740]  [c0139093] down_write+0x17/0x6f
  [   45.031754]  [c013991e] blocking_notifier_chain_register+0x26/0x3f
  [   45.031766]  [c0139b2f] pm_qos_add_notifier+0x27/0x3c
  [   45.031778]  [e885bc77] acpi_processor_power_init+0x4d/0x164 
  [processor]
  [   45.031802]  [e885a5b3] acpi_processor_start+0x503/0x556 [processor]
  [   45.031820]  [c022125c] acpi_start_single_object+0x20/0x3d
  [   45.031837]  [c0222392] acpi_device_probe+0x78/0x88
  [   45.031850]  [c0244700] driver_probe_device+0xb2/0x12d
  [   45.031866]  [c02448a2] __driver_attach+0x76/0xaf
  [   45.031878]  [c0243cb0] bus_for_each_dev+0x3e/0x60
  [   45.031889]  [c0244588] driver_attach+0x14/0x16
  [   45.031899]  [c0243f9a] bus_add_driver+0x7a/0x180
  [   45.031909]  [c0244a9d] driver_register+0x57/0x5c
  [   45.031918]  [c022267e] acpi_bus_register_driver+0x3a/0x3c
  [   45.031929]  [e882f073] acpi_processor_init+0x73/0xc5 [processor]
  [   45.031943]  [c0148ec5] sys_init_module+0x14e3/0x15ae
  [   45.031958]  [c0103d86] sysenter_past_esp+0x5f/0xa5
  [   45.031969]  ===
  [   45.032213] ACPI: Invalid PBLK length [0]
  [   45.032465] ACPI: Invalid PBLK length [0]
  [   45.032656] ACPI: Invalid PBLK length [0]
 
  
  ...
  
  ...
  [  102.331554] BUG: sleeping function called from invalid context at 
  kernel/rwsem.c:20
  [  102.331575] in_atomic():0, irqs_disabled():1
  [  102.331583] 1 lock held by artsd/4385:
  [  102.331589]  #0:  (pm_qos_lock){}, at: [c0139d4a] 
  pm_qos_add_requirement+0x5a/0x98
  [  102.331619] irq event stamp: 19022
  [  102.331624] hardirqs last  enabled at (19021): [c0333d1a] 
  _spin_unlock_irqrestore+0x36/0x3c
  [  102.331641] hardirqs last disabled at (19022): [c0333bf5] 
  _spin_lock_irqsave+0x10/0x55
  [  102.331655] softirqs last  enabled at (18846): [c0128e96] 
  __do_softirq+0xe9/0xf1
  [  102.331672] softirqs last disabled at (18839): [c0128ed8] 
  do_softirq+0x3a/0x52
  [  102.331688]  [c0104e01] show_trace_log_lvl+0x12/0x25
  [  102.331704]  [c010562d] show_trace+0xd/0x10
  [  102.331715]  [c0105704] dump_stack+0x16/0x18
  [  102.331727]  [c011db69] __might_sleep+0xc2/0xc9
  [  102.331739]  [c0138fe7] down_read+0x16/0x6a
  [  102.331750]  [c0139872] __blocking_notifier_call_chain+0x24/0x4c
  [  102.331761]  [c01398a6] blocking_notifier_call_chain+0xc/0xe
  [  102.331773]  [c0139b82] update_target+0x3e/0x43
  [  102.331784]  [c0139d66] pm_qos_add_requirement+0x76/0x98
  [  102.331795]  [c02bd8a0] snd_pcm_hw_params_user+0x2b1/0x302
  [  102.331811]  [c02bdad9] snd_pcm_common_ioctl1+0x17a/0xda3
  [  102.331825]  [c02bee6b] snd_pcm_playback_ioctl1+0x3ab/0x3c2
  [  102.331840]  [c02bef06] snd_pcm_playback_ioctl+0x27/0x35
  [  102.331853]  [c01795ce] vfs_ioctl+0x22/0x67
  [  102.331867]  [c017986d] do_vfs_ioctl+0x25a/0x268
  [  102.331878]  [c01798a7] sys_ioctl+0x2c/0x45
  [  102.331889]  [c0103d86] sysenter_past_esp+0x5f/0xa5
  [  102.331901]  ===
  [  102.331911] WARNING: at arch/x86/kernel/smp_32.c:561 
  native_smp_call_function_mask()
  [  102.331920]  [c0104e01] show_trace_log_lvl+0x12/0x25
  [  102.331932]  [c010562d] show_trace+0xd/0x10
  [  102.331944]  [c0105704] dump_stack+0x16/0x18
  [  102.331955]  [c0112f35] native_smp_call_function_mask+0x39/0x11d
  [  102.331970]  [c01142ef] smp_call_function+0x18/0x1d
  [  102.331984]  [e885b449] acpi_processor_latency_notify+0x13/0x1a 
  [processor]
  [  102.332011]  [c01396e2] notifier_call_chain+0x2b/0x4a
  [  102.332023]  [c0139885] __blocking_notifier_call_chain+0x37/0x4c
  [  102.332035]  [c01398a6] blocking_notifier_call_chain+0xc/0xe
  [  102.332047]  [c0139b82] update_target+0x3e/0x43
  [  102.332058]  [c0139d66] 

Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-12 Thread mark gross
On Fri, Nov 09, 2007 at 03:24:44PM -0500, [EMAIL PROTECTED] wrote:
> On Thu, 08 Nov 2007 14:30:07 PST, Mark Gross said:
> 
> > wing patch fixes up the cpuidle / pm-qos integration.
> > 
> > I suspect that this is folded into another mm patch but it should fix
> > C-state issue identified.
> 
> Confirming that patch left my CPUs mostly in C3 again. Thanks.
> 
> I'll have to let Mark and Andrew figure out this code's status in the -mm 
> queue.

I checked on this last week, I'm pretty sure its in Andrew's current
tree.

Thanks again for looking at this.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-12 Thread mark gross
On Fri, Nov 09, 2007 at 03:24:44PM -0500, [EMAIL PROTECTED] wrote:
 On Thu, 08 Nov 2007 14:30:07 PST, Mark Gross said:
 
  wing patch fixes up the cpuidle / pm-qos integration.
  
  I suspect that this is folded into another mm patch but it should fix
  C-state issue identified.
 
 Confirming that patch left my CPUs mostly in C3 again. Thanks.
 
 I'll have to let Mark and Andrew figure out this code's status in the -mm 
 queue.

I checked on this last week, I'm pretty sure its in Andrew's current
tree.

Thanks again for looking at this.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote:
> (Sorry for not reporting this sooner - I haven't been running off battery
> much in the last 3 weeks, so I didn't notice it till now...)
> 
> Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
> 
> As reported by 'powertop' on a basically idle machine:
> 
> 2.6.23-mm1:
> 
> CnAvg residency   P-states (frequencies)
> C0 (cpu running)(100.0%)2.00 Ghz 0.8%
> C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> C20.0ms ( 0.0%) 1333 Mhz 0.0%
> C30.0ms ( 0.0%) 1000 Mhz99.2%
> 
> 2.6.23-rc8-mm2:
> 
> CnAvg residency   P-states (frequencies)
> C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
> C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> C20.0ms ( 0.0%) 1333 Mhz 0.0%
> C3   31.5ms (99.7%) 1000 Mhz   100.0%
> 
> In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
> but only 21 watts for -rc8-mm2, a significant regression.
> 
> I bisected this down to this set of patches:
> 
> pm-qos-infrastructure-and-interface.patch
> pm-qos-infrastructure-and-interface-fix.patch
> pm-qos-infrastructure-and-interface-vs-git-acpi.patch
> pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
> latencyc-use-qos-infrastructure.patch
> 
> The patch says:
> 
>   To register the default pm_qos target for the specific parameter, the
>   process must open one of /dev/[cpu_dma_latency, network_latency,
>   network_throughput]
> 
>   As long as the device node is held open that process has a registered
>   requirement on the parameter.  The name of the requirement is
>   "process_" derived from the current->pid from within the open system
>   call.
> 
> I shouldn't have to have a process open a /dev/file, write a number, and then
> stay around forever so the file doesn't close in order to get the same 
> behavior
> I was getting by default before.  What needs to happen to get this to not
> be a behavior regression/change?
> 
> 
> 
> 

wing patch fixes up the cpuidle / pm-qos integration.

I suspect that this is folded into another mm patch but it should fix
C-state issue identified.

--mgross



Signed-off-by: mark gross <[EMAIL PROTECTED]>

-

Index: linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/cpuidle.c 2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-11-08 13:25:13.0 
-0800
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+   pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
Index: linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/ladder.c2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-11-08 
13:11:30.0 -0800
@@ -82,7 +82,7 @@
if (last_idx < dev->state_count - 1 &&
last_residency > last_state->threshold.promotion_time &&
dev->states[last_idx + 1].exit_latency <=
-   pm_qos_requirement(PM_QOS_CPUIDLE)) {
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= 
last_state->threshold.promotion_count) {
Index: linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/menu.c  2007-11-08 
13:12:11.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c   2007-11-08 
13:24:03.0 -0800
@@ -48,7 +48,8 @@
break;
if (s->target_residency > data->predicted_us)
break;
-   if (s->exit_latency > pm_qos_requirement(PM_QOS_CPUIDLE))
+   if (s->exit_latency >
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY))
break;
}
 
Index: linux-2.6.23-mm1/include/linux/pm_qos_params.h
===
--- linux-2.6.23-mm1.orig/include/linux/pm_qos_params.h 2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/include/linux/pm_qos_params.h  2007-11-08 
13:14:05.0 -0800
@@ -6,23 +6,12 @

Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 10:02:12AM -0800, Andrew Morton wrote:
> > On Thu, 08 Nov 2007 12:19:44 -0500 [EMAIL PROTECTED] wrote:
> > (Sorry for not reporting this sooner - I haven't been running off battery
> > much in the last 3 weeks, so I didn't notice it till now...)
> > 
> > Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
> > 
> > As reported by 'powertop' on a basically idle machine:
> > 
> > 2.6.23-mm1:
> > 
> > CnAvg residency   P-states (frequencies)
> > C0 (cpu running)(100.0%)2.00 Ghz 0.8%
> > C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> > C20.0ms ( 0.0%) 1333 Mhz 0.0%
> > C30.0ms ( 0.0%) 1000 Mhz99.2%
> > 
> > 2.6.23-rc8-mm2:
> > 
> > CnAvg residency   P-states (frequencies)
> > C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
> > C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> > C20.0ms ( 0.0%) 1333 Mhz 0.0%
> > C3   31.5ms (99.7%) 1000 Mhz   100.0%
> > 
> > In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
> > but only 21 watts for -rc8-mm2, a significant regression.
> > 
> > I bisected this down to this set of patches:
> > 
> > pm-qos-infrastructure-and-interface.patch
> > pm-qos-infrastructure-and-interface-fix.patch
> > pm-qos-infrastructure-and-interface-vs-git-acpi.patch
> > pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
> > latencyc-use-qos-infrastructure.patch
> > 
> > The patch says:
> > 
> >   To register the default pm_qos target for the specific parameter, the
> >   process must open one of /dev/[cpu_dma_latency, network_latency,
> >   network_throughput]
> > 
> >   As long as the device node is held open that process has a registered
> >   requirement on the parameter.  The name of the requirement is
> >   "process_" derived from the current->pid from within the open system
> >   call.
> > 
> > I shouldn't have to have a process open a /dev/file, write a number, and 
> > then
> > stay around forever so the file doesn't close in order to get the same 
> > behavior
> > I was getting by default before.  What needs to happen to get this to not
> > be a behavior regression/change?
> > 
> 
> That's a great report, thanks.  Over to you, Mark ;)
> 
> btw, I also have a note here that these patches caused Rafael to see an
> smp_call_function() inside local_irq_save().  Did that get fixed?

Ah, I see the problem.  I think I posted a fix to this.  The problem is
that what's in the mm1 tree has a parameter PM_QOS_IDLE that needed to
be PM_QOS_CPU_DMA_LATENCY. 

I'm not sure what's in the current MM tree at this point so I can't say
its been fixed.  Is there an easy way from me to see what's currently in
MM?  

FWIW I think I fixed this when I fixed up Rafael's issue.  Would you
like me to send out a re-fresh patch against 2.6.23-mm1?

--mgross

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote:
> (Sorry for not reporting this sooner - I haven't been running off battery
> much in the last 3 weeks, so I didn't notice it till now...)
> 
> Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
> 
> As reported by 'powertop' on a basically idle machine:
> 
> 2.6.23-mm1:
> 
> CnAvg residency   P-states (frequencies)
> C0 (cpu running)(100.0%)2.00 Ghz 0.8%
> C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> C20.0ms ( 0.0%) 1333 Mhz 0.0%
> C30.0ms ( 0.0%) 1000 Mhz99.2%
> 
> 2.6.23-rc8-mm2:
> 
> CnAvg residency   P-states (frequencies)
> C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
> C10.0ms ( 0.0%) 1.67 Ghz 0.0%
> C20.0ms ( 0.0%) 1333 Mhz 0.0%
> C3   31.5ms (99.7%) 1000 Mhz   100.0%
> 
> In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
> but only 21 watts for -rc8-mm2, a significant regression.

well, thats because you burn less watts if you get into C3.  

> 
> I bisected this down to this set of patches:
> 
> pm-qos-infrastructure-and-interface.patch
> pm-qos-infrastructure-and-interface-fix.patch
> pm-qos-infrastructure-and-interface-vs-git-acpi.patch
> pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
> latencyc-use-qos-infrastructure.patch

yipes!  I'll look at it right away.  It looks like an integration issue
with CPU-IDLE patches (those control the C-state entry).  I'll get it
fixed up.

> 
> The patch says:
> 
>   To register the default pm_qos target for the specific parameter, the
>   process must open one of /dev/[cpu_dma_latency, network_latency,
>   network_throughput]
> 
>   As long as the device node is held open that process has a registered
>   requirement on the parameter.  The name of the requirement is
>   "process_" derived from the current->pid from within the open system
>   call.
> 
> I shouldn't have to have a process open a /dev/file, write a number, and then
> stay around forever so the file doesn't close in order to get the same 
> behavior
> I was getting by default before.  What needs to happen to get this to not
> be a behavior regression/change?
>
you won't have such a process (at least I highly doubt you do)  I need
to fix this.  Thanks for taking the time to bisect it and reporting it to me!

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote:
 (Sorry for not reporting this sooner - I haven't been running off battery
 much in the last 3 weeks, so I didn't notice it till now...)
 
 Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
 
 As reported by 'powertop' on a basically idle machine:
 
 2.6.23-mm1:
 
 CnAvg residency   P-states (frequencies)
 C0 (cpu running)(100.0%)2.00 Ghz 0.8%
 C10.0ms ( 0.0%) 1.67 Ghz 0.0%
 C20.0ms ( 0.0%) 1333 Mhz 0.0%
 C30.0ms ( 0.0%) 1000 Mhz99.2%
 
 2.6.23-rc8-mm2:
 
 CnAvg residency   P-states (frequencies)
 C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
 C10.0ms ( 0.0%) 1.67 Ghz 0.0%
 C20.0ms ( 0.0%) 1333 Mhz 0.0%
 C3   31.5ms (99.7%) 1000 Mhz   100.0%
 
 In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
 but only 21 watts for -rc8-mm2, a significant regression.

well, thats because you burn less watts if you get into C3.  

 
 I bisected this down to this set of patches:
 
 pm-qos-infrastructure-and-interface.patch
 pm-qos-infrastructure-and-interface-fix.patch
 pm-qos-infrastructure-and-interface-vs-git-acpi.patch
 pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
 latencyc-use-qos-infrastructure.patch

yipes!  I'll look at it right away.  It looks like an integration issue
with CPU-IDLE patches (those control the C-state entry).  I'll get it
fixed up.

 
 The patch says:
 
   To register the default pm_qos target for the specific parameter, the
   process must open one of /dev/[cpu_dma_latency, network_latency,
   network_throughput]
 
   As long as the device node is held open that process has a registered
   requirement on the parameter.  The name of the requirement is
   process_PID derived from the current-pid from within the open system
   call.
 
 I shouldn't have to have a process open a /dev/file, write a number, and then
 stay around forever so the file doesn't close in order to get the same 
 behavior
 I was getting by default before.  What needs to happen to get this to not
 be a behavior regression/change?

you won't have such a process (at least I highly doubt you do)  I need
to fix this.  Thanks for taking the time to bisect it and reporting it to me!

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 10:02:12AM -0800, Andrew Morton wrote:
  On Thu, 08 Nov 2007 12:19:44 -0500 [EMAIL PROTECTED] wrote:
  (Sorry for not reporting this sooner - I haven't been running off battery
  much in the last 3 weeks, so I didn't notice it till now...)
  
  Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
  
  As reported by 'powertop' on a basically idle machine:
  
  2.6.23-mm1:
  
  CnAvg residency   P-states (frequencies)
  C0 (cpu running)(100.0%)2.00 Ghz 0.8%
  C10.0ms ( 0.0%) 1.67 Ghz 0.0%
  C20.0ms ( 0.0%) 1333 Mhz 0.0%
  C30.0ms ( 0.0%) 1000 Mhz99.2%
  
  2.6.23-rc8-mm2:
  
  CnAvg residency   P-states (frequencies)
  C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
  C10.0ms ( 0.0%) 1.67 Ghz 0.0%
  C20.0ms ( 0.0%) 1333 Mhz 0.0%
  C3   31.5ms (99.7%) 1000 Mhz   100.0%
  
  In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
  but only 21 watts for -rc8-mm2, a significant regression.
  
  I bisected this down to this set of patches:
  
  pm-qos-infrastructure-and-interface.patch
  pm-qos-infrastructure-and-interface-fix.patch
  pm-qos-infrastructure-and-interface-vs-git-acpi.patch
  pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
  latencyc-use-qos-infrastructure.patch
  
  The patch says:
  
To register the default pm_qos target for the specific parameter, the
process must open one of /dev/[cpu_dma_latency, network_latency,
network_throughput]
  
As long as the device node is held open that process has a registered
requirement on the parameter.  The name of the requirement is
process_PID derived from the current-pid from within the open system
call.
  
  I shouldn't have to have a process open a /dev/file, write a number, and 
  then
  stay around forever so the file doesn't close in order to get the same 
  behavior
  I was getting by default before.  What needs to happen to get this to not
  be a behavior regression/change?
  
 
 That's a great report, thanks.  Over to you, Mark ;)
 
 btw, I also have a note here that these patches caused Rafael to see an
 smp_call_function() inside local_irq_save().  Did that get fixed?

Ah, I see the problem.  I think I posted a fix to this.  The problem is
that what's in the mm1 tree has a parameter PM_QOS_IDLE that needed to
be PM_QOS_CPU_DMA_LATENCY. 

I'm not sure what's in the current MM tree at this point so I can't say
its been fixed.  Is there an easy way from me to see what's currently in
MM?  

FWIW I think I fixed this when I fixed up Rafael's issue.  Would you
like me to send out a re-fresh patch against 2.6.23-mm1?

--mgross

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64

2007-11-08 Thread Mark Gross
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote:
 (Sorry for not reporting this sooner - I haven't been running off battery
 much in the last 3 weeks, so I didn't notice it till now...)
 
 Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel.
 
 As reported by 'powertop' on a basically idle machine:
 
 2.6.23-mm1:
 
 CnAvg residency   P-states (frequencies)
 C0 (cpu running)(100.0%)2.00 Ghz 0.8%
 C10.0ms ( 0.0%) 1.67 Ghz 0.0%
 C20.0ms ( 0.0%) 1333 Mhz 0.0%
 C30.0ms ( 0.0%) 1000 Mhz99.2%
 
 2.6.23-rc8-mm2:
 
 CnAvg residency   P-states (frequencies)
 C0 (cpu running)( 0.3%) 2.00 Ghz 0.0%
 C10.0ms ( 0.0%) 1.67 Ghz 0.0%
 C20.0ms ( 0.0%) 1333 Mhz 0.0%
 C3   31.5ms (99.7%) 1000 Mhz   100.0%
 
 In addition, the ACPI power estimate reported about 25 watts for 23-mm1,
 but only 21 watts for -rc8-mm2, a significant regression.
 
 I bisected this down to this set of patches:
 
 pm-qos-infrastructure-and-interface.patch
 pm-qos-infrastructure-and-interface-fix.patch
 pm-qos-infrastructure-and-interface-vs-git-acpi.patch
 pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch
 latencyc-use-qos-infrastructure.patch
 
 The patch says:
 
   To register the default pm_qos target for the specific parameter, the
   process must open one of /dev/[cpu_dma_latency, network_latency,
   network_throughput]
 
   As long as the device node is held open that process has a registered
   requirement on the parameter.  The name of the requirement is
   process_PID derived from the current-pid from within the open system
   call.
 
 I shouldn't have to have a process open a /dev/file, write a number, and then
 stay around forever so the file doesn't close in order to get the same 
 behavior
 I was getting by default before.  What needs to happen to get this to not
 be a behavior regression/change?
 
 
 
 

wing patch fixes up the cpuidle / pm-qos integration.

I suspect that this is folded into another mm patch but it should fix
C-state issue identified.

--mgross



Signed-off-by: mark gross [EMAIL PROTECTED]

-

Index: linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/cpuidle.c 2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-11-08 13:25:13.0 
-0800
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+   pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
Index: linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/ladder.c2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-11-08 
13:11:30.0 -0800
@@ -82,7 +82,7 @@
if (last_idx  dev-state_count - 1 
last_residency  last_state-threshold.promotion_time 
dev-states[last_idx + 1].exit_latency =
-   pm_qos_requirement(PM_QOS_CPUIDLE)) {
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
last_state-stats.promotion_count++;
last_state-stats.demotion_count = 0;
if (last_state-stats.promotion_count = 
last_state-threshold.promotion_count) {
Index: linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c
===
--- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/menu.c  2007-11-08 
13:12:11.0 -0800
+++ linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c   2007-11-08 
13:24:03.0 -0800
@@ -48,7 +48,8 @@
break;
if (s-target_residency  data-predicted_us)
break;
-   if (s-exit_latency  pm_qos_requirement(PM_QOS_CPUIDLE))
+   if (s-exit_latency 
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY))
break;
}
 
Index: linux-2.6.23-mm1/include/linux/pm_qos_params.h
===
--- linux-2.6.23-mm1.orig/include/linux/pm_qos_params.h 2007-11-08 
13:09:53.0 -0800
+++ linux-2.6.23-mm1/include/linux/pm_qos_params.h  2007-11-08 
13:14:05.0 -0800
@@ -6,23 +6,12 @@
 #include linux/notifier.h
 #include linux/miscdevice.h
 
-struct requirement_list {
-   struct list_head list;
-   union {
-   s32 value;
-   s32 usec;
-   s32 kbps;
-   };
-   char *name;
-};
-
 #define PM_QOS_RESERVED 0
 #define PM_QOS_CPU_DMA_LATENCY 1
 #define PM_QOS_NETWORK_LATENCY

Re: [PATCH] iommu-PMEN_REG boot up support

2007-10-29 Thread Mark Gross
On Sat, Oct 27, 2007 at 02:19:38AM +0200, Muli Ben-Yehuda wrote:
> On Fri, Oct 26, 2007 at 11:18:49AM -0700, Mark Gross wrote:
> 
> > The following patch clears the portect memory region enable bit at
> > boot time by default.  It also provides a kernel parrameter for
> > disabling this behavior and leave the PMEN_REG untouched if so
> > wanted.
> > 
> > If the boot loader or platform has protected memory regions enabled
> > at boot time it could prevent DMA's from happening as drivers are
> > loaded and used.
> 
> What's the value of adding a command line option for this? Under what
> circumstances would we want to not clear this bit?

umm, /me asks around and finds out that at this time there isn't much
point to having a command line to preserve any protected areas set up at
boot time.

> 
> > +   } else if (!strncmp(str, "no_epm_clear", 12)) {
> > +   printk(KERN_INFO
> > +   "Intel-IOMMU: subress clearing of Enable "
> > +   "Protected Memory bit\n");
> > +   clear_pmen_epm = 0;
> 
> `suppress', I assume. Rest looks fine, if the configuration option is
> really needed.
> 
> Cheers,
> Muli
> -- 
> SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007
> http://www.haifa.il.ibm.com/Workshops/systor2007/
> 
> Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007
> -


Signed-off-by: mark gross <[EMAIL PROTECTED]>

--mgross


Index: linux-2.6/drivers/pci/intel-iommu.c
===
--- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-29 13:30:29.0 -0700
@@ -692,6 +692,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(>register_lock, flags);
+   pmen = readl(iommu->reg + DMAR_PMEN_REG);
+   pmen &= ~DMA_PMEN_EPM;
+   writel(pmen, iommu->reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen & DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(>register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -1731,6 +1748,8 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6/drivers/pci/intel-iommu.h
===
--- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-29 10:30:24.0 -0700
@@ -126,6 +126,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1) << 31)
 #define DMA_GCMD_SRTP (((u32)1) << 30)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] intel-iommu fixes

2007-10-29 Thread Mark Gross
On Mon, Oct 29, 2007 at 04:51:16AM +, Al Viro wrote:
>   * off by one in dmar_get_fault_reason() (maximal index in
> array is ARRAY_SIZE()-1, not ARRAY_SIZE())
>   * NULL noise removal
>   * __iomem annotation fix
> 
> Signed-off-by: Al Viro <[EMAIL PROTECTED]>
> ---
> diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
> index 0c4ab3b..9b35259 100644
> --- a/drivers/pci/intel-iommu.c
> +++ b/drivers/pci/intel-iommu.c
> @@ -745,7 +745,7 @@ static char *fault_reason_strings[] =
>   "non-zero reserved fields in PTE",
>   "Unknown"
>  };
> -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings)
> +#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1

Probably should be 
+#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)

>  
>  char *dmar_get_fault_reason(u8 fault_reason)
>  {
> @@ -995,7 +995,6 @@ static struct intel_iommu *alloc_iommu(struct 
> dmar_drhd_unit *drhd)
>   return iommu;
>  error_unmap:
>   iounmap(iommu->reg);
> - iommu->reg = 0;
>  error:
>   kfree(iommu);
>   return NULL;
> @@ -1808,7 +1807,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
>   if (!domain) {
>   printk(KERN_ERR
>   "Allocating domain for %s failed", pci_name(pdev));
> - return 0;
> + return NULL;
>   }
>  
>   /* make sure context mapping is ok */
> @@ -1818,7 +1817,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
>   printk(KERN_ERR
>   "Domain context map for %s failed",
>   pci_name(pdev));
> - return 0;
> + return NULL;
>   }
>   }
>  
> diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
> index ee88dd2..459ad1f 100644
> --- a/drivers/pci/intel-iommu.h
> +++ b/drivers/pci/intel-iommu.h
> @@ -58,7 +58,7 @@
>   hi = readl(dmar + reg + 4); \
>   (((u64) hi) << 32) + lo; })
>  */
> -static inline u64 dmar_readq(void *addr)
> +static inline u64 dmar_readq(void __iomem *addr)
>  {
>   u32 lo, hi;
>   lo = readl(addr);
Looks good to me.

ACK --mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] intel-iommu fixes

2007-10-29 Thread Mark Gross
On Mon, Oct 29, 2007 at 04:51:16AM +, Al Viro wrote:
   * off by one in dmar_get_fault_reason() (maximal index in
 array is ARRAY_SIZE()-1, not ARRAY_SIZE())
   * NULL noise removal
   * __iomem annotation fix
 
 Signed-off-by: Al Viro [EMAIL PROTECTED]
 ---
 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
 index 0c4ab3b..9b35259 100644
 --- a/drivers/pci/intel-iommu.c
 +++ b/drivers/pci/intel-iommu.c
 @@ -745,7 +745,7 @@ static char *fault_reason_strings[] =
   non-zero reserved fields in PTE,
   Unknown
  };
 -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings)
 +#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1

Probably should be 
+#define MAX_FAULT_REASON_IDX   (ARRAY_SIZE(fault_reason_strings) - 1)

  
  char *dmar_get_fault_reason(u8 fault_reason)
  {
 @@ -995,7 +995,6 @@ static struct intel_iommu *alloc_iommu(struct 
 dmar_drhd_unit *drhd)
   return iommu;
  error_unmap:
   iounmap(iommu-reg);
 - iommu-reg = 0;
  error:
   kfree(iommu);
   return NULL;
 @@ -1808,7 +1807,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
   if (!domain) {
   printk(KERN_ERR
   Allocating domain for %s failed, pci_name(pdev));
 - return 0;
 + return NULL;
   }
  
   /* make sure context mapping is ok */
 @@ -1818,7 +1817,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
   printk(KERN_ERR
   Domain context map for %s failed,
   pci_name(pdev));
 - return 0;
 + return NULL;
   }
   }
  
 diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
 index ee88dd2..459ad1f 100644
 --- a/drivers/pci/intel-iommu.h
 +++ b/drivers/pci/intel-iommu.h
 @@ -58,7 +58,7 @@
   hi = readl(dmar + reg + 4); \
   (((u64) hi)  32) + lo; })
  */
 -static inline u64 dmar_readq(void *addr)
 +static inline u64 dmar_readq(void __iomem *addr)
  {
   u32 lo, hi;
   lo = readl(addr);
Looks good to me.

ACK --mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] iommu-PMEN_REG boot up support

2007-10-29 Thread Mark Gross
On Sat, Oct 27, 2007 at 02:19:38AM +0200, Muli Ben-Yehuda wrote:
 On Fri, Oct 26, 2007 at 11:18:49AM -0700, Mark Gross wrote:
 
  The following patch clears the portect memory region enable bit at
  boot time by default.  It also provides a kernel parrameter for
  disabling this behavior and leave the PMEN_REG untouched if so
  wanted.
  
  If the boot loader or platform has protected memory regions enabled
  at boot time it could prevent DMA's from happening as drivers are
  loaded and used.
 
 What's the value of adding a command line option for this? Under what
 circumstances would we want to not clear this bit?

umm, /me asks around and finds out that at this time there isn't much
point to having a command line to preserve any protected areas set up at
boot time.

 
  +   } else if (!strncmp(str, no_epm_clear, 12)) {
  +   printk(KERN_INFO
  +   Intel-IOMMU: subress clearing of Enable 
  +   Protected Memory bit\n);
  +   clear_pmen_epm = 0;
 
 `suppress', I assume. Rest looks fine, if the configuration option is
 really needed.
 
 Cheers,
 Muli
 -- 
 SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007
 http://www.haifa.il.ibm.com/Workshops/systor2007/
 
 Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007
 -


Signed-off-by: mark gross [EMAIL PROTECTED]

--mgross


Index: linux-2.6/drivers/pci/intel-iommu.c
===
--- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-29 13:30:29.0 -0700
@@ -692,6 +692,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(iommu-register_lock, flags);
+   pmen = readl(iommu-reg + DMAR_PMEN_REG);
+   pmen = ~DMA_PMEN_EPM;
+   writel(pmen, iommu-reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen  DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(iommu-register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -1731,6 +1748,8 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6/drivers/pci/intel-iommu.h
===
--- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-29 10:30:24.0 -0700
@@ -126,6 +126,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1)  6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)31)
+#define DMA_PMEN_PRS (((u32)1)0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1)  31)
 #define DMA_GCMD_SRTP (((u32)1)  30)
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] iommu-PMEN_REG boot up support

2007-10-26 Thread Mark Gross
The following patch clears the portect memory region enable bit at boot
time by default.  It also provides a kernel parrameter for disabling
this behavior and leave the PMEN_REG untouched if so wanted.

If the boot loader or platform has protected memory regions enabled at
boot time it could prevent DMA's from happening as drivers are loaded
and used.


Singed-off-by: mark gross <[EMAIL PROTECTED]>

--mgross


Index: linux-2.6/drivers/pci/intel-iommu.c
===
--- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-26 11:18:55.0 -0700
@@ -55,6 +55,7 @@
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int clear_pmen_epm = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -76,8 +77,14 @@
printk (KERN_INFO
"Intel-IOMMU: Forcing DAC for PCI devices\n");
dmar_forcedac = 1;
+   } else if (!strncmp(str, "no_epm_clear", 12)) {
+   printk(KERN_INFO
+   "Intel-IOMMU: subress clearing of Enable "
+   "Protected Memory bit\n");
+   clear_pmen_epm = 0;
}
 
+
str += strcspn(str, ",");
while (*str == ',')
str++;
@@ -692,6 +699,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(>register_lock, flags);
+   pmen = readl(iommu->reg + DMAR_PMEN_REG);
+   pmen &= ~DMA_PMEN_EPM;
+   writel(pmen, iommu->reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen & DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(>register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -1731,6 +1755,9 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   if (clear_pmen_epm)
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6/drivers/pci/intel-iommu.h
===
--- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-26 11:11:44.0 -0700
@@ -126,6 +126,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1) << 31)
 #define DMA_GCMD_SRTP (((u32)1) << 30)
Index: linux-2.6/Documentation/kernel-parameters.txt
===
--- linux-2.6.orig/Documentation/kernel-parameters.txt  2007-10-24 
09:31:30.0 -0700
+++ linux-2.6/Documentation/kernel-parameters.txt   2007-10-26 
11:11:44.0 -0700
@@ -789,6 +789,10 @@
than 32 bit addressing. The default is to look
for translation below 32 bit and if not available
then look in the higher range.
+   no_epm_clear
+   This will override the default of disabling protected
+   memory regions from DMA to leave the PMEM_REG settings
+   as they where set up by the boot loader / platform.
 
io7=[HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] iommu-PMEN_REG boot up support

2007-10-26 Thread Mark Gross
The following patch clears the portect memory region enable bit at boot
time by default.  It also provides a kernel parrameter for disabling
this behavior and leave the PMEN_REG untouched if so wanted.

If the boot loader or platform has protected memory regions enabled at
boot time it could prevent DMA's from happening as drivers are loaded
and used.


Singed-off-by: mark gross [EMAIL PROTECTED]

--mgross


Index: linux-2.6/drivers/pci/intel-iommu.c
===
--- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-26 11:18:55.0 -0700
@@ -55,6 +55,7 @@
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int clear_pmen_epm = 1;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -76,8 +77,14 @@
printk (KERN_INFO
Intel-IOMMU: Forcing DAC for PCI devices\n);
dmar_forcedac = 1;
+   } else if (!strncmp(str, no_epm_clear, 12)) {
+   printk(KERN_INFO
+   Intel-IOMMU: subress clearing of Enable 
+   Protected Memory bit\n);
+   clear_pmen_epm = 0;
}
 
+
str += strcspn(str, ,);
while (*str == ',')
str++;
@@ -692,6 +699,23 @@
DMA_TLB_PSI_FLUSH, non_present_entry_flush);
 }
 
+static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
+{
+   u32 pmen;
+   unsigned long flags;
+
+   spin_lock_irqsave(iommu-register_lock, flags);
+   pmen = readl(iommu-reg + DMAR_PMEN_REG);
+   pmen = ~DMA_PMEN_EPM;
+   writel(pmen, iommu-reg + DMAR_PMEN_REG);
+
+   /* wait for the protected region status bit to clear */
+   IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
+   readl, (pmen  DMA_PMEN_PRS), pmen);
+
+   spin_unlock_irqrestore(iommu-register_lock, flags);
+}
+
 static int iommu_enable_translation(struct intel_iommu *iommu)
 {
u32 sts;
@@ -1731,6 +1755,9 @@
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
 
+   if (clear_pmen_epm)
+   iommu_disable_protect_mem_regions(iommu);
+
ret = iommu_enable_translation(iommu);
if (ret)
goto error;
Index: linux-2.6/drivers/pci/intel-iommu.h
===
--- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 
-0700
+++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-26 11:11:44.0 -0700
@@ -126,6 +126,10 @@
 #define DMA_TLB_IH_NONLEAF (((u64)1)  6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)31)
+#define DMA_PMEN_PRS (((u32)1)0)
+
 /* GCMD_REG */
 #define DMA_GCMD_TE (((u32)1)  31)
 #define DMA_GCMD_SRTP (((u32)1)  30)
Index: linux-2.6/Documentation/kernel-parameters.txt
===
--- linux-2.6.orig/Documentation/kernel-parameters.txt  2007-10-24 
09:31:30.0 -0700
+++ linux-2.6/Documentation/kernel-parameters.txt   2007-10-26 
11:11:44.0 -0700
@@ -789,6 +789,10 @@
than 32 bit addressing. The default is to look
for translation below 32 bit and if not available
then look in the higher range.
+   no_epm_clear
+   This will override the default of disabling protected
+   memory regions from DMA to leave the PMEM_REG settings
+   as they where set up by the boot loader / platform.
 
io7=[HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] intel-iommu: Fix array overflow

2007-10-24 Thread Mark Gross
On Tue, Oct 23, 2007 at 10:57:51AM +0200, Takashi Iwai wrote:
> Fix possible array overflow:
> 
> drivers/pci/intel-iommu.c: In function ‘dmar_get_fault_reason’:
> drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds
> drivers/pci/intel-iommu.c: In function ‘iommu_page_fault’:
> drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds
> 
> Signed-off-by: Takashi Iwai <[EMAIL PROTECTED]>
> 
> ---
>  drivers/pci/intel-iommu.c |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
> index b3d7031..e4b0a0d 100644
> --- a/drivers/pci/intel-iommu.c
> +++ b/drivers/pci/intel-iommu.c
> @@ -749,8 +749,8 @@ static char *fault_reason_strings[] =
>  
>  char *dmar_get_fault_reason(u8 fault_reason)
>  {
> - if (fault_reason > MAX_FAULT_REASON_IDX)
> - return fault_reason_strings[MAX_FAULT_REASON_IDX];
> + if (fault_reason >= MAX_FAULT_REASON_IDX)
> + return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];

This looks like what the code meant to implement.  I guess future
hardware may be able to generate more types of faults, otherwise I'd put
a BUG here.

--mgross


>   else
>   return fault_reason_strings[fault_reason];
>  }
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] intel-iommu: Fix array overflow

2007-10-24 Thread Mark Gross
On Tue, Oct 23, 2007 at 10:57:51AM +0200, Takashi Iwai wrote:
 Fix possible array overflow:
 
 drivers/pci/intel-iommu.c: In function ‘dmar_get_fault_reason’:
 drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds
 drivers/pci/intel-iommu.c: In function ‘iommu_page_fault’:
 drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds
 
 Signed-off-by: Takashi Iwai [EMAIL PROTECTED]
 
 ---
  drivers/pci/intel-iommu.c |4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
 index b3d7031..e4b0a0d 100644
 --- a/drivers/pci/intel-iommu.c
 +++ b/drivers/pci/intel-iommu.c
 @@ -749,8 +749,8 @@ static char *fault_reason_strings[] =
  
  char *dmar_get_fault_reason(u8 fault_reason)
  {
 - if (fault_reason  MAX_FAULT_REASON_IDX)
 - return fault_reason_strings[MAX_FAULT_REASON_IDX];
 + if (fault_reason = MAX_FAULT_REASON_IDX)
 + return fault_reason_strings[MAX_FAULT_REASON_IDX - 1];

This looks like what the code meant to implement.  I guess future
hardware may be able to generate more types of faults, otherwise I'd put
a BUG here.

--mgross


   else
   return fault_reason_strings[fault_reason];
  }
 -
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/9] Remove 'irq' argument from all irq handlers

2007-10-19 Thread Mark Gross
On Fri, Oct 19, 2007 at 03:54:43AM -0400, Jeff Garzik wrote:
> 
> WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE

then whats the point ?

> 
> This posting is just to demonstrate something that I have been keeping
> alive in the background.  I have no urge to push it upstream anytime
> soon.

why not? 

--mgross

> 
> The overwhelming majority of drivers do not ever bother with the 'irq'
> argument that is passed to each driver's irq handler.
> 
> Of the minority of drivers that do use the arg, the majority of those
> have the irq number stored in their private-info structure somewhere.
> 
> There are a tiny few -- a couple Mac drivers -- which do weird things
> with that argument, but that's it.
> 
> For the large sweeps through the tree, these patches are grouped into
> "trivial" changes -- simply removing the unused irq arg -- or all other
> changes.
> 
>[IRQ ARG REMOVAL] core interrupt delivery infrastructure updates
>[IRQ ARG REMOVAL] various non-trivial arch updates
>[IRQ ARG REMOVAL] trivial arch updates
>[IRQ ARG REMOVAL] non-trivial driver updates
>[IRQ ARG REMOVAL] trivial net driver updates
>[IRQ ARG REMOVAL] trivial sound driver updates
>[IRQ ARG REMOVAL] trivial scsi driver updates
>[IRQ ARG REMOVAL] trivial driver updates
>[IRQ ARG REMOVAL] x86-64 build fixes, cleanups
> 
> WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/9] Remove 'irq' argument from all irq handlers

2007-10-19 Thread Mark Gross
On Fri, Oct 19, 2007 at 03:54:43AM -0400, Jeff Garzik wrote:
 
 WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE

then whats the point ?

 
 This posting is just to demonstrate something that I have been keeping
 alive in the background.  I have no urge to push it upstream anytime
 soon.

why not? 

--mgross

 
 The overwhelming majority of drivers do not ever bother with the 'irq'
 argument that is passed to each driver's irq handler.
 
 Of the minority of drivers that do use the arg, the majority of those
 have the irq number stored in their private-info structure somewhere.
 
 There are a tiny few -- a couple Mac drivers -- which do weird things
 with that argument, but that's it.
 
 For the large sweeps through the tree, these patches are grouped into
 trivial changes -- simply removing the unused irq arg -- or all other
 changes.
 
[IRQ ARG REMOVAL] core interrupt delivery infrastructure updates
[IRQ ARG REMOVAL] various non-trivial arch updates
[IRQ ARG REMOVAL] trivial arch updates
[IRQ ARG REMOVAL] non-trivial driver updates
[IRQ ARG REMOVAL] trivial net driver updates
[IRQ ARG REMOVAL] trivial sound driver updates
[IRQ ARG REMOVAL] trivial scsi driver updates
[IRQ ARG REMOVAL] trivial driver updates
[IRQ ARG REMOVAL] x86-64 build fixes, cleanups
 
 WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE
 -
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] static initialization with blocking notifiers. was :wqRe: 2.6.23-mm1

2007-10-17 Thread Mark Gross
I didn't see my patch show up on the list so I'm resending it.



On Wed, Oct 17, 2007 at 01:53:48AM +0200, Rafael J. Wysocki wrote:
> On Wednesday, 17 October 2007 01:31, Mark Gross wrote:
> > On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote:
> > > On Tuesday, 16 October 2007 21:58, Mark Gross wrote:
> > > > On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
> > > > > On Monday, 15 October 2007 18:09, Mark Gross wrote:
> > > > > > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
> > > > > > > On Friday, 12 October 2007 06:31, Andrew Morton wrote:
> > > > > > > > 
> > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
> > > > > > > > 
> > > > > > > > - I've been largely avoiding applying anything since rc8-mm2 in 
> > > > > > > > an attempt
> > > > > > > >   to stabilise things for the 2.6.23 merge.
> > > > > > > > 
> > > > > > > >   But that didn't stop all the subsystem maintainers from going 
> > > > > > > > nuts, with
> > > > > > > >   the usual accuracy.  We're up to a 37MB diff now, but it 
> > > > > > > > seems to be working
> > > > > > > >   a bit better.
> > > > > > > 
> > > > > > > I get many traces similar to the one below from it (w/ hotfixes):
> > > > > > > 
> > > > > > > WARNING: at 
> > > > > > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
> > > > > > > smp_call_function_mask()
> > > > > > 
> > > > > > This is from : WARN_ON(irqs_disabled()) in the 
> > > > > > cmp_call_function_mask
> > > > > > processor_idle.c is registering a acpi_processor_latency_notify 
> > > > > > 
> > > > > > my code changed the notifier call from blocking_notifier_call_chain 
> > > > > > to
> > > > > > srcu_notifier_call_chain, because dynamic creation of notifier 
> > > > > > chains at
> > > > > > runtime where easier with the srcu_notifier_call_chain than the
> > > > > > blocking_notifier_call_chain.  
> > > > > > 
> > > > > > As dynamic creation of PM_QOS parameters are no longer needed I can
> > > > > > change the notifiers back to match what was in lanency.c
> > > > > > 
> > > > > > However; looking at the call tree differences between
> > > > > > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot 
> > > > > > see a
> > > > > > difference in irq enabling / disabling.  I'm not confident this will
> > > > > > address this yet.
> > > > > 
> > > > > Well, you can send me a patch to check. :-)

The following is a patch to update the pm_qos code in the mm1 tree.  It
removes the PM_QOS_CPUIDLE parameter (replacing it with
PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking
in hopes of fixing the WARNS reported by xxx, and it changes the
initialization to me largely static to avoid initialization race with
cpu-idle.

I think we will have to re-visit the static vrs dynamic initialization
and this init race in a while to support pm_qos parameters per power
domain (i.e. per cpu-socket) based on platform information (ACPI) but
for now lets see if this fixes the warning's reported.

Thanks,

Signed-off-by: mark gross <[EMAIL PROTECTED]>


Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c
--- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-10-16 15:03:30.0 
-0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 
09:26:21.0 -0700
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
dif

Re: WANTED: kernel projects for CS students

2007-10-17 Thread Mark Gross
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
> The kernel newbies community often gets inquiries from CS students who
> need a project for their studies and would like to do something with
> the Linux kernel, but would also like their code to be useful to the
> community afterwards.
> 
> In order to make it easier for them, I am trying to put together a
> page with projects that:
> - Are self contained enough that the students can implement the
>   project by themselves, since that is often a university requirement.
> - Are self contained enough that Linux could merge the code (maybe
>   with additional changes) after the student has been working on it
>   for a few months.
> - Are large enough to qualify as a student project, luckily there is
>   flexibility here since we get inquiries for anything from 6 week
>   projects to 6 month projects.
> 
> If you have ideas on what projects would be useful, please add them
> to this page (or email me):
> 
> http://kernelnewbies.org/KernelProjects
>

How about a static code tool that will check for initialization races?
yesterday I found a lurker bug in some of my code that wouldn't have
been exposed had not tripped over it.  I wrote some infrastructure code
that initializes its lists and notification trees in late_init.  

Then I found  out that there was as client of my infrastructure calling
my register API at core_init time.  It didn't crash / fail noticeably,
but wasn't correct, because at that time I was using a static array. 
When I changed my code to use an array of pointers instead it went boom!
(FWIW I've fixed this issue for now...)

It made me feel uneasy how that issue got by un-noticed and I worry that
there could be more like it.  A tool to scan the code for boot up init
calls and check for any callers into any module for entry before the
module is fully initialized.  

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1

2007-10-17 Thread Mark Gross
On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
> On Friday, 12 October 2007 06:31, Andrew Morton wrote:
> > 
> > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
> > 
> > - I've been largely avoiding applying anything since rc8-mm2 in an attempt
> >   to stabilise things for the 2.6.23 merge.
> > 
> >   But that didn't stop all the subsystem maintainers from going nuts, with
> >   the usual accuracy.  We're up to a 37MB diff now, but it seems to be 
> > working
> >   a bit better.
> 
> I get many traces similar to the one below from it (w/ hotfixes):
> 
> WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
> smp_call_function_mask()
> 
> Call Trace:
>  [] smp_call_function_mask+0x4b/0x82
>  [] smp_call_function+0x23/0x25
>  [] :processor:acpi_processor_latency_notify+0x19/0x20
>  [] notifier_call_chain+0x33/0x65
>  [] __srcu_notifier_call_chain+0x4b/0x69
>  [] pm_qos_add_requirement+0x24/0xd2
>  [] srcu_notifier_call_chain+0xf/0x11
>  [] update_target+0x71/0x76
>  [] pm_qos_add_requirement+0xa9/0xd2
>  [] :snd_pcm:snd_pcm_hw_params+0x349/0x382
>  [] kmem_cache_alloc+0x8a/0xbc
>  [] :snd_pcm:snd_pcm_hw_params_user+0x50/0x87
>  [] :snd_pcm:snd_pcm_common_ioctl1+0x1ae/0xd4f
>  [] :snd_pcm:snd_pcm_open+0xd6/0x1f2
>  [] cache_alloc_debugcheck_after+0x11a/0x199
>  [] remove_wait_queue+0x40/0x45
>  [] :snd_pcm:snd_pcm_open+0x13e/0x1f2
>  [] default_wake_function+0x0/0xf
>  [] prio_tree_insert+0x18c/0x231
>  [] vma_prio_tree_insert+0x23/0x39
>  [] vma_link+0xdd/0x10b
>  [] :snd_pcm:snd_pcm_playback_ioctl1+0x24d/0x26a
>  [] :snd_pcm:snd_pcm_playback_ioctl+0x2e/0x36
>  [] do_ioctl+0x2a/0x77
>  [] vfs_ioctl+0x251/0x26e
>  [] sys_ioctl+0x57/0x7b
>  [] system_call+0x7e/0x83
> 
> Full dmesg attached.
>


ubject: [PATCH] static initialization and blocking notification for pm_qos... 
was Re: 2.6.23-mm1

please try this patch and let me know if the warnings go away.  (I have
not been able to reproduce your issue.)

The following is a patch to update the pm_qos code in the mm1 tree.  It
removes the PM_QOS_CPUIDLE parameter (replacing it with
PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking
in hopes of fixing the WARNS reported by xxx, and it changes the
initialization to me largely static to avoid initialization race with
cpu-idle.

I think we will have to re-visit the static vrs dynamic initialization
and this init race in a while to support pm_qos parameters per power
domain (i.e. per cpu-socket) based on platform information (ACPI) but
for now lets see if this fixes the warning's reported.

Thanks,

Signed-off-by: mark gross <[EMAIL PROTECTED]>


Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c
--- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-10-16 15:03:30.0 
-0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 
09:26:21.0 -0700
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c
--- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 
15:03:30.0 -0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c   2007-10-17 
09:26:21.0 -0700
@@ -82,7 +82,7 @@
if (last_idx < dev->state_count - 1 &&
last_residency > last_state->threshold.promotion_time &&
dev->states[last_idx + 1].exit_latency <=
-   pm_qos_requirement(PM_QOS_CPUIDLE)) {
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
if (last_state->stats.promotion_count >= 
last_state->threshold.promotion_count) {
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c
--- linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c   2007

[PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1

2007-10-17 Thread Mark Gross
On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
 On Friday, 12 October 2007 06:31, Andrew Morton wrote:
  
  ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
  
  - I've been largely avoiding applying anything since rc8-mm2 in an attempt
to stabilise things for the 2.6.23 merge.
  
But that didn't stop all the subsystem maintainers from going nuts, with
the usual accuracy.  We're up to a 37MB diff now, but it seems to be 
  working
a bit better.
 
 I get many traces similar to the one below from it (w/ hotfixes):
 
 WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
 smp_call_function_mask()
 
 Call Trace:
  [8021b290] smp_call_function_mask+0x4b/0x82
  [8021b2ea] smp_call_function+0x23/0x25
  [884a0b80] :processor:acpi_processor_latency_notify+0x19/0x20
  [80437ace] notifier_call_chain+0x33/0x65
  [8024f32f] __srcu_notifier_call_chain+0x4b/0x69
  [8024f07c] pm_qos_add_requirement+0x24/0xd2
  [8024f35c] srcu_notifier_call_chain+0xf/0x11
  [8024ee6d] update_target+0x71/0x76
  [8024f101] pm_qos_add_requirement+0xa9/0xd2
  [88160bf9] :snd_pcm:snd_pcm_hw_params+0x349/0x382
  [80291110] kmem_cache_alloc+0x8a/0xbc
  [88160d75] :snd_pcm:snd_pcm_hw_params_user+0x50/0x87
  [88160fe1] :snd_pcm:snd_pcm_common_ioctl1+0x1ae/0xd4f
  [8815f755] :snd_pcm:snd_pcm_open+0xd6/0x1f2
  [8028fc17] cache_alloc_debugcheck_after+0x11a/0x199
  [8024b514] remove_wait_queue+0x40/0x45
  [8815f7bd] :snd_pcm:snd_pcm_open+0x13e/0x1f2
  [8022f18e] default_wake_function+0x0/0xf
  [8030b24d] prio_tree_insert+0x18c/0x231
  [8027b5fb] vma_prio_tree_insert+0x23/0x39
  [80282e91] vma_link+0xdd/0x10b
  [8816206f] :snd_pcm:snd_pcm_playback_ioctl1+0x24d/0x26a
  [8816292c] :snd_pcm:snd_pcm_playback_ioctl+0x2e/0x36
  [802a0896] do_ioctl+0x2a/0x77
  [802a0b34] vfs_ioctl+0x251/0x26e
  [802a0ba8] sys_ioctl+0x57/0x7b
  [8020bfde] system_call+0x7e/0x83
 
 Full dmesg attached.



ubject: [PATCH] static initialization and blocking notification for pm_qos... 
was Re: 2.6.23-mm1

please try this patch and let me know if the warnings go away.  (I have
not been able to reproduce your issue.)

The following is a patch to update the pm_qos code in the mm1 tree.  It
removes the PM_QOS_CPUIDLE parameter (replacing it with
PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking
in hopes of fixing the WARNS reported by xxx, and it changes the
initialization to me largely static to avoid initialization race with
cpu-idle.

I think we will have to re-visit the static vrs dynamic initialization
and this init race in a while to support pm_qos parameters per power
domain (i.e. per cpu-socket) based on platform information (ACPI) but
for now lets see if this fixes the warning's reported.

Thanks,

Signed-off-by: mark gross [EMAIL PROTECTED]


Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c
--- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-10-16 15:03:30.0 
-0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 
09:26:21.0 -0700
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c
--- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 
15:03:30.0 -0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c   2007-10-17 
09:26:21.0 -0700
@@ -82,7 +82,7 @@
if (last_idx  dev-state_count - 1 
last_residency  last_state-threshold.promotion_time 
dev-states[last_idx + 1].exit_latency =
-   pm_qos_requirement(PM_QOS_CPUIDLE)) {
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
last_state-stats.promotion_count++;
last_state-stats.demotion_count = 0;
if (last_state-stats.promotion_count = 
last_state-threshold.promotion_count) {
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c

Re: WANTED: kernel projects for CS students

2007-10-17 Thread Mark Gross
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
 The kernel newbies community often gets inquiries from CS students who
 need a project for their studies and would like to do something with
 the Linux kernel, but would also like their code to be useful to the
 community afterwards.
 
 In order to make it easier for them, I am trying to put together a
 page with projects that:
 - Are self contained enough that the students can implement the
   project by themselves, since that is often a university requirement.
 - Are self contained enough that Linux could merge the code (maybe
   with additional changes) after the student has been working on it
   for a few months.
 - Are large enough to qualify as a student project, luckily there is
   flexibility here since we get inquiries for anything from 6 week
   projects to 6 month projects.
 
 If you have ideas on what projects would be useful, please add them
 to this page (or email me):
 
 http://kernelnewbies.org/KernelProjects


How about a static code tool that will check for initialization races?
yesterday I found a lurker bug in some of my code that wouldn't have
been exposed had not tripped over it.  I wrote some infrastructure code
that initializes its lists and notification trees in late_init.  

Then I found  out that there was as client of my infrastructure calling
my register API at core_init time.  It didn't crash / fail noticeably,
but wasn't correct, because at that time I was using a static array. 
When I changed my code to use an array of pointers instead it went boom!
(FWIW I've fixed this issue for now...)

It made me feel uneasy how that issue got by un-noticed and I worry that
there could be more like it.  A tool to scan the code for boot up init
calls and check for any callers into any module for entry before the
module is fully initialized.  

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] static initialization with blocking notifiers. was :wqRe: 2.6.23-mm1

2007-10-17 Thread Mark Gross
I didn't see my patch show up on the list so I'm resending it.



On Wed, Oct 17, 2007 at 01:53:48AM +0200, Rafael J. Wysocki wrote:
 On Wednesday, 17 October 2007 01:31, Mark Gross wrote:
  On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote:
   On Tuesday, 16 October 2007 21:58, Mark Gross wrote:
On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
 On Monday, 15 October 2007 18:09, Mark Gross wrote:
  On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
   On Friday, 12 October 2007 06:31, Andrew Morton wrote:

ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/

- I've been largely avoiding applying anything since rc8-mm2 in 
an attempt
  to stabilise things for the 2.6.23 merge.

  But that didn't stop all the subsystem maintainers from going 
nuts, with
  the usual accuracy.  We're up to a 37MB diff now, but it 
seems to be working
  a bit better.
   
   I get many traces similar to the one below from it (w/ hotfixes):
   
   WARNING: at 
   /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
   smp_call_function_mask()
  
  This is from : WARN_ON(irqs_disabled()) in the 
  cmp_call_function_mask
  processor_idle.c is registering a acpi_processor_latency_notify 
  
  my code changed the notifier call from blocking_notifier_call_chain 
  to
  srcu_notifier_call_chain, because dynamic creation of notifier 
  chains at
  runtime where easier with the srcu_notifier_call_chain than the
  blocking_notifier_call_chain.  
  
  As dynamic creation of PM_QOS parameters are no longer needed I can
  change the notifiers back to match what was in lanency.c
  
  However; looking at the call tree differences between
  blockin_notifier_call_chain and srcu_notifier_call_chain I cannot 
  see a
  difference in irq enabling / disabling.  I'm not confident this will
  address this yet.
 
 Well, you can send me a patch to check. :-)

The following is a patch to update the pm_qos code in the mm1 tree.  It
removes the PM_QOS_CPUIDLE parameter (replacing it with
PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking
in hopes of fixing the WARNS reported by xxx, and it changes the
initialization to me largely static to avoid initialization race with
cpu-idle.

I think we will have to re-visit the static vrs dynamic initialization
and this init race in a while to support pm_qos parameters per power
domain (i.e. per cpu-socket) based on platform information (ACPI) but
for now lets see if this fixes the warning's reported.

Thanks,

Signed-off-by: mark gross [EMAIL PROTECTED]


Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ
Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and 
linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c
--- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c  2007-10-16 15:03:30.0 
-0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 
09:26:21.0 -0700
@@ -268,7 +268,7 @@
 
 static inline void latency_notifier_init(struct notifier_block *n)
 {
-pm_qos_add_notifier(PM_QOS_CPUIDLE, n);
+pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
 }
 
 #else /* CONFIG_SMP */
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c
--- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 
15:03:30.0 -0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c   2007-10-17 
09:26:21.0 -0700
@@ -82,7 +82,7 @@
if (last_idx  dev-state_count - 1 
last_residency  last_state-threshold.promotion_time 
dev-states[last_idx + 1].exit_latency =
-   pm_qos_requirement(PM_QOS_CPUIDLE)) {
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
last_state-stats.promotion_count++;
last_state-stats.demotion_count = 0;
if (last_state-stats.promotion_count = 
last_state-threshold.promotion_count) {
diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff 
linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 
linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c
--- linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c   2007-10-16 
15:03:30.0 -0700
+++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c 2007-10-17

Re: 2.6.23-mm1

2007-10-16 Thread Mark Gross
On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote:
> On Tuesday, 16 October 2007 21:58, Mark Gross wrote:
> > On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
> > > On Monday, 15 October 2007 18:09, Mark Gross wrote:
> > > > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
> > > > > On Friday, 12 October 2007 06:31, Andrew Morton wrote:
> > > > > > 
> > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
> > > > > > 
> > > > > > - I've been largely avoiding applying anything since rc8-mm2 in an 
> > > > > > attempt
> > > > > >   to stabilise things for the 2.6.23 merge.
> > > > > > 
> > > > > >   But that didn't stop all the subsystem maintainers from going 
> > > > > > nuts, with
> > > > > >   the usual accuracy.  We're up to a 37MB diff now, but it seems to 
> > > > > > be working
> > > > > >   a bit better.
> > > > > 
> > > > > I get many traces similar to the one below from it (w/ hotfixes):
> > > > > 
> > > > > WARNING: at 
> > > > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
> > > > > smp_call_function_mask()
> > > > 
> > > > This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask
> > > > processor_idle.c is registering a acpi_processor_latency_notify 
> > > > 
> > > > my code changed the notifier call from blocking_notifier_call_chain to
> > > > srcu_notifier_call_chain, because dynamic creation of notifier chains at
> > > > runtime where easier with the srcu_notifier_call_chain than the
> > > > blocking_notifier_call_chain.  
> > > > 
> > > > As dynamic creation of PM_QOS parameters are no longer needed I can
> > > > change the notifiers back to match what was in lanency.c
> > > > 
> > > > However; looking at the call tree differences between
> > > > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a
> > > > difference in irq enabling / disabling.  I'm not confident this will
> > > > address this yet.
> > > 
> > > Well, you can send me a patch to check. :-)
> > 
> > I think I'll have to send you a patch that changes the notifiers but I
> > doubt it will fix it.  
> > 
> > After a bit of messing around I have the 2.6.23-mm1 running on my core-2
> > box  note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that
> > wouldn't boot past the intrd.  Looks like the pivot root failed or
> > something.
> > 
> > Anyway, I'm not reproducing your experience, snd_pcm is loaded.  I don't
> > know none of the WARN's are not hitting on my box.
> > 
> > do you have some configuration information that could help me reproduce
> > the issue?
> 
> Well, I can send you the .config, but the box is AMD-based (Turion 64 X2),
> with an ATI chipset and an HP BIOS, so it seems to be much different from
> yours.

it may be worth a shot anyway.  

BTW while changing my code to use the blocking notifiers I found that
there is a initialization race between cpu-idle and pm_qos I have to
fix.

I need to re factor my start up code to handle cpuidle registering
itself in as a notifier at core_initcall time.

I'll have a patch ready tomorrow.

thanks,

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 11:17:32PM +0400, Alexey Dobriyan wrote:
> On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
> > On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
> > > http://kernelnewbies.org/KernelProjects
> > 
> > Is there already a make config option that will do a good job at setting
> > a default .config file based on what is already running on a system?  
> > 
> > I get tiered of trimming down my .config for my laptop build so it takes
> > less than 30min to build a kernel.
> 
> Ehh? You do it once, then leave it aside or in /proc/config.gz, on new
> kernel copy it back, "make oldconfig", answer several questions and here
> we go.

yeah I know that.  Its a lot more than a few questions, and as we are
talking about a linear search for a fully tweaked .config where each
pass takes 30 min to know if things work this isn't how I want to spend
my time.

--mgross

> 
> > Bonus credit to additional "expert" options (like those powertop puts
> > out) for target uses, laptop, HPC, home file share, embedded targets
> > 
> > Oh, and lets make the expert configs easily extensible.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 09:54:42PM +0200, Giacomo Catenazzi wrote:
> Sam Ravnborg wrote:
> > On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
> >> On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
> >>> The kernel newbies community often gets inquiries from CS students who
> >>> need a project for their studies and would like to do something with
> >>> the Linux kernel, but would also like their code to be useful to the
> >>> community afterwards.
> >>>
> >>> In order to make it easier for them, I am trying to put together a
> >>> page with projects that:
> >>> - Are self contained enough that the students can implement the
> >>>   project by themselves, since that is often a university requirement.
> >>> - Are self contained enough that Linux could merge the code (maybe
> >>>   with additional changes) after the student has been working on it
> >>>   for a few months.
> >>> - Are large enough to qualify as a student project, luckily there is
> >>>   flexibility here since we get inquiries for anything from 6 week
> >>>   projects to 6 month projects.
> >>>
> >>> If you have ideas on what projects would be useful, please add them
> >>> to this page (or email me):
> >>>
> >>> http://kernelnewbies.org/KernelProjects
> >> Is there already a make config option that will do a good job at setting
> >> a default .config file based on what is already running on a system?  
> > I have discussed this briefly with Kay Sievers.
> > What udev can provide is the list of modules needed, so what the kernel
> > need to provide is a simple module to CONFIG option(s) converter + a base
> > config to start out with.
> > Nothing particular difficult but needs a few days work to do.
> 
> could you explain better what you need?  I think I've already such
> tools ;-)
>

base function:
Starting from a stock distro (FC, Ubuntu, OpenSuSE...) and put down a
kernel.org tree and automatically create a .config with all the drivers
needed for the platform I'm building on.

expert configs for different applications:
laptop battery, vitalization, HPC, tiny, multi-media, testing

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
> On Monday, 15 October 2007 18:09, Mark Gross wrote:
> > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
> > > On Friday, 12 October 2007 06:31, Andrew Morton wrote:
> > > > 
> > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
> > > > 
> > > > - I've been largely avoiding applying anything since rc8-mm2 in an 
> > > > attempt
> > > >   to stabilise things for the 2.6.23 merge.
> > > > 
> > > >   But that didn't stop all the subsystem maintainers from going nuts, 
> > > > with
> > > >   the usual accuracy.  We're up to a 37MB diff now, but it seems to be 
> > > > working
> > > >   a bit better.
> > > 
> > > I get many traces similar to the one below from it (w/ hotfixes):
> > > 
> > > WARNING: at 
> > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
> > > smp_call_function_mask()
> > 
> > This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask
> > processor_idle.c is registering a acpi_processor_latency_notify 
> > 
> > my code changed the notifier call from blocking_notifier_call_chain to
> > srcu_notifier_call_chain, because dynamic creation of notifier chains at
> > runtime where easier with the srcu_notifier_call_chain than the
> > blocking_notifier_call_chain.  
> > 
> > As dynamic creation of PM_QOS parameters are no longer needed I can
> > change the notifiers back to match what was in lanency.c
> > 
> > However; looking at the call tree differences between
> > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a
> > difference in irq enabling / disabling.  I'm not confident this will
> > address this yet.
> 
> Well, you can send me a patch to check. :-)

I think I'll have to send you a patch that changes the notifiers but I
doubt it will fix it.  

After a bit of messing around I have the 2.6.23-mm1 running on my core-2
box  note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that
wouldn't boot past the intrd.  Looks like the pivot root failed or
something.

Anyway, I'm not reproducing your experience, snd_pcm is loaded.  I don't
know none of the WARN's are not hitting on my box.

do you have some configuration information that could help me reproduce
the issue?

> 
> > I'll change the PM_QOS params patch to use blocking notifiers and test
> > on a 64bit boot and see what happens.  I've been needing to setup my
> > x86_64 dev box for a while now anyway.
> 
> OK, thanks.

well its booting but I'm not reproducing the trace messages.

I'll do the patch for you to test.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
 On Monday, 15 October 2007 18:09, Mark Gross wrote:
  On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
   On Friday, 12 October 2007 06:31, Andrew Morton wrote:

ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/

- I've been largely avoiding applying anything since rc8-mm2 in an 
attempt
  to stabilise things for the 2.6.23 merge.

  But that didn't stop all the subsystem maintainers from going nuts, 
with
  the usual accuracy.  We're up to a 37MB diff now, but it seems to be 
working
  a bit better.
   
   I get many traces similar to the one below from it (w/ hotfixes):
   
   WARNING: at 
   /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
   smp_call_function_mask()
  
  This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask
  processor_idle.c is registering a acpi_processor_latency_notify 
  
  my code changed the notifier call from blocking_notifier_call_chain to
  srcu_notifier_call_chain, because dynamic creation of notifier chains at
  runtime where easier with the srcu_notifier_call_chain than the
  blocking_notifier_call_chain.  
  
  As dynamic creation of PM_QOS parameters are no longer needed I can
  change the notifiers back to match what was in lanency.c
  
  However; looking at the call tree differences between
  blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a
  difference in irq enabling / disabling.  I'm not confident this will
  address this yet.
 
 Well, you can send me a patch to check. :-)

I think I'll have to send you a patch that changes the notifiers but I
doubt it will fix it.  

After a bit of messing around I have the 2.6.23-mm1 running on my core-2
box  note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that
wouldn't boot past the intrd.  Looks like the pivot root failed or
something.

Anyway, I'm not reproducing your experience, snd_pcm is loaded.  I don't
know none of the WARN's are not hitting on my box.

do you have some configuration information that could help me reproduce
the issue?

 
  I'll change the PM_QOS params patch to use blocking notifiers and test
  on a 64bit boot and see what happens.  I've been needing to setup my
  x86_64 dev box for a while now anyway.
 
 OK, thanks.

well its booting but I'm not reproducing the trace messages.

I'll do the patch for you to test.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 09:54:42PM +0200, Giacomo Catenazzi wrote:
 Sam Ravnborg wrote:
  On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
  On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
  The kernel newbies community often gets inquiries from CS students who
  need a project for their studies and would like to do something with
  the Linux kernel, but would also like their code to be useful to the
  community afterwards.
 
  In order to make it easier for them, I am trying to put together a
  page with projects that:
  - Are self contained enough that the students can implement the
project by themselves, since that is often a university requirement.
  - Are self contained enough that Linux could merge the code (maybe
with additional changes) after the student has been working on it
for a few months.
  - Are large enough to qualify as a student project, luckily there is
flexibility here since we get inquiries for anything from 6 week
projects to 6 month projects.
 
  If you have ideas on what projects would be useful, please add them
  to this page (or email me):
 
  http://kernelnewbies.org/KernelProjects
  Is there already a make config option that will do a good job at setting
  a default .config file based on what is already running on a system?  
  I have discussed this briefly with Kay Sievers.
  What udev can provide is the list of modules needed, so what the kernel
  need to provide is a simple module to CONFIG option(s) converter + a base
  config to start out with.
  Nothing particular difficult but needs a few days work to do.
 
 could you explain better what you need?  I think I've already such
 tools ;-)


base function:
Starting from a stock distro (FC, Ubuntu, OpenSuSE...) and put down a
kernel.org tree and automatically create a .config with all the drivers
needed for the platform I'm building on.

expert configs for different applications:
laptop battery, vitalization, HPC, tiny, multi-media, testing

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-16 Thread Mark Gross
On Mon, Oct 15, 2007 at 11:17:32PM +0400, Alexey Dobriyan wrote:
 On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
  On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
   http://kernelnewbies.org/KernelProjects
  
  Is there already a make config option that will do a good job at setting
  a default .config file based on what is already running on a system?  
  
  I get tiered of trimming down my .config for my laptop build so it takes
  less than 30min to build a kernel.
 
 Ehh? You do it once, then leave it aside or in /proc/config.gz, on new
 kernel copy it back, make oldconfig, answer several questions and here
 we go.

yeah I know that.  Its a lot more than a few questions, and as we are
talking about a linear search for a fully tweaked .config where each
pass takes 30 min to know if things work this isn't how I want to spend
my time.

--mgross

 
  Bonus credit to additional expert options (like those powertop puts
  out) for target uses, laptop, HPC, home file share, embedded targets
  
  Oh, and lets make the expert configs easily extensible.
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-mm1

2007-10-16 Thread Mark Gross
On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote:
 On Tuesday, 16 October 2007 21:58, Mark Gross wrote:
  On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote:
   On Monday, 15 October 2007 18:09, Mark Gross wrote:
On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote:
 On Friday, 12 October 2007 06:31, Andrew Morton wrote:
  
  ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/
  
  - I've been largely avoiding applying anything since rc8-mm2 in an 
  attempt
to stabilise things for the 2.6.23 merge.
  
But that didn't stop all the subsystem maintainers from going 
  nuts, with
the usual accuracy.  We're up to a 37MB diff now, but it seems to 
  be working
a bit better.
 
 I get many traces similar to the one below from it (w/ hotfixes):
 
 WARNING: at 
 /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 
 smp_call_function_mask()

This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask
processor_idle.c is registering a acpi_processor_latency_notify 

my code changed the notifier call from blocking_notifier_call_chain to
srcu_notifier_call_chain, because dynamic creation of notifier chains at
runtime where easier with the srcu_notifier_call_chain than the
blocking_notifier_call_chain.  

As dynamic creation of PM_QOS parameters are no longer needed I can
change the notifiers back to match what was in lanency.c

However; looking at the call tree differences between
blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a
difference in irq enabling / disabling.  I'm not confident this will
address this yet.
   
   Well, you can send me a patch to check. :-)
  
  I think I'll have to send you a patch that changes the notifiers but I
  doubt it will fix it.  
  
  After a bit of messing around I have the 2.6.23-mm1 running on my core-2
  box  note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that
  wouldn't boot past the intrd.  Looks like the pivot root failed or
  something.
  
  Anyway, I'm not reproducing your experience, snd_pcm is loaded.  I don't
  know none of the WARN's are not hitting on my box.
  
  do you have some configuration information that could help me reproduce
  the issue?
 
 Well, I can send you the .config, but the box is AMD-based (Turion 64 X2),
 with an ATI chipset and an HP BIOS, so it seems to be much different from
 yours.

it may be worth a shot anyway.  

BTW while changing my code to use the blocking notifiers I found that
there is a initialization race between cpu-idle and pm_qos I have to
fix.

I need to re factor my start up code to handle cpuidle registering
itself in as a notifier at core_initcall time.

I'll have a patch ready tomorrow.

thanks,

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-15 Thread Mark Gross
On Mon, Oct 15, 2007 at 08:30:17PM +0200, Sam Ravnborg wrote:
> On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
> > On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
> > > The kernel newbies community often gets inquiries from CS students who
> > > need a project for their studies and would like to do something with
> > > the Linux kernel, but would also like their code to be useful to the
> > > community afterwards.
> > > 
> > > In order to make it easier for them, I am trying to put together a
> > > page with projects that:
> > > - Are self contained enough that the students can implement the
> > >   project by themselves, since that is often a university requirement.
> > > - Are self contained enough that Linux could merge the code (maybe
> > >   with additional changes) after the student has been working on it
> > >   for a few months.
> > > - Are large enough to qualify as a student project, luckily there is
> > >   flexibility here since we get inquiries for anything from 6 week
> > >   projects to 6 month projects.
> > > 
> > > If you have ideas on what projects would be useful, please add them
> > > to this page (or email me):
> > > 
> > > http://kernelnewbies.org/KernelProjects
> > 
> > Is there already a make config option that will do a good job at setting
> > a default .config file based on what is already running on a system?  
> I have discussed this briefly with Kay Sievers.
> What udev can provide is the list of modules needed, so what the kernel
> need to provide is a simple module to CONFIG option(s) converter + a base
> config to start out with.
> Nothing particular difficult but needs a few days work to do.
> 

that would be cool.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-15 Thread Mark Gross
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
> The kernel newbies community often gets inquiries from CS students who
> need a project for their studies and would like to do something with
> the Linux kernel, but would also like their code to be useful to the
> community afterwards.
> 
> In order to make it easier for them, I am trying to put together a
> page with projects that:
> - Are self contained enough that the students can implement the
>   project by themselves, since that is often a university requirement.
> - Are self contained enough that Linux could merge the code (maybe
>   with additional changes) after the student has been working on it
>   for a few months.
> - Are large enough to qualify as a student project, luckily there is
>   flexibility here since we get inquiries for anything from 6 week
>   projects to 6 month projects.
> 
> If you have ideas on what projects would be useful, please add them
> to this page (or email me):
> 
> http://kernelnewbies.org/KernelProjects

Is there already a make config option that will do a good job at setting
a default .config file based on what is already running on a system?  

I get tiered of trimming down my .config for my laptop build so it takes
less than 30min to build a kernel.

Bonus credit to additional "expert" options (like those powertop puts
out) for target uses, laptop, HPC, home file share, embedded targets

Oh, and lets make the expert configs easily extensible.


--mgross


> 
> thanks,
> 
> Rik
> -- 
> "Debugging is twice as hard as writing the code in the first place.
> Therefore, if you write the code as cleverly as possible, you are,
> by definition, not smart enough to debug it." - Brian W. Kernighan
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-15 Thread Mark Gross
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
 The kernel newbies community often gets inquiries from CS students who
 need a project for their studies and would like to do something with
 the Linux kernel, but would also like their code to be useful to the
 community afterwards.
 
 In order to make it easier for them, I am trying to put together a
 page with projects that:
 - Are self contained enough that the students can implement the
   project by themselves, since that is often a university requirement.
 - Are self contained enough that Linux could merge the code (maybe
   with additional changes) after the student has been working on it
   for a few months.
 - Are large enough to qualify as a student project, luckily there is
   flexibility here since we get inquiries for anything from 6 week
   projects to 6 month projects.
 
 If you have ideas on what projects would be useful, please add them
 to this page (or email me):
 
 http://kernelnewbies.org/KernelProjects

Is there already a make config option that will do a good job at setting
a default .config file based on what is already running on a system?  

I get tiered of trimming down my .config for my laptop build so it takes
less than 30min to build a kernel.

Bonus credit to additional expert options (like those powertop puts
out) for target uses, laptop, HPC, home file share, embedded targets

Oh, and lets make the expert configs easily extensible.


--mgross


 
 thanks,
 
 Rik
 -- 
 Debugging is twice as hard as writing the code in the first place.
 Therefore, if you write the code as cleverly as possible, you are,
 by definition, not smart enough to debug it. - Brian W. Kernighan
 -
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: WANTED: kernel projects for CS students

2007-10-15 Thread Mark Gross
On Mon, Oct 15, 2007 at 08:30:17PM +0200, Sam Ravnborg wrote:
 On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote:
  On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote:
   The kernel newbies community often gets inquiries from CS students who
   need a project for their studies and would like to do something with
   the Linux kernel, but would also like their code to be useful to the
   community afterwards.
   
   In order to make it easier for them, I am trying to put together a
   page with projects that:
   - Are self contained enough that the students can implement the
 project by themselves, since that is often a university requirement.
   - Are self contained enough that Linux could merge the code (maybe
 with additional changes) after the student has been working on it
 for a few months.
   - Are large enough to qualify as a student project, luckily there is
 flexibility here since we get inquiries for anything from 6 week
 projects to 6 month projects.
   
   If you have ideas on what projects would be useful, please add them
   to this page (or email me):
   
   http://kernelnewbies.org/KernelProjects
  
  Is there already a make config option that will do a good job at setting
  a default .config file based on what is already running on a system?  
 I have discussed this briefly with Kay Sievers.
 What udev can provide is the list of modules needed, so what the kernel
 need to provide is a simple module to CONFIG option(s) converter + a base
 config to start out with.
 Nothing particular difficult but needs a few days work to do.
 

that would be cool.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: pm qos infrastructure and interface

2007-10-11 Thread Mark Gross
On Wed, Oct 10, 2007 at 10:17:04PM -0700, Andrew Morton wrote:
> On Thu, 4 Oct 2007 14:51:39 -0700 Mark Gross <[EMAIL PROTECTED]> wrote:
> 
> > The following patch is a generalization of the latency.c implementation
> > done by Arjan last year.  It provides infrastructure for more than one
> > parameter, and exposes a user mode interface for processes to register
> > pm_qos expectations of processes.
> > 
> > 
> > This interface provides a kernel and user mode interface for registering
> > performance expectations by drivers, subsystems and user space
> > applications on one of the parameters.
> > 
> > Currently we have {cpu_dma_latency, network_latency, network_throughput}
> > as the initial set of pm_qos parameters.
> > 
> > The infrastructure exposes multiple misc device nodes one per
> > implemented parameter.  The set of parameters implement is defined by
> > pm_qos_power_init() and pm_qos_params.h.  This is done because having
> > the available parameters being runtime configurable or changeable from a
> > driver was seen as too easy to abuse.
> 
> I'm a bit surprised that this change appears to have no configurability.
> If one has set CONFIG_PM=n (for example), shouldn't it all go away?

We considered that as an option but as latency.c didn't offer it I
didn't either.  

I could see the user mode interface portion of the implementation be
made as a compile time option but the kernel infrastructure will
continue to be needed by at least cpu-idel, pcm_native.c and ipw2100.

You know it could make sense to have the user mode interface part of the
patch as configurable or a build time dependent of sysfs and misc device
support for the linux-tiny guys.  Is it practical to make a linux-tiny
without the sysfs infrastructure needed to make a misc device?  I'll ask
on the linux-tiny list.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: pm qos infrastructure and interface

2007-10-11 Thread Mark Gross
On Wed, Oct 10, 2007 at 10:17:04PM -0700, Andrew Morton wrote:
 On Thu, 4 Oct 2007 14:51:39 -0700 Mark Gross [EMAIL PROTECTED] wrote:
 
  The following patch is a generalization of the latency.c implementation
  done by Arjan last year.  It provides infrastructure for more than one
  parameter, and exposes a user mode interface for processes to register
  pm_qos expectations of processes.
  
  
  This interface provides a kernel and user mode interface for registering
  performance expectations by drivers, subsystems and user space
  applications on one of the parameters.
  
  Currently we have {cpu_dma_latency, network_latency, network_throughput}
  as the initial set of pm_qos parameters.
  
  The infrastructure exposes multiple misc device nodes one per
  implemented parameter.  The set of parameters implement is defined by
  pm_qos_power_init() and pm_qos_params.h.  This is done because having
  the available parameters being runtime configurable or changeable from a
  driver was seen as too easy to abuse.
 
 I'm a bit surprised that this change appears to have no configurability.
 If one has set CONFIG_PM=n (for example), shouldn't it all go away?

We considered that as an option but as latency.c didn't offer it I
didn't either.  

I could see the user mode interface portion of the implementation be
made as a compile time option but the kernel infrastructure will
continue to be needed by at least cpu-idel, pcm_native.c and ipw2100.

You know it could make sense to have the user mode interface part of the
patch as configurable or a build time dependent of sysfs and misc device
support for the linux-tiny guys.  Is it practical to make a linux-tiny
without the sysfs infrastructure needed to make a misc device?  I'll ask
on the linux-tiny list.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: gigabit ethernet power consumption

2007-10-10 Thread Mark Gross
On Tue, Oct 09, 2007 at 11:41:17AM -0700, Kok, Auke wrote:
> Lennart Sorensen wrote:
> > On Mon, Oct 08, 2007 at 03:31:51PM -0700, Kok, Auke wrote:
> >> you most certainly want to do this in userspace I think.
> >>
> >> One of the biggest problems is that link negotiation can take a 
> >> significant amount
> >> of time, well over several seconds (1 to 3 seconds typical) with gigabit, 
> >> and
> >> having your ethernet connection go offline for 3 seconds may not be the 
> >> desired
> >> effect for when you want to get more bandwidth in the first place.
> >>
> >> However, when a laptop is in battery mode, switching down from gigabit to 
> >> 100mbit
> >> makes a lot more sense, so this is something I would recommend. This can 
> >> be as
> >> easy as changing the advertisement mask of the interface and renegotiating 
> >> the
> >> link. Userspace could handle that very easily.
> > 
> > Now if you were trying to transfer a lot of data to the laptop, would it
> > be more power efficient to do it at gigabit speeds so you can finish
> > sooner and shut down the machine entirely, or to slow to 100mbit and
> > take longer to do it, and hence spend more time powering the cpu and
> > ram?
> 
> my suspicion is that the cost of switching is much higher than what you would
> consume running at 100mbit, even if the amount of data is quite large. going
> offline to renegotiate the link would already cost you 3W typically.
> 
> I definately think that userspace is the right field to solve this problem: 
> let
> the users decide how to use the available power on their sytems through a 
> decent
> power profile tool (perhaps gnome-power-manager or something like that). This 
> way
> the user can choose.
> 

Auke, 
I was wondering if we could use PM-QOS to have the driver drop to the
100Mb speed, when requests for bandwidth and latency are not in effect?

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: gigabit ethernet power consumption

2007-10-10 Thread Mark Gross
On Tue, Oct 09, 2007 at 11:41:17AM -0700, Kok, Auke wrote:
 Lennart Sorensen wrote:
  On Mon, Oct 08, 2007 at 03:31:51PM -0700, Kok, Auke wrote:
  you most certainly want to do this in userspace I think.
 
  One of the biggest problems is that link negotiation can take a 
  significant amount
  of time, well over several seconds (1 to 3 seconds typical) with gigabit, 
  and
  having your ethernet connection go offline for 3 seconds may not be the 
  desired
  effect for when you want to get more bandwidth in the first place.
 
  However, when a laptop is in battery mode, switching down from gigabit to 
  100mbit
  makes a lot more sense, so this is something I would recommend. This can 
  be as
  easy as changing the advertisement mask of the interface and renegotiating 
  the
  link. Userspace could handle that very easily.
  
  Now if you were trying to transfer a lot of data to the laptop, would it
  be more power efficient to do it at gigabit speeds so you can finish
  sooner and shut down the machine entirely, or to slow to 100mbit and
  take longer to do it, and hence spend more time powering the cpu and
  ram?
 
 my suspicion is that the cost of switching is much higher than what you would
 consume running at 100mbit, even if the amount of data is quite large. going
 offline to renegotiate the link would already cost you 3W typically.
 
 I definately think that userspace is the right field to solve this problem: 
 let
 the users decide how to use the available power on their sytems through a 
 decent
 power profile tool (perhaps gnome-power-manager or something like that). This 
 way
 the user can choose.
 

Auke, 
I was wondering if we could use PM-QOS to have the driver drop to the
100Mb speed, when requests for bandwidth and latency are not in effect?

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: reviewer's statement of oversight

2007-10-08 Thread Mark Gross
On Mon, Oct 08, 2007 at 11:24:45AM -0600, Jonathan Corbet wrote:
> Last month, at the kernel summit, there was discussion of putting a
> Reviewed-by: tag onto patches to document the oversight they had
> received on their way into the mainline.  That tag has made an
> occasional appearance since then, but there has not yet been a
> discussion of what it really means.  So it has not yet brought a whole
> lot of value to the process.
> 
> As I was trying to sleep last night, it occurred to me that what we
> might need is an equivalent of the DCO for the Reviewed-by tag.  To that
> end, I dedicated a few minutes of my life to the following bit of text.
> It's really just meant to be a starting point for the discussion.  Is
> the following something close to what we understand Reviewed-by to mean? 
> 
> jon
> 
> 
> Reviewer's statement of oversight v0.01
> 
> By offering my Reviewed-by: tag, I state that:
> 
>  (a) I have carried out a technical review of this patch to evaluate its
>  appropriateness and readiness for inclusion into the mainline kernel. 
> 
>  (b) Any problems, concerns, or questions relating to the patch have been
>  communicated back to the submitter.  I am satisfied with how the
>  submitter has responded to my comments.
> 
>  (c) While there may (or may not) be things which could be improved with
>  this submission, I believe that it is, at this time, (1) a
>  worthwhile addition to the kernel, and (2) free of serious known
>  issues which would argue against its inclusion.

C-1 "worthwhile addition..." Probably shouldn't be part of this.  That's
what additional Signed off by ACK's provide.  I think reviewed by should
limit its scope to code correctness leaving the subjective "worthwhile"
statements are better expressed with other tags.

> 
>  (d) While I have reviewed the patch and believe it to be sound, I can not
>  (unless explicitly stated elsewhere) make any warranties or guarantees
>  that it will achieve its stated purpose or function properly in any
>  given situation.
> 
>  (e) I understand and agree that this project and the contribution are
>  public and that a record of the contribution (including my Reviewed-by
>  tag and any associated public communications) is maintained
>  indefinitely and may be redistributed consistent with this project or
>  the open source license(s) involved.
> -

I think this is a good thing to have, although recruiting reviews remains
an open issue.

I think it would be easier to recruit patch testers than reviewers
should a Tested-by: tag be considered as well?

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: reviewer's statement of oversight

2007-10-08 Thread Mark Gross
On Mon, Oct 08, 2007 at 11:24:45AM -0600, Jonathan Corbet wrote:
 Last month, at the kernel summit, there was discussion of putting a
 Reviewed-by: tag onto patches to document the oversight they had
 received on their way into the mainline.  That tag has made an
 occasional appearance since then, but there has not yet been a
 discussion of what it really means.  So it has not yet brought a whole
 lot of value to the process.
 
 As I was trying to sleep last night, it occurred to me that what we
 might need is an equivalent of the DCO for the Reviewed-by tag.  To that
 end, I dedicated a few minutes of my life to the following bit of text.
 It's really just meant to be a starting point for the discussion.  Is
 the following something close to what we understand Reviewed-by to mean? 
 
 jon
 
 
 Reviewer's statement of oversight v0.01
 
 By offering my Reviewed-by: tag, I state that:
 
  (a) I have carried out a technical review of this patch to evaluate its
  appropriateness and readiness for inclusion into the mainline kernel. 
 
  (b) Any problems, concerns, or questions relating to the patch have been
  communicated back to the submitter.  I am satisfied with how the
  submitter has responded to my comments.
 
  (c) While there may (or may not) be things which could be improved with
  this submission, I believe that it is, at this time, (1) a
  worthwhile addition to the kernel, and (2) free of serious known
  issues which would argue against its inclusion.

C-1 worthwhile addition... Probably shouldn't be part of this.  That's
what additional Signed off by ACK's provide.  I think reviewed by should
limit its scope to code correctness leaving the subjective worthwhile
statements are better expressed with other tags.

 
  (d) While I have reviewed the patch and believe it to be sound, I can not
  (unless explicitly stated elsewhere) make any warranties or guarantees
  that it will achieve its stated purpose or function properly in any
  given situation.
 
  (e) I understand and agree that this project and the contribution are
  public and that a record of the contribution (including my Reviewed-by
  tag and any associated public communications) is maintained
  indefinitely and may be redistributed consistent with this project or
  the open source license(s) involved.
 -

I think this is a good thing to have, although recruiting reviews remains
an open issue.

I think it would be easier to recruit patch testers than reviewers
should a Tested-by: tag be considered as well?

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


pm qos infrastructure and interface

2007-10-04 Thread Mark Gross
The following patch is a generalization of the latency.c implementation
done by Arjan last year.  It provides infrastructure for more than one
parameter, and exposes a user mode interface for processes to register
pm_qos expectations of processes.


This interface provides a kernel and user mode interface for registering
performance expectations by drivers, subsystems and user space
applications on one of the parameters.

Currently we have {cpu_dma_latency, network_latency, network_throughput}
as the initial set of pm_qos parameters.

The infrastructure exposes multiple misc device nodes one per
implemented parameter.  The set of parameters implement is defined by
pm_qos_power_init() and pm_qos_params.h.  This is done because having
the available parameters being runtime configurable or changeable from a
driver was seen as too easy to abuse.

For each parameter a list of performance requirements is maintained
along with an aggregated target value.  The aggregated target value is
updated with changes to the requirement list or elements of the list.
Typically the aggregated target value is simply the max or min of the
requirement values held in the parameter list elements.

>From kernel mode the use of this interface is simple:
pm_qos_add_requirement(param_id, name, target_value):
Will insert a named element in the list for that identified PM_QOS
parameter with the target value.  Upon change to this list the new
target is recomputed and any registered notifiers are called only if the
target value is now different.

pm_qos_update_requirement(param_id, name, new_target_value):
Will search the list identified by the param_id for the named list
element and then update its target value, calling the notification tree
if the aggregated target is changed.  with that name is already
registered.

pm_qos_remove_requirement(param_id, name):
Will search the identified list for the named element and remove it,
after removal it will update the aggregate target and call the
notification tree if the target was changed as a result of removing the
named requirement.


>From user mode:
Only processes can register a pm_qos requirement.  To provide for
automatic cleanup for process the interface requires the process to
register its parameter requirements in the following way:

To register the default pm_qos target for the specific parameter, the
process must open one of /dev/[cpu_dma_latency, network_latency,
network_throughput]

As long as the device node is held open that process has a registered
requirement on the parameter.  The name of the requirement is
"process_" derived from the current->pid from within the open
system call.

To change the requested target value the process needs to write a s32
value to the open device node.  This translates to a
pm_qos_update_requirement call.

To remove the user mode request for a target value simply close the
device node.

--mgross


Signed-off-by: mark gross <[EMAIL PROTECTED]>

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 
linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt
--- linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt 2007-10-04 
14:26:58.0 -0700
@@ -0,0 +1,59 @@
+PM quality of Service interface.
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Currently we have {cpu_dma_latency, network_latency, network_throughput} as the
+initial set of pm_qos parameters.
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter.  The set of parameters implement is defined by pm_qos_power_init()
+and pm_qos_params.h.  This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requirements is maintained along with
+an aggregated target value.  The aggregated target value is updated with
+changes to the requirement list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the requirement values held
+in the parameter list elements.
+
+From kernel mode the use of this interface is simple:
+pm_qos_add_requirement(param_id, name, target_value):
+Will insert a named element in the list for that identified PM_QOS parameter
+with the target value.  Upon change to this list the new target is recomputed
+and any registered notifiers are called only if the target value is now
+different.
+
+pm_qos_update_requirement(param_id, name, new_target_value):
+Will search the list identified by the param_id for the named list element and
+then update its target value, calling the notification tree if the aggregated
+target is changed.  with that n

Re: [PATCH] PM_QOS 1 of 2

2007-10-04 Thread Mark Gross
On Thu, Oct 04, 2007 at 12:53:56PM -0700, Andrew Morton wrote:
> On Mon, 1 Oct 2007 16:45:28 -0700
> Mark Gross <[EMAIL PROTECTED]> wrote:
> 
> > The following is the cleaned up patch implementing the power management
> > quality of service infrastructure discussed at the pm summit last June.
> > 
> > It is a genralization of the latency code put into the kernel last year
> > by Arjan.
> > 
> > I would like to get this code included in the MM tree and to get some
> > milage on it.
> > 
> > One thing to note about this implementation is that it exposes an
> > interface to user space for registering pm_qos constraints in addition
> > to the kernel exports.  Its a file based interface where a module can
> > register a constraint and the constraint is valid only as long as the
> > device node is held open.  Upon closing of the device node that
> > constraint is cleaned up.
> > 
> > The patch set is in two postings.  
> > 1) the base parameter code (this email)
> > 2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h
> 
> I wouldn't really view this as an adequate changelog.
> 
> - The Subject:s are pretty pathetic (please see my suggesed replacements)

uhg.  Your right.

> 
> - There is no description of the proposed new kernel<->userspace
>   interfaces.
> 

the above description is light on specific details. 

>   As you are proposing new and permanent enhancements to the Linux API,
>   this is something which should be spelled out in some detail.  Because we
>   can change the implementation, but we can not ever change your interface.
> 
>   It would be nice to get that interface described in Documentation/
>   somewhere, but it is *critical* that the design be fully revealed right
>   now, during review.

I'll provide this.

> 
> 
> Anyway, I am not a suitable person to review this submission.
> 
> I'll put the patches in -mm for a bit of eyeball-and-test (not that anyone
> will know how to test it, due to the secret interfaces) but I do not want
> to move this code into mainline until someone who is familiar with the PM
> code has performed a detailed review of both the implementation and the
> design (whatever that is!).
> 
> Please send new, complete descriptions of these patches.  I don't think
> they can be effectively reviewed without that information.  Except perhaps
> by someone who was at the PM summit, but that's cheating.
> 

I will do this.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] PM_QOS 1 of 2

2007-10-04 Thread Mark Gross
On Thu, Oct 04, 2007 at 12:53:56PM -0700, Andrew Morton wrote:
 On Mon, 1 Oct 2007 16:45:28 -0700
 Mark Gross [EMAIL PROTECTED] wrote:
 
  The following is the cleaned up patch implementing the power management
  quality of service infrastructure discussed at the pm summit last June.
  
  It is a genralization of the latency code put into the kernel last year
  by Arjan.
  
  I would like to get this code included in the MM tree and to get some
  milage on it.
  
  One thing to note about this implementation is that it exposes an
  interface to user space for registering pm_qos constraints in addition
  to the kernel exports.  Its a file based interface where a module can
  register a constraint and the constraint is valid only as long as the
  device node is held open.  Upon closing of the device node that
  constraint is cleaned up.
  
  The patch set is in two postings.  
  1) the base parameter code (this email)
  2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h
 
 I wouldn't really view this as an adequate changelog.
 
 - The Subject:s are pretty pathetic (please see my suggesed replacements)

uhg.  Your right.

 
 - There is no description of the proposed new kernel-userspace
   interfaces.
 

the above description is light on specific details. 

   As you are proposing new and permanent enhancements to the Linux API,
   this is something which should be spelled out in some detail.  Because we
   can change the implementation, but we can not ever change your interface.
 
   It would be nice to get that interface described in Documentation/
   somewhere, but it is *critical* that the design be fully revealed right
   now, during review.

I'll provide this.

 
 
 Anyway, I am not a suitable person to review this submission.
 
 I'll put the patches in -mm for a bit of eyeball-and-test (not that anyone
 will know how to test it, due to the secret interfaces) but I do not want
 to move this code into mainline until someone who is familiar with the PM
 code has performed a detailed review of both the implementation and the
 design (whatever that is!).
 
 Please send new, complete descriptions of these patches.  I don't think
 they can be effectively reviewed without that information.  Except perhaps
 by someone who was at the PM summit, but that's cheating.
 

I will do this.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


pm qos infrastructure and interface

2007-10-04 Thread Mark Gross
The following patch is a generalization of the latency.c implementation
done by Arjan last year.  It provides infrastructure for more than one
parameter, and exposes a user mode interface for processes to register
pm_qos expectations of processes.


This interface provides a kernel and user mode interface for registering
performance expectations by drivers, subsystems and user space
applications on one of the parameters.

Currently we have {cpu_dma_latency, network_latency, network_throughput}
as the initial set of pm_qos parameters.

The infrastructure exposes multiple misc device nodes one per
implemented parameter.  The set of parameters implement is defined by
pm_qos_power_init() and pm_qos_params.h.  This is done because having
the available parameters being runtime configurable or changeable from a
driver was seen as too easy to abuse.

For each parameter a list of performance requirements is maintained
along with an aggregated target value.  The aggregated target value is
updated with changes to the requirement list or elements of the list.
Typically the aggregated target value is simply the max or min of the
requirement values held in the parameter list elements.

From kernel mode the use of this interface is simple:
pm_qos_add_requirement(param_id, name, target_value):
Will insert a named element in the list for that identified PM_QOS
parameter with the target value.  Upon change to this list the new
target is recomputed and any registered notifiers are called only if the
target value is now different.

pm_qos_update_requirement(param_id, name, new_target_value):
Will search the list identified by the param_id for the named list
element and then update its target value, calling the notification tree
if the aggregated target is changed.  with that name is already
registered.

pm_qos_remove_requirement(param_id, name):
Will search the identified list for the named element and remove it,
after removal it will update the aggregate target and call the
notification tree if the target was changed as a result of removing the
named requirement.


From user mode:
Only processes can register a pm_qos requirement.  To provide for
automatic cleanup for process the interface requires the process to
register its parameter requirements in the following way:

To register the default pm_qos target for the specific parameter, the
process must open one of /dev/[cpu_dma_latency, network_latency,
network_throughput]

As long as the device node is held open that process has a registered
requirement on the parameter.  The name of the requirement is
process_PID derived from the current-pid from within the open
system call.

To change the requested target value the process needs to write a s32
value to the open device node.  This translates to a
pm_qos_update_requirement call.

To remove the user mode request for a target value simply close the
device node.

--mgross


Signed-off-by: mark gross [EMAIL PROTECTED]

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 
linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt
--- linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt 2007-10-04 
14:26:58.0 -0700
@@ -0,0 +1,59 @@
+PM quality of Service interface.
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Currently we have {cpu_dma_latency, network_latency, network_throughput} as the
+initial set of pm_qos parameters.
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter.  The set of parameters implement is defined by pm_qos_power_init()
+and pm_qos_params.h.  This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requirements is maintained along with
+an aggregated target value.  The aggregated target value is updated with
+changes to the requirement list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the requirement values held
+in the parameter list elements.
+
+From kernel mode the use of this interface is simple:
+pm_qos_add_requirement(param_id, name, target_value):
+Will insert a named element in the list for that identified PM_QOS parameter
+with the target value.  Upon change to this list the new target is recomputed
+and any registered notifiers are called only if the target value is now
+different.
+
+pm_qos_update_requirement(param_id, name, new_target_value):
+Will search the list identified by the param_id for the named list element and
+then update its target value, calling the notification tree if the aggregated
+target is changed.  with that name is already registered.
+
+pm_qos_remove_requirement

Re: [linux-pm] [PATCH] PM_QOS 2 of 2

2007-10-03 Thread Mark Gross
this is the second part of the patch to replace latency.c use with
pm_qos_params use.


--mgross



Signed-off-by: mark gross <[EMAIL PROTECTED]>

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c 
linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c
--- linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c  2007-09-26 
13:54:28.0 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c  
2007-10-01 11:32:13.0 -0700
@@ -38,7 +38,7 @@
 #include 
 #include 
 #include/* need_resched() */
-#include 
+#include 
 #include 
 
 /*
@@ -605,7 +605,8 @@
if (cx->promotion.state &&
((cx->promotion.state - pr->power.states) <= max_cstate)) {
if (sleep_ticks > cx->promotion.threshold.ticks &&
- cx->promotion.state->latency <= system_latency_constraint()) {
+ cx->promotion.state->latency <=
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
cx->promotion.count++;
cx->demotion.count = 0;
if (cx->promotion.count >=
@@ -649,7 +650,8 @@
 * or if the latency of the current state is unacceptable
 */
if ((pr->power.state - pr->power.states) > max_cstate ||
-   pr->power.state->latency > system_latency_constraint()) {
+   pr->power.state->latency >
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
if (cx->demotion.state)
next_state = cx->demotion.state;
}
@@ -1173,7 +1175,7 @@
   "maximum allowed latency: %d usec\n",
   pr->power.state ? pr->power.state - pr->power.states : 0,
   max_cstate, (unsigned)pr->power.bm_activity,
-  system_latency_constraint());
+  pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
 
seq_puts(seq, "states:\n");
 
@@ -1280,7 +1282,8 @@
   max_cstate);
first_run++;
 #ifdef CONFIG_SMP
-   register_latency_notifier(_processor_latency_notifier);
+   pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
+   _processor_latency_notifier);
 #endif
}
 
@@ -1354,7 +1357,8 @@
 */
cpu_idle_wait();
 #ifdef CONFIG_SMP
-   unregister_latency_notifier(_processor_latency_notifier);
+   pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY,
+   _processor_latency_notifier);
 #endif
}
 
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 
linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c
--- linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 2007-09-26 
13:54:34.0 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c 
2007-10-01 12:32:56.0 -0700
@@ -162,7 +162,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 
 #include "ipw2100.h"
 
@@ -1701,7 +1701,7 @@
/* the ipw2100 hardware really doesn't want power management delays
 * longer than 175usec
 */
-   modify_acceptable_latency("ipw2100", 175);
+   pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", 175);
 
/* If the interrupt is enabled, turn it off... */
spin_lock_irqsave(>low_lock, flags);
@@ -1856,7 +1856,8 @@
ipw2100_disable_interrupts(priv);
spin_unlock_irqrestore(>low_lock, flags);
 
-   modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
+   pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
+   PM_QOS_DEFAULT_VALUE);
 
 #ifdef ACPI_CSTATE_LIMIT_DEFINED
if (priv->config & CFG_C3_DISABLED) {
@@ -6544,7 +6545,8 @@
if (ret)
goto out;
 
-   set_acceptable_latency("ipw2100", INFINITE_LATENCY);
+   pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
+   PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
ipw2100_debug_level = debug;
ret = driver_create_file(_pci_driver.driver,
@@ -6566,7 +6568,7 @@
   _attr_debug_level);
 #endif
pci_unregister_driver(_pci_driver);
-   remove_acceptable_latency("ipw2100");
+   pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100");
 }
 
 module_init(ipw2100_init);
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/include/linux/latency.h 
linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h
--- linux-2.6.23-rc8-qos/include/linux/latency.h2007-07-08 
1

Re: [linux-pm] [PATCH] PM_QOS 2 of 2

2007-10-03 Thread Mark Gross
this is the second part of the patch to replace latency.c use with
pm_qos_params use.


--mgross



Signed-off-by: mark gross [EMAIL PROTECTED]

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c 
linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c
--- linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c  2007-09-26 
13:54:28.0 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c  
2007-10-01 11:32:13.0 -0700
@@ -38,7 +38,7 @@
 #include linux/dmi.h
 #include linux/moduleparam.h
 #include linux/sched.h   /* need_resched() */
-#include linux/latency.h
+#include linux/pm_qos_params.h
 #include linux/clockchips.h
 
 /*
@@ -605,7 +605,8 @@
if (cx-promotion.state 
((cx-promotion.state - pr-power.states) = max_cstate)) {
if (sleep_ticks  cx-promotion.threshold.ticks 
- cx-promotion.state-latency = system_latency_constraint()) {
+ cx-promotion.state-latency =
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
cx-promotion.count++;
cx-demotion.count = 0;
if (cx-promotion.count =
@@ -649,7 +650,8 @@
 * or if the latency of the current state is unacceptable
 */
if ((pr-power.state - pr-power.states)  max_cstate ||
-   pr-power.state-latency  system_latency_constraint()) {
+   pr-power.state-latency 
+   pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) {
if (cx-demotion.state)
next_state = cx-demotion.state;
}
@@ -1173,7 +1175,7 @@
   maximum allowed latency: %d usec\n,
   pr-power.state ? pr-power.state - pr-power.states : 0,
   max_cstate, (unsigned)pr-power.bm_activity,
-  system_latency_constraint());
+  pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
 
seq_puts(seq, states:\n);
 
@@ -1280,7 +1282,8 @@
   max_cstate);
first_run++;
 #ifdef CONFIG_SMP
-   register_latency_notifier(acpi_processor_latency_notifier);
+   pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY,
+   acpi_processor_latency_notifier);
 #endif
}
 
@@ -1354,7 +1357,8 @@
 */
cpu_idle_wait();
 #ifdef CONFIG_SMP
-   unregister_latency_notifier(acpi_processor_latency_notifier);
+   pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY,
+   acpi_processor_latency_notifier);
 #endif
}
 
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 
linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c
--- linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 2007-09-26 
13:54:34.0 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c 
2007-10-01 12:32:56.0 -0700
@@ -162,7 +162,7 @@
 #include linux/firmware.h
 #include linux/acpi.h
 #include linux/ctype.h
-#include linux/latency.h
+#include linux/pm_qos_params.h
 
 #include ipw2100.h
 
@@ -1701,7 +1701,7 @@
/* the ipw2100 hardware really doesn't want power management delays
 * longer than 175usec
 */
-   modify_acceptable_latency(ipw2100, 175);
+   pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100, 175);
 
/* If the interrupt is enabled, turn it off... */
spin_lock_irqsave(priv-low_lock, flags);
@@ -1856,7 +1856,8 @@
ipw2100_disable_interrupts(priv);
spin_unlock_irqrestore(priv-low_lock, flags);
 
-   modify_acceptable_latency(ipw2100, INFINITE_LATENCY);
+   pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100,
+   PM_QOS_DEFAULT_VALUE);
 
 #ifdef ACPI_CSTATE_LIMIT_DEFINED
if (priv-config  CFG_C3_DISABLED) {
@@ -6544,7 +6545,8 @@
if (ret)
goto out;
 
-   set_acceptable_latency(ipw2100, INFINITE_LATENCY);
+   pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100,
+   PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
ipw2100_debug_level = debug;
ret = driver_create_file(ipw2100_pci_driver.driver,
@@ -6566,7 +6568,7 @@
   driver_attr_debug_level);
 #endif
pci_unregister_driver(ipw2100_pci_driver);
-   remove_acceptable_latency(ipw2100);
+   pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100);
 }
 
 module_init(ipw2100_init);
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8-qos/include/linux/latency.h 
linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h
--- linux-2.6.23-rc8-qos/include/linux/latency.h2007-07-08 
16:32:17.0 -0700
+++ linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h

[PATCH] PM_QOS 1 of 2

2007-10-01 Thread Mark Gross
The following is the cleaned up patch implementing the power management
quality of service infrastructure discussed at the pm summit last June.

It is a genralization of the latency code put into the kernel last year
by Arjan.

I would like to get this code included in the MM tree and to get some
milage on it.

One thing to note about this implementation is that it exposes an
interface to user space for registering pm_qos constraints in addition
to the kernel exports.  Its a file based interface where a module can
register a constraint and the constraint is valid only as long as the
device node is held open.  Upon closing of the device node that
constraint is cleaned up.

The patch set is in two postings.  
1) the base parameter code (this email)
2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h


thanks,

--mgross


Signed-off-by: mark gross <[EMAIL PROTECTED]>

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/include/linux/pm_qos_params.h 
linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h
--- linux-2.6.23-rc8/include/linux/pm_qos_params.h  1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h  2007-10-01 
11:19:13.0 -0700
@@ -0,0 +1,35 @@
+/* interface for the pm_qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include 
+#include 
+#include 
+
+struct requirement_list {
+   struct list_head list;
+   union {
+   s32 value;
+   s32 usec;
+   s32 kbps;
+   };
+   char *name;
+};
+
+#define PM_QOS_RESERVED 0
+#define PM_QOS_CPU_DMA_LATENCY 1
+#define PM_QOS_NETWORK_LATENCY 2
+#define PM_QOS_NETWORK_THROUGHPUT 3
+
+#define PM_QOS_NUM_CLASSES 4
+#define PM_QOS_DEFAULT_VALUE -1
+
+int pm_qos_add_requirement(int qos, char *name, s32 value);
+int pm_qos_update_requirement(int qos, char *name, s32 new_value);
+void pm_qos_remove_requirement(int qos, char *name);
+
+int pm_qos_requirement(int qos);
+
+int pm_qos_add_notifier(int qos, struct notifier_block *notifier);
+int pm_qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile
--- linux-2.6.23-rc8/kernel/Makefile2007-09-26 13:54:54.0 -0700
+++ linux-2.6.23-rc8-qos/kernel/Makefile2007-10-01 11:13:35.0 
-0700
@@ -9,7 +9,7 @@
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-   utsname.o
+   utsname.o pm_qos_params.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/kernel/pm_qos_params.c 
linux-2.6.23-rc8-qos/kernel/pm_qos_params.c
--- linux-2.6.23-rc8/kernel/pm_qos_params.c 1969-12-31 16:00:00.0 
-0800
+++ linux-2.6.23-rc8-qos/kernel/pm_qos_params.c 2007-10-01 16:03:31.0 
-0700
@@ -0,0 +1,417 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies.  It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based.  Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput
+ * each have defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QOS parameter register themselves to the
+ * subsystem by opening the device node /dev/... and writing there request to
+ * the node.  As long as the process holds a file handle open to the node the
+ * client continues to be accounted for.  Upon file release the usermode
+ * requirement is removed and a new qos target is computed.  This way when the
+ * requirement that the application has is cleaned up when closes the file
+ * pointer or exits the pm_qos_object will get an opportunity to clean up.
+ *
+ * mark gross [EMAIL PROTECTED]
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists
+ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
+ * held, taken with _irqsave.  One lock to rule them all
+ */
+
+struct pm_qos_object {
+   struct requirement_list requirements;
+   struct srcu_notifier_head notifiers;
+   struct miscdevice pm_qos_power_miscdev;
+   char *name;
+   s32 default_value;
+ 

Re: [RFC] QoS params patch

2007-10-01 Thread Mark Gross
On Fri, Sep 28, 2007 at 11:51:41AM -0700, Andrew Morton wrote:
> On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross <[EMAIL PROTECTED]> wrote:
> 
> > On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
> > > On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[EMAIL PROTECTED]> wrote:
> > > 
> > > > +#define QOS_RESERVED 0
> > > > +#define QOS_CPU_DMA_LATENCY 1
> > > > +#define QOS_NETWORK_LATENCY 2
> > > > +#define QOS_NETWORK_THROUGHPUT 3
> > > > +
> > > > +#define QOS_NUM_CLASSES 4
> > > > +#define QOS_DEFAULT_VALUE -1
> > > > +
> > > > +int qos_add_requirement(int qos, char *name, s32 value);
> > > > +int qos_update_requirement(int qos, char *name, s32 new_value);
> > > > +void qos_remove_requirement(int qos, char *name);
> > > 
> > > It's a bit rude stealing the entire "qos" namespace like this - there are
> > > many different forms of QoS, some already in-kernel.
> > > 
> > > s/qos/pm_qos/g ?
> > 
> > I suppose it is a bit inconiderate.  I could grow to like pm_qos,
> > performance_throttling_constraint_hint_infrastructure is a bit too
> > wordy. 
> > 
> > I suppose I should use qospm as thats the way it was put up on that
> > lesswatts.org web page. 
> > 
> > Would qospm be good enough?
> > 
> 
> Don't think it matters a lot, but kernel naming tends to be big-endian (ie:
> we have net_ratelimit, not ratelimit_net), so the major part (pm) would
> come first under that scheme.

this makes sense.  pm_qos it is.

--mgross
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] QoS params patch

2007-10-01 Thread Mark Gross
On Fri, Sep 28, 2007 at 11:51:41AM -0700, Andrew Morton wrote:
 On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross [EMAIL PROTECTED] wrote:
 
  On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote:
   On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross [EMAIL PROTECTED] wrote:
   
+#define QOS_RESERVED 0
+#define QOS_CPU_DMA_LATENCY 1
+#define QOS_NETWORK_LATENCY 2
+#define QOS_NETWORK_THROUGHPUT 3
+
+#define QOS_NUM_CLASSES 4
+#define QOS_DEFAULT_VALUE -1
+
+int qos_add_requirement(int qos, char *name, s32 value);
+int qos_update_requirement(int qos, char *name, s32 new_value);
+void qos_remove_requirement(int qos, char *name);
   
   It's a bit rude stealing the entire qos namespace like this - there are
   many different forms of QoS, some already in-kernel.
   
   s/qos/pm_qos/g ?
  
  I suppose it is a bit inconiderate.  I could grow to like pm_qos,
  performance_throttling_constraint_hint_infrastructure is a bit too
  wordy. 
  
  I suppose I should use qospm as thats the way it was put up on that
  lesswatts.org web page. 
  
  Would qospm be good enough?
  
 
 Don't think it matters a lot, but kernel naming tends to be big-endian (ie:
 we have net_ratelimit, not ratelimit_net), so the major part (pm) would
 come first under that scheme.

this makes sense.  pm_qos it is.

--mgross
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] PM_QOS 1 of 2

2007-10-01 Thread Mark Gross
The following is the cleaned up patch implementing the power management
quality of service infrastructure discussed at the pm summit last June.

It is a genralization of the latency code put into the kernel last year
by Arjan.

I would like to get this code included in the MM tree and to get some
milage on it.

One thing to note about this implementation is that it exposes an
interface to user space for registering pm_qos constraints in addition
to the kernel exports.  Its a file based interface where a module can
register a constraint and the constraint is valid only as long as the
device node is held open.  Upon closing of the device node that
constraint is cleaned up.

The patch set is in two postings.  
1) the base parameter code (this email)
2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h


thanks,

--mgross


Signed-off-by: mark gross [EMAIL PROTECTED]

---

diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/include/linux/pm_qos_params.h 
linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h
--- linux-2.6.23-rc8/include/linux/pm_qos_params.h  1969-12-31 
16:00:00.0 -0800
+++ linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h  2007-10-01 
11:19:13.0 -0700
@@ -0,0 +1,35 @@
+/* interface for the pm_qos_power infrastructure of the linux kernel.
+ *
+ * Mark Gross
+ */
+#include linux/list.h
+#include linux/notifier.h
+#include linux/miscdevice.h
+
+struct requirement_list {
+   struct list_head list;
+   union {
+   s32 value;
+   s32 usec;
+   s32 kbps;
+   };
+   char *name;
+};
+
+#define PM_QOS_RESERVED 0
+#define PM_QOS_CPU_DMA_LATENCY 1
+#define PM_QOS_NETWORK_LATENCY 2
+#define PM_QOS_NETWORK_THROUGHPUT 3
+
+#define PM_QOS_NUM_CLASSES 4
+#define PM_QOS_DEFAULT_VALUE -1
+
+int pm_qos_add_requirement(int qos, char *name, s32 value);
+int pm_qos_update_requirement(int qos, char *name, s32 new_value);
+void pm_qos_remove_requirement(int qos, char *name);
+
+int pm_qos_requirement(int qos);
+
+int pm_qos_add_notifier(int qos, struct notifier_block *notifier);
+int pm_qos_remove_notifier(int qos, struct notifier_block *notifier);
+
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile
--- linux-2.6.23-rc8/kernel/Makefile2007-09-26 13:54:54.0 -0700
+++ linux-2.6.23-rc8-qos/kernel/Makefile2007-10-01 11:13:35.0 
-0700
@@ -9,7 +9,7 @@
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-   utsname.o
+   utsname.o pm_qos_params.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff 
linux-2.6.23-rc8/kernel/pm_qos_params.c 
linux-2.6.23-rc8-qos/kernel/pm_qos_params.c
--- linux-2.6.23-rc8/kernel/pm_qos_params.c 1969-12-31 16:00:00.0 
-0800
+++ linux-2.6.23-rc8-qos/kernel/pm_qos_params.c 2007-10-01 16:03:31.0 
-0700
@@ -0,0 +1,417 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies.  It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requirements
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based.  Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout, throughput
+ * each have defined units:
+ * latency: usec
+ * timeout: usec -- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ *
+ * User mode requirements on a QOS parameter register themselves to the
+ * subsystem by opening the device node /dev/... and writing there request to
+ * the node.  As long as the process holds a file handle open to the node the
+ * client continues to be accounted for.  Upon file release the usermode
+ * requirement is removed and a new qos target is computed.  This way when the
+ * requirement that the application has is cleaned up when closes the file
+ * pointer or exits the pm_qos_object will get an opportunity to clean up.
+ *
+ * mark gross [EMAIL PROTECTED]
+ */
+
+#include linux/pm_qos_params.h
+#include linux/sched.h
+#include linux/spinlock.h
+#include linux/slab.h
+#include linux/time.h
+#include linux/fs.h
+#include linux/device.h
+#include linux/miscdevice.h
+#include linux/string.h
+#include linux/platform_device.h
+#include linux/init.h
+
+#include linux/uaccess.h
+
+/*
+ * locking rule: all changes to target_value or requirements or notifiers lists
+ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
+ * held, taken with _irqsave.  One lock to rule them all

<    1   2   3   >