[PATCH]iova-lockdep-false-alarm-fix.
lockdep goes off on the iova copy_reserved_iova because it and a function it calls grabs locks in the from, and the to of the copy operation. This patch gives the reserved_ioval_list locks special lockdep classes. --mgross Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-20 15:52:23.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-20 16:08:27.0 -0800 @@ -1127,6 +1127,8 @@ } static struct iova_domain reserved_iova_list; +static struct lock_class_key reserved_alloc_key; +static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) { @@ -1137,6 +1139,11 @@ init_iova_domain(reserved_iova_list, DMA_32BIT_PFN); + lockdep_set_class(reserved_iova_list.iova_alloc_lock, + reserved_alloc_key); + lockdep_set_class(reserved_iova_list.iova_rbtree_lock, + reserved_rbtree_key); + /* IOAPIC ranges shouldn't be accessed by DMA */ iova = reserve_iova(reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), IOVA_PFN(IOAPIC_RANGE_END)); -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu batched iotlb flushes
On Wed, Feb 13, 2008 at 10:23:34AM -0800, Randy Dunlap wrote: >> Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt >> === >> --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt >> 2008-02-12 >> 07:12:06.0 -0800 >> +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 >> 11:36:07.0 -0800 >> @@ -822,6 +822,10 @@ >> than 32 bit addressing. The default is to look >> for translation below 32 bit and if not available >> then look in the higher range. >> +strict [Default Off] >> +With this option on every umap_single operation will > > so I'll ask one more time :( > Shouldn't this be "unmap_single" ? > >> +result in a hardware IOTLB flush operation as opposed >> +to batching them for performance. >> io_delay= [X86-32,X86-64] I/O delay method >> 0x80 > > crap I was focused on the single. Signed-off-by: <[EMAIL PROTECTED]> Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include "iova.h" #include "intel-iommu.h" #include /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO "Intel-IOMMU: disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - printk (KERN_INFO + printk(KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 8)) { + printk(KERN_INFO + "Intel-IOMMU: disable batched IOTLB flush\n"); + intel_iommu_strict = 1; } str += strcspn(str, ","); @@ -965,17 +999,13 @@ set_bit(0, iommu->domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index < cnt; index ++) + for (index = 0; index < cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); + g_num_of_iommus++; + } + + nlongs = BITS_TO_LONGS(g_num_of_iommus); + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); + if (!g_iommus_to_flush) { + printk(KERN_ERR "Intel-IOMMU: " + "Allocating
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 07:54:48AM -0800, David Miller wrote: > > Something could be done: > > we could enable drivers to have DMA-pools they manage that get mapped > > and are re-used. > > > > I would rather the DMA-pools be tied to PID's that way any bad behavior > > would be limited to the address space of the process using the device. > > I haven't thought about how hard this would be to do but it would be > > nice. I think this could be tricky. > > Yes, this is a good idea especially for networking. > > For transmit on 10GB links the IOMMU setup is near the top > of the profiles. true. > What a driver could do is determine the maximum number of > IOMMU pages it could need to map one maximally sized packet. > So then it allocates enough space for all such entries in > it's TX ring. > > This eliminates the range allocation from the transmit path. > All that's left is "remap DMA range X to scatterlist Y" > > And yes it would be nice to have dma_map_skb() type interfaces > so that we don't walk into the IOMMU code N times per packet. /me starts looking more closely at how this could be done... --mgross -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 12:21:08PM -0800, Randy Dunlap wrote: >> Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt >> === >> --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt >> 2008-02-12 >> 07:12:06.0 -0800 >> +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 >> 11:36:07.0 -0800 >> @@ -822,6 +822,10 @@ >> than 32 bit addressing. The default is to look >> for translation below 32 bit and if not available >> then look in the higher range. >> +strict [Default Off] >> +With this option on every umap_signle operation will > > umap_signle ??? (again) > >> +result in a hardware IOTLB flush operation as opposed >> +to batching them for performance. >> io_delay= [X86-32,X86-64] I/O delay method >> 0x80 > > > -- > ~Randy > Signed-off-by: <[EMAIL PROTECTED]> Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include "iova.h" #include "intel-iommu.h" #include /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO "Intel-IOMMU: disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - printk (KERN_INFO + printk(KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 8)) { + printk(KERN_INFO + "Intel-IOMMU: disable batched IOTLB flush\n"); + intel_iommu_strict = 1; } str += strcspn(str, ","); @@ -965,17 +999,13 @@ set_bit(0, iommu->domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index < cnt; index ++) + for (index = 0; index < cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); + g_num_of_iommus++; + } + + nlongs = BITS_TO_LONGS(g_num_of_iommus); + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); + if (!g_iommus_to_flush) { + printk(KERN_ERR "Intel-IOMMU: " + "Allocating bitmap array failed\n"); + return -ENOMEM;
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 12:21:08PM -0800, Randy Dunlap wrote: Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt === --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 11:36:07.0 -0800 @@ -822,6 +822,10 @@ than 32 bit addressing. The default is to look for translation below 32 bit and if not available then look in the higher range. +strict [Default Off] +With this option on every umap_signle operation will umap_signle ??? (again) +result in a hardware IOTLB flush operation as opposed +to batching them for performance. io_delay= [X86-32,X86-64] I/O delay method 0x80 -- ~Randy Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include linux/init.h #include linux/bitmap.h +#include linux/debugfs.h #include linux/slab.h #include linux/irq.h #include linux/interrupt.h @@ -30,6 +31,7 @@ #include linux/dmar.h #include linux/dma-mapping.h #include linux/mempool.h +#include linux/timer.h #include iova.h #include intel-iommu.h #include asm/proto.h /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO Intel-IOMMU: disable GFX device mapping\n); } else if (!strncmp(str, forcedac, 8)) { - printk (KERN_INFO + printk(KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, strict, 8)) { + printk(KERN_INFO + Intel-IOMMU: disable batched IOTLB flush\n); + intel_iommu_strict = 1; } str += strcspn(str, ,); @@ -965,17 +999,13 @@ set_bit(0, iommu-domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K); if (!iommu-reg) { printk(KERN_ERR IOMMU: can't map the region\n); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index cnt; index ++) + for (index = 0; index cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd-ignored) continue; - iommu = alloc_iommu(drhd); + g_num_of_iommus++; + } + + nlongs = BITS_TO_LONGS(g_num_of_iommus); + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); + if (!g_iommus_to_flush) { + printk(KERN_ERR Intel-IOMMU: +
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 07:54:48AM -0800, David Miller wrote: Something could be done: we could enable drivers to have DMA-pools they manage that get mapped and are re-used. I would rather the DMA-pools be tied to PID's that way any bad behavior would be limited to the address space of the process using the device. I haven't thought about how hard this would be to do but it would be nice. I think this could be tricky. Yes, this is a good idea especially for networking. For transmit on 10GB links the IOMMU setup is near the top of the profiles. true. What a driver could do is determine the maximum number of IOMMU pages it could need to map one maximally sized packet. So then it allocates enough space for all such entries in it's TX ring. This eliminates the range allocation from the transmit path. All that's left is remap DMA range X to scatterlist Y And yes it would be nice to have dma_map_skb() type interfaces so that we don't walk into the IOMMU code N times per packet. /me starts looking more closely at how this could be done... --mgross -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu batched iotlb flushes
On Wed, Feb 13, 2008 at 10:23:34AM -0800, Randy Dunlap wrote: Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt === --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt 2008-02-12 11:36:07.0 -0800 @@ -822,6 +822,10 @@ than 32 bit addressing. The default is to look for translation below 32 bit and if not available then look in the higher range. +strict [Default Off] +With this option on every umap_single operation will so I'll ask one more time :( Shouldn't this be unmap_single ? +result in a hardware IOTLB flush operation as opposed +to batching them for performance. io_delay= [X86-32,X86-64] I/O delay method 0x80 crap I was focused on the single. Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include linux/init.h #include linux/bitmap.h +#include linux/debugfs.h #include linux/slab.h #include linux/irq.h #include linux/interrupt.h @@ -30,6 +31,7 @@ #include linux/dmar.h #include linux/dma-mapping.h #include linux/mempool.h +#include linux/timer.h #include iova.h #include intel-iommu.h #include asm/proto.h /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO Intel-IOMMU: disable GFX device mapping\n); } else if (!strncmp(str, forcedac, 8)) { - printk (KERN_INFO + printk(KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, strict, 8)) { + printk(KERN_INFO + Intel-IOMMU: disable batched IOTLB flush\n); + intel_iommu_strict = 1; } str += strcspn(str, ,); @@ -965,17 +999,13 @@ set_bit(0, iommu-domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K); if (!iommu-reg) { printk(KERN_ERR IOMMU: can't map the region\n); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index cnt; index ++) + for (index = 0; index cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd-ignored) continue; - iommu = alloc_iommu(drhd); + g_num_of_iommus++; + } + + nlongs = BITS_TO_LONGS(g_num_of_iommus); + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); + if (!g_iommus_to_flush) { +
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 08:34:39AM -0800, Randy Dunlap wrote: > mark gross wrote: >> Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c >> === >> --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 >> 07:12:06.0 -0800 >> +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 >> 07:47:04.0 -0800 > >> @@ -1672,7 +1702,30 @@ >> for_each_drhd_unit(drhd) { >> if (drhd->ignored) >> continue; >> -iommu = alloc_iommu(drhd); >> +g_num_of_iommus++; >> +} >> + >> +nlongs = BITS_TO_LONGS(g_num_of_iommus); >> +g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); >> +if (!g_iommus_to_flush) { >> +printk(KERN_ERR "Intel-IOMMU: \ >> +Allocating bitmap array failed\n"); > > I think that will make a string like below: > Intel-IOMMU: Allocating bitmap array failed > you are right. >> +return -ENOMEM; >> +} >> +strict [Default Off] >> +With this option on every umap_signal opperation will > > unmap_single operation > >> +result in a hardware IOTLB flush opperation as opposed Signed-off-by: <[EMAIL PROTECTED]> Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include "iova.h" #include "intel-iommu.h" #include /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO "Intel-IOMMU: disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - printk (KERN_INFO + printk(KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 8)) { + printk(KERN_INFO + "Intel-IOMMU: disable batched IOTLB flush\n"); + intel_iommu_strict = 1; } str += strcspn(str, ","); @@ -965,17 +999,13 @@ set_bit(0, iommu->domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index < cnt; index ++) + for (index = 0; index < cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu;
Re: [PATCH]intel-iommu batched iotlb flushes
On Mon, Feb 11, 2008 at 03:27:16PM -0800, Randy Dunlap wrote: > On Mon, 11 Feb 2008 14:41:05 -0800 mark gross wrote: > > > The hole is the following scenarios: > > do many map_signal operations, do some unmap_signals, reuse a recently > > unmapped page, > memory> > > > > Or: you have rouge hardware using DMA's to look at pages: do many > > or rogue hardware? yes bad-guy hardware. > > > map_signal's, do many unmap_singles, reuse some unmapped pages : > > signal single > > > > >why rouge? because I'm a dumb ass. > > > Note : these holes are very hard to get too, as the IOTLB is small and > > only the PTE's still in the IOTLB can be accessed through this > > mechanism. > > > > Its recommended that strict is used when developing drivers that do DMA > > operations to catch bugs early. For production code where performance > > is desired running with the batched IOTLB flushing is a good way to > > go. > > > > > > --mgross > > > > > > Signed-off-by: <[EMAIL PROTECTED]> > > > > > > > > Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c > > === > > --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 > > 13:03:10.0 -0800 > > +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-11 > > 10:38:49.0 -0800 > > > @@ -50,11 +52,39 @@ > > > > #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1) > > > > + > > +static void flush_unmaps_timeout(unsigned long data); > > + > > +static struct timer_list unmap_timer = > > + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); > > + > > +struct unmap_list { > > + struct list_head list; > > + struct dmar_domain *domain; > > + struct iova *iova; > > +}; > > + > > +static struct intel_iommu *g_iommus; > > +/* bitmap for indexing intel_iommus */ > > +static unsigned long *g_iommus_to_flush; > > +static int g_num_of_iommus; > > + > > +static DEFINE_SPINLOCK(async_umap_flush_lock); > > +static LIST_HEAD(unmaps_to_do); > > + > > +static int timer_on; > > +static long list_size; > > +static int high_watermark; > > + > > +static struct dentry *intel_iommu_debug, *debug; > > + > > + > > static void domain_remove_dev_info(struct dmar_domain *domain); > > > > static int dmar_disabled; > > static int __initdata dmar_map_gfx = 1; > > static int dmar_forcedac; > > +static int intel_iommu_strict; > > > > #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) > > static DEFINE_SPINLOCK(device_domain_lock); > > @@ -73,9 +103,13 @@ > > printk(KERN_INFO > > "Intel-IOMMU: disable GFX device mapping\n"); > > } else if (!strncmp(str, "forcedac", 8)) { > > - printk (KERN_INFO > > + printk(KERN_INFO > > "Intel-IOMMU: Forcing DAC for PCI devices\n"); > > dmar_forcedac = 1; > > + } else if (!strncmp(str, "strict", 8)) { > > + printk(KERN_INFO > > + "Intel-IOMMU: disable bached IOTLB flush\n"); > > batched fixed. > > > + intel_iommu_strict = 1; > > } > > > > str += strcspn(str, ","); > > > @@ -1672,7 +1702,29 @@ > > for_each_drhd_unit(drhd) { > > if (drhd->ignored) > > continue; > > - iommu = alloc_iommu(drhd); > > + g_num_of_iommus++; > > + } > > + > > + nlongs = BITS_TO_LONGS(g_num_of_iommus); > > + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); > > + if (!g_iommus_to_flush) { > > + printk(KERN_ERR "Allocating bitmap array failed\n"); > > identify: "IOMMU: > > > + return -ENOMEM; > > + } > > + > > + g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); > > + if (!g_iommus) { > > + kfree(g_iommus_to_flush); > > + ret = -ENOMEM; > > + goto error; > > + } > > + > > + i = 0; > > + for_each_drhd_unit(drhd) { > > + i
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 01:00:06AM -0800, David Miller wrote: > From: Muli Ben-Yehuda <[EMAIL PROTECTED]> > Date: Tue, 12 Feb 2008 10:52:56 +0200 > > > The streaming DMA-API was designed to conserve IOMMU mappings for > > machines where IOMMU mappings are a scarce resource, and is a poor > > fit for a modern IOMMU such as VT-d with a 64-bit IO address space > > (or even an IOMMU with a 32-bit address space such as Calgary) where > > there are plenty of IOMMU mappings available. > > For the 64-bit case what you are suggesting eventually amounts > to mapping all available RAM in the IOMMU. Something could be done: we could enable drivers to have DMA-pools they manage that get mapped and are re-used. I would rather the DMA-pools be tied to PID's that way any bad behavior would be limited to the address space of the process using the device. I haven't thought about how hard this would be to do but it would be nice. I think this could be tricky. Application sets up ring buffer of device DMA memory, passes this to driver/stack. Need to handle hitting high water marks and application exit clean up sanely... --mgross > > Although an extreme version of your suggestion, it would be the > most efficient as it would require zero IOMMU flush operations. > > But we'd lose things like protection and other benefits. > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 10:52:56AM +0200, Muli Ben-Yehuda wrote: > On Mon, Feb 11, 2008 at 02:41:05PM -0800, mark gross wrote: > > > The intel-iommu hardware requires a polling operation to flush IOTLB > > PTE's after an unmap operation. Through some TSC instrumentation of > > a netperf UDP stream with small packets test case it was seen that > > the flush operations where sucking up to 16% of the CPU time doing > > iommu_flush_iotlb's > > > > The following patch batches the IOTLB flushes removing most of the > > overhead in flushing the IOTLB's. It works by building a list of to > > be released IOVA's that is iterated over when a timer goes off or > > when a high water mark is reached. > > > > The wrinkle this has is that the memory protection and page fault > > warnings from errant DMA operations is somewhat reduced, hence a kernel > > parameter is added to revert back to the "strict" page flush / unmap > > behavior. > > > > The hole is the following scenarios: > > do many map_signal operations, do some unmap_signals, reuse a recently > > unmapped page, > memory> > > > > Or: you have rouge hardware using DMA's to look at pages: do many > > map_signal's, do many unmap_singles, reuse some unmapped pages : > > > > > > Note : these holes are very hard to get too, as the IOTLB is small > > and only the PTE's still in the IOTLB can be accessed through this > > mechanism. > > > > Its recommended that strict is used when developing drivers that do > > DMA operations to catch bugs early. For production code where > > performance is desired running with the batched IOTLB flushing is a > > good way to go. > > While I don't disagree with this patch in principle (Calgary does the > same thing due to expensive IOTLB flushes) the right way to fix it > IMHO is to fix the drivers to batch mapping and unmapping operations > or map up-front and unmap when done. The streaming DMA-API was > designed to conserve IOMMU mappings for machines where IOMMU mappings > are a scarce resource, and is a poor fit for a modern IOMMU such as > VT-d with a 64-bit IO address space (or even an IOMMU with a 32-bit > address space such as Calgary) where there are plenty of IOMMU > mappings available. Yes, have a DMA pool of DMA addresses to use and re-use in the stack instead of setting up and tearing down the PTE's is something we need to look at closely for network and other high DMA traffic stacks. --mgross -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: iova RB tree setup tweak.
On Mon, Feb 11, 2008 at 02:29:46PM -0800, Andrew Morton wrote: > On Mon, 11 Feb 2008 13:56:51 -0800 > mark gross <[EMAIL PROTECTED]> wrote: > > > The following patch merges two functions into one allowing for a 3% > > reduction in overhead in locating, allocating and inserting pages for > > use in IOMMU operations. > > > > Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take > > a look. It works by re-using some of the information gathered in the > > search for the pages to use in setting up the IOTLB's in the insertion > > of the iova structure into the RB tree. > > > > I guess this is a PCI patch hence I'd be tagging is as > to-be-merged-via-greg's-pci-tree. > > > > > Singed-off-by: <[EMAIL PROTECTED]> > > > > ow, that musta hurt. Not at first but ever since I posted the patch its been a bit. > > > > > Index: linux-2.6.24-mm1/drivers/pci/iova.c > > === > > --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 > > 11:05:44.0 -0800 > > +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 > > -0800 > > @@ -72,20 +72,25 @@ > > return pad_size; > > } > > > > -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long > > size, > > - unsigned long limit_pfn, struct iova *new, bool size_aligned) > > +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, > > + unsigned long size, unsigned long limit_pfn, > > + struct iova *new, bool size_aligned) > > { > > - struct rb_node *curr = NULL; > > + struct rb_node *prev, *curr = NULL; > > unsigned long flags; > > unsigned long saved_pfn; > > unsigned int pad_size = 0; > > > > /* Walk the tree backwards */ > > spin_lock_irqsave(>iova_rbtree_lock, flags); > > saved_pfn = limit_pfn; > > curr = __get_cached_rbnode(iovad, _pfn); > > + prev = curr; > > while (curr) { > > struct iova *curr_iova = container_of(curr, struct iova, node); > > + > > if (limit_pfn < curr_iova->pfn_lo) > > goto move_left; > > else if (limit_pfn < curr_iova->pfn_hi) > > @@ -99,6 +104,7 @@ > > For some reason patch(1) claims that the patch is corrupted at this line. > My bad. Try the following: --mgross Signed-off-by : <[EMAIL PROTECTED]> fa-la-la Index: linux-2.6.24-mm1/drivers/pci/iova.c === --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-12 07:12:47.0 -0800 @@ -72,10 +72,11 @@ return pad_size; } -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, struct iova *new, bool size_aligned) +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, + struct iova *new, bool size_aligned) { - struct rb_node *curr = NULL; + struct rb_node *prev, *curr = NULL; unsigned long flags; unsigned long saved_pfn; unsigned int pad_size = 0; @@ -84,8 +85,10 @@ spin_lock_irqsave(>iova_rbtree_lock, flags); saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, _pfn); + prev = curr; while (curr) { struct iova *curr_iova = container_of(curr, struct iova, node); + if (limit_pfn < curr_iova->pfn_lo) goto move_left; else if (limit_pfn < curr_iova->pfn_hi) @@ -99,6 +102,7 @@ adjust_limit_pfn: limit_pfn = curr_iova->pfn_lo - 1; move_left: + prev = curr; curr = rb_prev(curr); } @@ -115,7 +119,33 @@ new->pfn_lo = limit_pfn - (size + pad_size) + 1; new->pfn_hi = new->pfn_lo + size - 1; + /* Insert the new_iova into domain rbtree by holding writer lock */ + /* Add new node and rebalance tree. */ + { + struct rb_node **entry = &((prev)), *parent = NULL; + /* Figure out where to put new node */ + while (*entry) { + struct iova *this = container_of(*entry, + struct iova, node); + parent = *entry; + + if (new->pfn_lo < this->pfn_lo) + entry = &((*entry)-&
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 08:34:39AM -0800, Randy Dunlap wrote: mark gross wrote: Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 07:47:04.0 -0800 @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd-ignored) continue; -iommu = alloc_iommu(drhd); +g_num_of_iommus++; +} + +nlongs = BITS_TO_LONGS(g_num_of_iommus); +g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); +if (!g_iommus_to_flush) { +printk(KERN_ERR Intel-IOMMU: \ +Allocating bitmap array failed\n); I think that will make a string like below: Intel-IOMMU: Allocating bitmap array failed you are right. +return -ENOMEM; +} +strict [Default Off] +With this option on every umap_signal opperation will unmap_single operation +result in a hardware IOTLB flush opperation as opposed Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-12 07:12:06.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-12 11:35:42.0 -0800 @@ -21,6 +21,7 @@ #include linux/init.h #include linux/bitmap.h +#include linux/debugfs.h #include linux/slab.h #include linux/irq.h #include linux/interrupt.h @@ -30,6 +31,7 @@ #include linux/dmar.h #include linux/dma-mapping.h #include linux/mempool.h +#include linux/timer.h #include iova.h #include intel-iommu.h #include asm/proto.h /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO Intel-IOMMU: disable GFX device mapping\n); } else if (!strncmp(str, forcedac, 8)) { - printk (KERN_INFO + printk(KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, strict, 8)) { + printk(KERN_INFO + Intel-IOMMU: disable batched IOTLB flush\n); + intel_iommu_strict = 1; } str += strcspn(str, ,); @@ -965,17 +999,13 @@ set_bit(0, iommu-domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K); if (!iommu-reg) { printk(KERN_ERR IOMMU: can't map the region\n); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index cnt; index ++) + for (index = 0; index cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,30 @@ for_each_drhd_unit(drhd) { if (drhd-ignored) continue
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 01:00:06AM -0800, David Miller wrote: From: Muli Ben-Yehuda [EMAIL PROTECTED] Date: Tue, 12 Feb 2008 10:52:56 +0200 The streaming DMA-API was designed to conserve IOMMU mappings for machines where IOMMU mappings are a scarce resource, and is a poor fit for a modern IOMMU such as VT-d with a 64-bit IO address space (or even an IOMMU with a 32-bit address space such as Calgary) where there are plenty of IOMMU mappings available. For the 64-bit case what you are suggesting eventually amounts to mapping all available RAM in the IOMMU. Something could be done: we could enable drivers to have DMA-pools they manage that get mapped and are re-used. I would rather the DMA-pools be tied to PID's that way any bad behavior would be limited to the address space of the process using the device. I haven't thought about how hard this would be to do but it would be nice. I think this could be tricky. Application sets up ring buffer of device DMA memory, passes this to driver/stack. Need to handle hitting high water marks and application exit clean up sanely... --mgross Although an extreme version of your suggestion, it would be the most efficient as it would require zero IOMMU flush operations. But we'd lose things like protection and other benefits. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu batched iotlb flushes
On Tue, Feb 12, 2008 at 10:52:56AM +0200, Muli Ben-Yehuda wrote: On Mon, Feb 11, 2008 at 02:41:05PM -0800, mark gross wrote: The intel-iommu hardware requires a polling operation to flush IOTLB PTE's after an unmap operation. Through some TSC instrumentation of a netperf UDP stream with small packets test case it was seen that the flush operations where sucking up to 16% of the CPU time doing iommu_flush_iotlb's The following patch batches the IOTLB flushes removing most of the overhead in flushing the IOTLB's. It works by building a list of to be released IOVA's that is iterated over when a timer goes off or when a high water mark is reached. The wrinkle this has is that the memory protection and page fault warnings from errant DMA operations is somewhat reduced, hence a kernel parameter is added to revert back to the strict page flush / unmap behavior. The hole is the following scenarios: do many map_signal operations, do some unmap_signals, reuse a recently unmapped page, errant DMA hardware sneaks through and steps on reused memory Or: you have rouge hardware using DMA's to look at pages: do many map_signal's, do many unmap_singles, reuse some unmapped pages : rouge hardware looks at reused page Note : these holes are very hard to get too, as the IOTLB is small and only the PTE's still in the IOTLB can be accessed through this mechanism. Its recommended that strict is used when developing drivers that do DMA operations to catch bugs early. For production code where performance is desired running with the batched IOTLB flushing is a good way to go. While I don't disagree with this patch in principle (Calgary does the same thing due to expensive IOTLB flushes) the right way to fix it IMHO is to fix the drivers to batch mapping and unmapping operations or map up-front and unmap when done. The streaming DMA-API was designed to conserve IOMMU mappings for machines where IOMMU mappings are a scarce resource, and is a poor fit for a modern IOMMU such as VT-d with a 64-bit IO address space (or even an IOMMU with a 32-bit address space such as Calgary) where there are plenty of IOMMU mappings available. Yes, have a DMA pool of DMA addresses to use and re-use in the stack instead of setting up and tearing down the PTE's is something we need to look at closely for network and other high DMA traffic stacks. --mgross -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: iova RB tree setup tweak.
On Mon, Feb 11, 2008 at 02:29:46PM -0800, Andrew Morton wrote: On Mon, 11 Feb 2008 13:56:51 -0800 mark gross [EMAIL PROTECTED] wrote: The following patch merges two functions into one allowing for a 3% reduction in overhead in locating, allocating and inserting pages for use in IOMMU operations. Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take a look. It works by re-using some of the information gathered in the search for the pages to use in setting up the IOTLB's in the insertion of the iova structure into the RB tree. I guess this is a PCI patch hence I'd be tagging is as to-be-merged-via-greg's-pci-tree. Singed-off-by: [EMAIL PROTECTED] ow, that musta hurt. Not at first but ever since I posted the patch its been a bit. Index: linux-2.6.24-mm1/drivers/pci/iova.c === --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 -0800 @@ -72,20 +72,25 @@ return pad_size; } -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, struct iova *new, bool size_aligned) +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, + struct iova *new, bool size_aligned) { - struct rb_node *curr = NULL; + struct rb_node *prev, *curr = NULL; unsigned long flags; unsigned long saved_pfn; unsigned int pad_size = 0; /* Walk the tree backwards */ spin_lock_irqsave(iovad-iova_rbtree_lock, flags); saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, limit_pfn); + prev = curr; while (curr) { struct iova *curr_iova = container_of(curr, struct iova, node); + if (limit_pfn curr_iova-pfn_lo) goto move_left; else if (limit_pfn curr_iova-pfn_hi) @@ -99,6 +104,7 @@ For some reason patch(1) claims that the patch is corrupted at this line. My bad. Try the following: --mgross Signed-off-by : [EMAIL PROTECTED] fa-la-la Index: linux-2.6.24-mm1/drivers/pci/iova.c === --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-12 07:12:47.0 -0800 @@ -72,10 +72,11 @@ return pad_size; } -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, struct iova *new, bool size_aligned) +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, + struct iova *new, bool size_aligned) { - struct rb_node *curr = NULL; + struct rb_node *prev, *curr = NULL; unsigned long flags; unsigned long saved_pfn; unsigned int pad_size = 0; @@ -84,8 +85,10 @@ spin_lock_irqsave(iovad-iova_rbtree_lock, flags); saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, limit_pfn); + prev = curr; while (curr) { struct iova *curr_iova = container_of(curr, struct iova, node); + if (limit_pfn curr_iova-pfn_lo) goto move_left; else if (limit_pfn curr_iova-pfn_hi) @@ -99,6 +102,7 @@ adjust_limit_pfn: limit_pfn = curr_iova-pfn_lo - 1; move_left: + prev = curr; curr = rb_prev(curr); } @@ -115,7 +119,33 @@ new-pfn_lo = limit_pfn - (size + pad_size) + 1; new-pfn_hi = new-pfn_lo + size - 1; + /* Insert the new_iova into domain rbtree by holding writer lock */ + /* Add new node and rebalance tree. */ + { + struct rb_node **entry = ((prev)), *parent = NULL; + /* Figure out where to put new node */ + while (*entry) { + struct iova *this = container_of(*entry, + struct iova, node); + parent = *entry; + + if (new-pfn_lo this-pfn_lo) + entry = ((*entry)-rb_left); + else if (new-pfn_lo this-pfn_lo) + entry = ((*entry)-rb_right); + else + BUG(); /* this should not happen */ + } + + /* Add new node and rebalance tree. */ + rb_link_node(new-node, parent, entry); + rb_insert_color(new-node, iovad-rbroot); + } + __cached_rbnode_insert_update(iovad, saved_pfn, new); + spin_unlock_irqrestore(iovad-iova_rbtree_lock
Re: [PATCH]intel-iommu batched iotlb flushes
On Mon, Feb 11, 2008 at 03:27:16PM -0800, Randy Dunlap wrote: On Mon, 11 Feb 2008 14:41:05 -0800 mark gross wrote: The hole is the following scenarios: do many map_signal operations, do some unmap_signals, reuse a recently unmapped page, errant DMA hardware sneaks through and steps on reused memory Or: you have rouge hardware using DMA's to look at pages: do many or rogue hardware? yes bad-guy hardware. map_signal's, do many unmap_singles, reuse some unmapped pages : signal single rouge hardware looks at reused page why rouge? because I'm a dumb ass. Note : these holes are very hard to get too, as the IOTLB is small and only the PTE's still in the IOTLB can be accessed through this mechanism. Its recommended that strict is used when developing drivers that do DMA operations to catch bugs early. For production code where performance is desired running with the batched IOTLB flushing is a good way to go. --mgross Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 13:03:10.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-11 10:38:49.0 -0800 @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO Intel-IOMMU: disable GFX device mapping\n); } else if (!strncmp(str, forcedac, 8)) { - printk (KERN_INFO + printk(KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, strict, 8)) { + printk(KERN_INFO + Intel-IOMMU: disable bached IOTLB flush\n); batched fixed. + intel_iommu_strict = 1; } str += strcspn(str, ,); @@ -1672,7 +1702,29 @@ for_each_drhd_unit(drhd) { if (drhd-ignored) continue; - iommu = alloc_iommu(drhd); + g_num_of_iommus++; + } + + nlongs = BITS_TO_LONGS(g_num_of_iommus); + g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); + if (!g_iommus_to_flush) { + printk(KERN_ERR Allocating bitmap array failed\n); identify: IOMMU: + return -ENOMEM; + } + + g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); + if (!g_iommus) { + kfree(g_iommus_to_flush); + ret = -ENOMEM; + goto error; + } + + i = 0; + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + iommu = alloc_iommu(g_iommus[i], drhd); + i++; if (!iommu) { ret = -ENOMEM; goto error; Index: linux-2.6.24-mm1/Documentation/kernel-parameters.txt === --- linux-2.6.24-mm1.orig/Documentation/kernel-parameters.txt 2008-02-11 13:44:23.0 -0800 +++ linux-2.6.24-mm1/Documentation/kernel-parameters.txt2008-02-11 14:23:37.0 -0800 @@ -822,6 +822,10 @@ than 32 bit addressing. The default is to look for translation below 32 bit and if not available then look in the higher range. + strict [Default Off] + With this option on every umap_signle will on, every unmap_si{ngle,gnal} ?? fixed. + result
[PATCH]intel-iommu batched iotlb flushes
The intel-iommu hardware requires a polling operation to flush IOTLB PTE's after an unmap operation. Through some TSC instrumentation of a netperf UDP stream with small packets test case it was seen that the flush operations where sucking up to 16% of the CPU time doing iommu_flush_iotlb's The following patch batches the IOTLB flushes removing most of the overhead in flushing the IOTLB's. It works by building a list of to be released IOVA's that is iterated over when a timer goes off or when a high water mark is reached. The wrinkle this has is that the memory protection and page fault warnings from errant DMA operations is somewhat reduced, hence a kernel parameter is added to revert back to the "strict" page flush / unmap behavior. The hole is the following scenarios: do many map_signal operations, do some unmap_signals, reuse a recently unmapped page, Or: you have rouge hardware using DMA's to look at pages: do many map_signal's, do many unmap_singles, reuse some unmapped pages : Note : these holes are very hard to get too, as the IOTLB is small and only the PTE's still in the IOTLB can be accessed through this mechanism. Its recommended that strict is used when developing drivers that do DMA operations to catch bugs early. For production code where performance is desired running with the batched IOTLB flushing is a good way to go. --mgross Signed-off-by: <[EMAIL PROTECTED]> Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 13:03:10.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-11 10:38:49.0 -0800 @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include "iova.h" #include "intel-iommu.h" #include /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) << gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO "Intel-IOMMU: disable GFX device mapping\n"); } else if (!strncmp(str, "forcedac", 8)) { - printk (KERN_INFO + printk(KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "strict", 8)) { + printk(KERN_INFO + "Intel-IOMMU: disable bached IOTLB flush\n"); + intel_iommu_strict = 1; } str += strcspn(str, ","); @@ -965,17 +999,13 @@ set_bit(0, iommu->domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { printk(KERN_ERR "IOMMU: can't map the region\n"); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index < cnt; index ++) + for (index = 0; index < cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i, ret, unit = 0; /* * for each drhd @@ -1672,7 +1702,29 @@ for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu =
iova RB tree setup tweak.
The following patch merges two functions into one allowing for a 3% reduction in overhead in locating, allocating and inserting pages for use in IOMMU operations. Its a bit of a eye-crosser so I welcome any RB-tree / MM experts to take a look. It works by re-using some of the information gathered in the search for the pages to use in setting up the IOTLB's in the insertion of the iova structure into the RB tree. --mgross Singed-off-by: <[EMAIL PROTECTED]> Index: linux-2.6.24-mm1/drivers/pci/iova.c === --- linux-2.6.24-mm1.orig/drivers/pci/iova.c2008-02-07 11:05:44.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/iova.c 2008-02-07 13:05:03.0 -0800 @@ -72,20 +72,25 @@ return pad_size; } -static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, struct iova *new, bool size_aligned) +static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, + struct iova *new, bool size_aligned) { - struct rb_node *curr = NULL; + struct rb_node *prev, *curr = NULL; unsigned long flags; unsigned long saved_pfn; unsigned int pad_size = 0; /* Walk the tree backwards */ spin_lock_irqsave(>iova_rbtree_lock, flags); saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, _pfn); + prev = curr; while (curr) { struct iova *curr_iova = container_of(curr, struct iova, node); + if (limit_pfn < curr_iova->pfn_lo) goto move_left; else if (limit_pfn < curr_iova->pfn_hi) @@ -99,6 +104,7 @@ adjust_limit_pfn: limit_pfn = curr_iova->pfn_lo - 1; move_left: + prev = curr; curr = rb_prev(curr); } @@ -115,7 +121,33 @@ new->pfn_lo = limit_pfn - (size + pad_size) + 1; new->pfn_hi = new->pfn_lo + size - 1; + /* Insert the new_iova into domain rbtree by holding writer lock */ + /* Add new node and rebalance tree. */ + { + struct rb_node **entry = &((prev)), *parent = NULL; + /* Figure out where to put new node */ + while (*entry) { + struct iova *this = container_of(*entry, + struct iova, node); + parent = *entry; + + if (new->pfn_lo < this->pfn_lo) + entry = &((*entry)->rb_left); + else if (new->pfn_lo > this->pfn_lo) + entry = &((*entry)->rb_right); + else + BUG(); /* this should not happen */ + } + + /* Add new node and rebalance tree. */ + rb_link_node(>node, parent, entry); + rb_insert_color(>node, >rbroot); + } + __cached_rbnode_insert_update(iovad, saved_pfn, new); + spin_unlock_irqrestore(>iova_rbtree_lock, flags); + + return 0; } @@ -171,23 +203,15 @@ size = __roundup_pow_of_two(size); spin_lock_irqsave(>iova_alloc_lock, flags); - ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova, - size_aligned); + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + new_iova, size_aligned); + spin_unlock_irqrestore(>iova_alloc_lock, flags); if (ret) { - spin_unlock_irqrestore(>iova_alloc_lock, flags); free_iova_mem(new_iova); return NULL; } - /* Insert the new_iova into domain rbtree by holding writer lock */ - spin_lock(>iova_rbtree_lock); - iova_insert_rbtree(>rbroot, new_iova); - __cached_rbnode_insert_update(iovad, limit_pfn, new_iova); - spin_unlock(>iova_rbtree_lock); - - spin_unlock_irqrestore(>iova_alloc_lock, flags); - return new_iova; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH]intel-iommu batched iotlb flushes
The intel-iommu hardware requires a polling operation to flush IOTLB PTE's after an unmap operation. Through some TSC instrumentation of a netperf UDP stream with small packets test case it was seen that the flush operations where sucking up to 16% of the CPU time doing iommu_flush_iotlb's The following patch batches the IOTLB flushes removing most of the overhead in flushing the IOTLB's. It works by building a list of to be released IOVA's that is iterated over when a timer goes off or when a high water mark is reached. The wrinkle this has is that the memory protection and page fault warnings from errant DMA operations is somewhat reduced, hence a kernel parameter is added to revert back to the strict page flush / unmap behavior. The hole is the following scenarios: do many map_signal operations, do some unmap_signals, reuse a recently unmapped page, errant DMA hardware sneaks through and steps on reused memory Or: you have rouge hardware using DMA's to look at pages: do many map_signal's, do many unmap_singles, reuse some unmapped pages : rouge hardware looks at reused page Note : these holes are very hard to get too, as the IOTLB is small and only the PTE's still in the IOTLB can be accessed through this mechanism. Its recommended that strict is used when developing drivers that do DMA operations to catch bugs early. For production code where performance is desired running with the batched IOTLB flushing is a good way to go. --mgross Signed-off-by: [EMAIL PROTECTED] Index: linux-2.6.24-mm1/drivers/pci/intel-iommu.c === --- linux-2.6.24-mm1.orig/drivers/pci/intel-iommu.c 2008-02-07 13:03:10.0 -0800 +++ linux-2.6.24-mm1/drivers/pci/intel-iommu.c 2008-02-11 10:38:49.0 -0800 @@ -21,6 +21,7 @@ #include linux/init.h #include linux/bitmap.h +#include linux/debugfs.h #include linux/slab.h #include linux/irq.h #include linux/interrupt.h @@ -30,6 +31,7 @@ #include linux/dmar.h #include linux/dma-mapping.h #include linux/mempool.h +#include linux/timer.h #include iova.h #include intel-iommu.h #include asm/proto.h /* force_iommu in this header in x86-64*/ @@ -50,11 +52,39 @@ #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) + +static void flush_unmaps_timeout(unsigned long data); + +static struct timer_list unmap_timer = + TIMER_INITIALIZER(flush_unmaps_timeout, 0, 0); + +struct unmap_list { + struct list_head list; + struct dmar_domain *domain; + struct iova *iova; +}; + +static struct intel_iommu *g_iommus; +/* bitmap for indexing intel_iommus */ +static unsigned long *g_iommus_to_flush; +static int g_num_of_iommus; + +static DEFINE_SPINLOCK(async_umap_flush_lock); +static LIST_HEAD(unmaps_to_do); + +static int timer_on; +static long list_size; +static int high_watermark; + +static struct dentry *intel_iommu_debug, *debug; + + static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int intel_iommu_strict; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -73,9 +103,13 @@ printk(KERN_INFO Intel-IOMMU: disable GFX device mapping\n); } else if (!strncmp(str, forcedac, 8)) { - printk (KERN_INFO + printk(KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, strict, 8)) { + printk(KERN_INFO + Intel-IOMMU: disable bached IOTLB flush\n); + intel_iommu_strict = 1; } str += strcspn(str, ,); @@ -965,17 +999,13 @@ set_bit(0, iommu-domain_ids); return 0; } - -static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) { - struct intel_iommu *iommu; int ret; int map_size; u32 ver; - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); - if (!iommu) - return NULL; iommu-reg = ioremap(drhd-reg_base_addr, PAGE_SIZE_4K); if (!iommu-reg) { printk(KERN_ERR IOMMU: can't map the region\n); @@ -1396,7 +1426,7 @@ int index; while (dev) { - for (index = 0; index cnt; index ++) + for (index = 0; index cnt; index++) if (dev == devices[index]) return 1; @@ -1661,7 +1691,7 @@ struct dmar_rmrr_unit *rmrr; struct pci_dev *pdev; struct intel_iommu *iommu; - int ret, unit = 0; + int nlongs, i,
Re: Pull request: DMA pool updates
On Mon, Jan 28, 2008 at 05:11:47PM -0700, Matthew Wilcox wrote: > G'day Linus, mate > > Could you pull the dmapool branch of > git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc.git please? > > All the patches have been posted to linux-kernel before, and various > comments (and acks) have been taken into account. > > It's a fairly nice performance improvement, so would be good to get in. > It's survived a few hours of *mumble* high-stress database benchmark, > so I have high confidence in its stability. I haven't looked at this yet but, I've been doing some work with the IOMMU performance impacts on DMA operations. DMApooling could provide a way to mitigate some of the spankage gotten when using IOMMU's. --mgross > > -- > Intel are signing my paycheques ... these opinions are still mine > "Bill, look, we understand that you're interested in selling us this > operating system, but compare it to ours. We can't possibly take such > a retrograde step." > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to [EMAIL PROTECTED] For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: mailto:"[EMAIL PROTECTED]"> [EMAIL PROTECTED] -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Pull request: DMA pool updates
On Mon, Jan 28, 2008 at 05:11:47PM -0700, Matthew Wilcox wrote: G'day Linus, mate Could you pull the dmapool branch of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc.git please? All the patches have been posted to linux-kernel before, and various comments (and acks) have been taken into account. It's a fairly nice performance improvement, so would be good to get in. It's survived a few hours of *mumble* high-stress database benchmark, so I have high confidence in its stability. I haven't looked at this yet but, I've been doing some work with the IOMMU performance impacts on DMA operations. DMApooling could provide a way to mitigate some of the spankage gotten when using IOMMU's. --mgross -- Intel are signing my paycheques ... these opinions are still mine Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to [EMAIL PROTECTED] For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: a href=mailto:[EMAIL PROTECTED] [EMAIL PROTECTED] /a -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status
On Fri, Jan 25, 2008 at 11:02:55AM +0800, Zhenyu Wang wrote: > > Mark, sorry for missing this for long time... > > On 2008.01.08 12:44:20 -0800, mark gross wrote: > > > > > > > > [agp-mm] [intel_iommu] explicit export current graphics dmar status > > > > > > > > To make it possbile to tell other modules about curent > > > > graphics dmar engine status, that could decide if graphics > > > > driver should remap physical address to dma address. > > > > > > > > Also this one trys to make dmar_disabled really present > > > > current status of DMAR, which would be used for completely > > > > express graphics dmar engine is active or not. > > > > do you think this exporting will be needed? > > If so why and when would someone use it? > > This is used for our graphics driver module to know if we have to > do dma remapping in iommu case, both in kernel config and kernel > boot time param config. ok. How soon do you need this export? > > The simplest case is that DMAR_GFX_WA is on, which no dma mapping > will act in intel_agp. > > If no DMAR_GFX_WA, we still have boot param to turn off whole intel > iommu (intel_iommu=off), just turn off graphics remap engine > (intel_iommu=igfx_off). So this exported interface is used to know > that in runtime. > > > > > > > > > > > Signed-off-by: Zhenyu Wang <[EMAIL PROTECTED]> > > > > --- > > > > drivers/pci/intel-iommu.c | 18 -- > > > > include/linux/dmar.h |6 ++ > > > > 2 files changed, 22 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c > > > > index e079a52..81a0abc 100644 > > > > --- a/drivers/pci/intel-iommu.c > > > > +++ b/drivers/pci/intel-iommu.c > > > > @@ -53,7 +53,7 @@ > > > > static void domain_remove_dev_info(struct dmar_domain *domain); > > > > > > > > static int dmar_disabled; > > > > -static int __initdata dmar_map_gfx = 1; > > > > +static int dmar_map_gfx = 1; > > > > static int dmar_forcedac; > > > > > > > > #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) > > > > @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str) > > > > } > > > > __setup("intel_iommu=", intel_iommu_setup); > > > > > > > > +int intel_iommu_gfx_remapping(void) > > > > +{ > > > > +#ifndef CONFIG_DMAR_GFX_WA > > > > + if (!dmar_disabled && iommu_detected && dmar_map_gfx) > > > > + return 1; > > > > +#endif > > > > + return 0; > > > > +} > > > > +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping); > > > > + > > > > static struct kmem_cache *iommu_domain_cache; > > > > static struct kmem_cache *iommu_devinfo_cache; > > > > static struct kmem_cache *iommu_iova_cache; > > > > @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void) > > > > > > > > void __init detect_intel_iommu(void) > > > > { > > > > - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) > > > > + if (dmar_disabled) > > > > + return; > > > > + if (swiotlb || no_iommu || iommu_detected) { > > > > + dmar_disabled = 1; > > > > return; > > > > Why the bloat? This block of 7 lines looks like it does the same as the > > 2 you replaced. > > No, dmar_disabled is not set in origin, which can't tell if it's turned > off with (intel_iommu=off) later. > oops, I missed that how about something like if (swiotlb || no_iommu || iommu_detected || dmar_disabled) { dmar_disabled = 1; return; } I guess your way is ok too. > > > > > > + } > > > > if (early_dmar_detect()) { > > > > iommu_detected = 1; > > > > } > > > > diff --git a/include/linux/dmar.h b/include/linux/dmar.h > > > > index ffb6439..8ae435e 100644 > > > > --- a/include/linux/dmar.h > > > > +++ b/include/linux/dmar.h > > > > @@ -47,6 +47,8 @@ extern int intel_iommu_init(void); > > > > extern int dmar_table_init(void); > > > > extern int early_dmar_detect(void); > > > > > > > > +extern int in
Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status
On Fri, Jan 25, 2008 at 11:02:55AM +0800, Zhenyu Wang wrote: Mark, sorry for missing this for long time... On 2008.01.08 12:44:20 -0800, mark gross wrote: [agp-mm] [intel_iommu] explicit export current graphics dmar status To make it possbile to tell other modules about curent graphics dmar engine status, that could decide if graphics driver should remap physical address to dma address. Also this one trys to make dmar_disabled really present current status of DMAR, which would be used for completely express graphics dmar engine is active or not. do you think this exporting will be needed? If so why and when would someone use it? This is used for our graphics driver module to know if we have to do dma remapping in iommu case, both in kernel config and kernel boot time param config. ok. How soon do you need this export? The simplest case is that DMAR_GFX_WA is on, which no dma mapping will act in intel_agp. If no DMAR_GFX_WA, we still have boot param to turn off whole intel iommu (intel_iommu=off), just turn off graphics remap engine (intel_iommu=igfx_off). So this exported interface is used to know that in runtime. Signed-off-by: Zhenyu Wang [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 18 -- include/linux/dmar.h |6 ++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e079a52..81a0abc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,7 +53,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; -static int __initdata dmar_map_gfx = 1; +static int dmar_map_gfx = 1; static int dmar_forcedac; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str) } __setup(intel_iommu=, intel_iommu_setup); +int intel_iommu_gfx_remapping(void) +{ +#ifndef CONFIG_DMAR_GFX_WA + if (!dmar_disabled iommu_detected dmar_map_gfx) + return 1; +#endif + return 0; +} +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping); + static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static struct kmem_cache *iommu_iova_cache; @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void) void __init detect_intel_iommu(void) { - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) + if (dmar_disabled) + return; + if (swiotlb || no_iommu || iommu_detected) { + dmar_disabled = 1; return; Why the bloat? This block of 7 lines looks like it does the same as the 2 you replaced. No, dmar_disabled is not set in origin, which can't tell if it's turned off with (intel_iommu=off) later. oops, I missed that how about something like if (swiotlb || no_iommu || iommu_detected || dmar_disabled) { dmar_disabled = 1; return; } I guess your way is ok too. + } if (early_dmar_detect()) { iommu_detected = 1; } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index ffb6439..8ae435e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,8 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int intel_iommu_gfx_remapping(void); + extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; @@ -81,6 +83,10 @@ static inline int intel_iommu_init(void) { return -ENODEV; } +static inline int intel_iommu_gfx_remapping(void) +{ + return 0; +} Um this function is silly lets not post it. #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- 1.5.3.4 Any comments to this one? Is it ok to push this iommu patch with agp dma remapping patches to next -mm? Its not ready for posting. send me your current patch and eta for the graphics module patches so I can better coordinate with you. --mgross -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status
Sorry for the late reply. comments below... On Fri, Jan 04, 2008 at 09:53:38AM +0800, Zhenyu Wang wrote: > On 2007.12.19 13:26:08 +, Zhenyu Wang wrote: > > > > [agp-mm] [intel_iommu] explicit export current graphics dmar status > > > > To make it possbile to tell other modules about curent > > graphics dmar engine status, that could decide if graphics > > driver should remap physical address to dma address. > > > > Also this one trys to make dmar_disabled really present > > current status of DMAR, which would be used for completely > > express graphics dmar engine is active or not. do you think this exporting will be needed? If so why and when would someone use it? > > > > Signed-off-by: Zhenyu Wang <[EMAIL PROTECTED]> > > --- > > drivers/pci/intel-iommu.c | 18 -- > > include/linux/dmar.h |6 ++ > > 2 files changed, 22 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c > > index e079a52..81a0abc 100644 > > --- a/drivers/pci/intel-iommu.c > > +++ b/drivers/pci/intel-iommu.c > > @@ -53,7 +53,7 @@ > > static void domain_remove_dev_info(struct dmar_domain *domain); > > > > static int dmar_disabled; > > -static int __initdata dmar_map_gfx = 1; > > +static int dmar_map_gfx = 1; > > static int dmar_forcedac; > > > > #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) > > @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str) > > } > > __setup("intel_iommu=", intel_iommu_setup); > > > > +int intel_iommu_gfx_remapping(void) > > +{ > > +#ifndef CONFIG_DMAR_GFX_WA > > + if (!dmar_disabled && iommu_detected && dmar_map_gfx) > > + return 1; > > +#endif > > + return 0; > > +} > > +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping); > > + > > static struct kmem_cache *iommu_domain_cache; > > static struct kmem_cache *iommu_devinfo_cache; > > static struct kmem_cache *iommu_iova_cache; > > @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void) > > > > void __init detect_intel_iommu(void) > > { > > - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) > > + if (dmar_disabled) > > + return; > > + if (swiotlb || no_iommu || iommu_detected) { > > + dmar_disabled = 1; > > return; Why the bloat? This block of 7 lines looks like it does the same as the 2 you replaced. > > + } > > if (early_dmar_detect()) { > > iommu_detected = 1; > > } > > diff --git a/include/linux/dmar.h b/include/linux/dmar.h > > index ffb6439..8ae435e 100644 > > --- a/include/linux/dmar.h > > +++ b/include/linux/dmar.h > > @@ -47,6 +47,8 @@ extern int intel_iommu_init(void); > > extern int dmar_table_init(void); > > extern int early_dmar_detect(void); > > > > +extern int intel_iommu_gfx_remapping(void); > > + > > extern struct list_head dmar_drhd_units; > > extern struct list_head dmar_rmrr_units; > > > > @@ -81,6 +83,10 @@ static inline int intel_iommu_init(void) > > { > > return -ENODEV; > > } > > +static inline int intel_iommu_gfx_remapping(void) > > +{ > > + return 0; > > +} Um this function is silly lets not post it. > > > > #endif /* !CONFIG_DMAR */ > > #endif /* __DMAR_H__ */ > > -- > > 1.5.3.4 > > > > Any comments to this one? Is it ok to push this iommu patch with > agp dma remapping patches to next -mm? > Its not ready for posting. --mgross -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [agp-mm][PATCH 1/4][intel_iommu] explicit export current graphics dmar status
Sorry for the late reply. comments below... On Fri, Jan 04, 2008 at 09:53:38AM +0800, Zhenyu Wang wrote: On 2007.12.19 13:26:08 +, Zhenyu Wang wrote: [agp-mm] [intel_iommu] explicit export current graphics dmar status To make it possbile to tell other modules about curent graphics dmar engine status, that could decide if graphics driver should remap physical address to dma address. Also this one trys to make dmar_disabled really present current status of DMAR, which would be used for completely express graphics dmar engine is active or not. do you think this exporting will be needed? If so why and when would someone use it? Signed-off-by: Zhenyu Wang [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 18 -- include/linux/dmar.h |6 ++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e079a52..81a0abc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,7 +53,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain); static int dmar_disabled; -static int __initdata dmar_map_gfx = 1; +static int dmar_map_gfx = 1; static int dmar_forcedac; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) @@ -86,6 +86,16 @@ static int __init intel_iommu_setup(char *str) } __setup(intel_iommu=, intel_iommu_setup); +int intel_iommu_gfx_remapping(void) +{ +#ifndef CONFIG_DMAR_GFX_WA + if (!dmar_disabled iommu_detected dmar_map_gfx) + return 1; +#endif + return 0; +} +EXPORT_SYMBOL_GPL(intel_iommu_gfx_remapping); + static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static struct kmem_cache *iommu_iova_cache; @@ -2189,8 +2199,12 @@ static void __init iommu_exit_mempool(void) void __init detect_intel_iommu(void) { - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) + if (dmar_disabled) + return; + if (swiotlb || no_iommu || iommu_detected) { + dmar_disabled = 1; return; Why the bloat? This block of 7 lines looks like it does the same as the 2 you replaced. + } if (early_dmar_detect()) { iommu_detected = 1; } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index ffb6439..8ae435e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,8 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int intel_iommu_gfx_remapping(void); + extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; @@ -81,6 +83,10 @@ static inline int intel_iommu_init(void) { return -ENODEV; } +static inline int intel_iommu_gfx_remapping(void) +{ + return 0; +} Um this function is silly lets not post it. #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- 1.5.3.4 Any comments to this one? Is it ok to push this iommu patch with agp dma remapping patches to next -mm? Its not ready for posting. --mgross -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[EMAIL PROTECTED]: intel-iommu-PMEN-think-oh patch.]
I forgot to cc the list. --mgross - Forwarded message from mark gross <[EMAIL PROTECTED]> - Date: Tue, 27 Nov 2007 15:46:09 -0800 From: mark gross <[EMAIL PROTECTED]> To: Andrew Morton <[EMAIL PROTECTED]> Reply-To: [EMAIL PROTECTED] Subject: intel-iommu-PMEN-think-oh patch. I screwed up with my earlier patch to enable the portected memroy. The macro IOMMU_WAIT, exits when the condition goes true. Without this patch the code will hang at boot and some ( all?) vtd enabled systems. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> Index: linux-2.6.24-rc2+/drivers/pci/intel-iommu.c === --- linux-2.6.24-rc2+.orig/drivers/pci/intel-iommu.c2007-11-27 15:29:38.0 -0800 +++ linux-2.6.24-rc2+/drivers/pci/intel-iommu.c 2007-11-27 15:30:06.0 -0800 @@ -704,7 +704,7 @@ /* wait for the protected region status bit to clear */ IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, - readl, (pmen & DMA_PMEN_PRS), pmen); + readl, !(pmen & DMA_PMEN_PRS), pmen); spin_unlock_irqrestore(>register_lock, flags); } - End forwarded message - - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[EMAIL PROTECTED]: intel-iommu-PMEN-think-oh patch.]
I forgot to cc the list. --mgross - Forwarded message from mark gross [EMAIL PROTECTED] - Date: Tue, 27 Nov 2007 15:46:09 -0800 From: mark gross [EMAIL PROTECTED] To: Andrew Morton [EMAIL PROTECTED] Reply-To: [EMAIL PROTECTED] Subject: intel-iommu-PMEN-think-oh patch. I screwed up with my earlier patch to enable the portected memroy. The macro IOMMU_WAIT, exits when the condition goes true. Without this patch the code will hang at boot and some ( all?) vtd enabled systems. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.24-rc2+/drivers/pci/intel-iommu.c === --- linux-2.6.24-rc2+.orig/drivers/pci/intel-iommu.c2007-11-27 15:29:38.0 -0800 +++ linux-2.6.24-rc2+/drivers/pci/intel-iommu.c 2007-11-27 15:30:06.0 -0800 @@ -704,7 +704,7 @@ /* wait for the protected region status bit to clear */ IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, - readl, (pmen DMA_PMEN_PRS), pmen); + readl, !(pmen DMA_PMEN_PRS), pmen); spin_unlock_irqrestore(iommu-register_lock, flags); } - End forwarded message - - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] More Sanity checks for DMAR
On Mon, Nov 19, 2007 at 03:03:48PM -0800, Fenghua Yu wrote: > > This patch adds and changes a few sanity checks in dmar.c. > > 1. The haw field in ACPI DMAR table in VT-d spec doesn't describe the range of > haw. But since DMA page size is 4KB in DMA remapping, haw should be at least > 4KB. The current VT-d code in dmar.c returns failure when haw==0. This sanity > check is not accurate and execution can pass when haw is less than one page > size 4KB. This patch changes the haw sanity check to validate if haw is less > than 4KB. > 2. Add dmar_rmrr_units verification. > 3. Add parse_dmar_table() verification. > > Thanks. > > -Fenghua > > Signed-off-by: Fenghua Yu <[EMAIL PROTECTED]> > > --- > > dmar.c | 20 +--- > 1 files changed, 17 insertions(+), 3 deletions(-) > > diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c > index 5dfdfda..a5b6ed0 100644 > --- a/drivers/pci/dmar.c > +++ b/drivers/pci/dmar.c > @@ -25,6 +25,7 @@ > > #include > #include > +#include "iova.h" > > #undef PREFIX > #define PREFIX "DMAR:" > @@ -263,8 +264,8 @@ parse_dmar_table(void) > if (!dmar) > return -ENODEV; > > - if (!dmar->width) { > - printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n"); > + if (dmar->width < PAGE_SHIFT_4K - 1) { > + printk (KERN_WARNING PREFIX "Invalid DMAR haw\n"); > return -EINVAL; > } > > @@ -301,11 +302,24 @@ parse_dmar_table(void) > int __init dmar_table_init(void) > { > > - parse_dmar_table(); > + int ret; > + > + ret = parse_dmar_table(); > + if (ret) { > + printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); > + return ret; > + } > + > if (list_empty(_drhd_units)) { > printk(KERN_INFO PREFIX "No DMAR devices found\n"); > return -ENODEV; > } > + > + if (list_empty(_rmrr_units)) { > + printk(KERN_INFO PREFIX "No RMRR found\n"); > + return -ENODEV; > + } > + > return 0; > } > > - Ack : mark gross <[EMAIL PROTECTED]> Thanks, --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] intel-iommu-fault_reason_index_cleanup.patch
The following patch fixes an off by one bug in the fault reason string reporting function, and cleans up some of the code around this buglet. please apply. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c 2007-11-20 10:04:39.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-20 10:10:48.0 -0800 @@ -745,7 +745,7 @@ /* iommu interrupt handling. Most stuff are MSI-like. */ -static char *fault_reason_strings[] = +static const char *fault_reason_strings[] = { "Software", "Present bit in root entry is clear", @@ -759,15 +759,14 @@ "Context table ptr is invalid", "non-zero reserved fields in RTP", "non-zero reserved fields in CTP", - "non-zero reserved fields in PTE", - "Unknown" + "non-zero reserved fields in PTE" }; #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason) { - if (fault_reason >= MAX_FAULT_REASON_IDX) - return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; + if (fault_reason > MAX_FAULT_REASON_IDX) + return "Unknown"; else return fault_reason_strings[fault_reason]; } @@ -825,7 +824,7 @@ static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, u64 addr) { - char *reason; + const char *reason; reason = dmar_get_fault_reason(fault_reason); Index: linux-2.6.23-rc2-iommu/include/linux/dmar.h === --- linux-2.6.23-rc2-iommu.orig/include/linux/dmar.h2007-11-20 10:12:06.0 -0800 +++ linux-2.6.23-rc2-iommu/include/linux/dmar.h 2007-11-20 10:13:43.0 -0800 @@ -28,7 +28,7 @@ #ifdef CONFIG_DMAR struct intel_iommu; -extern char *dmar_get_fault_reason(u8 fault_reason); +extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions * since DMAR is not a pci device - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu-PMEN support
On Mon, Nov 19, 2007 at 04:38:02PM -0800, Andrew Morton wrote: > On Fri, 16 Nov 2007 14:39:57 -0800 > mark gross <[EMAIL PROTECTED]> wrote: > > > -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 > > +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) > > hm. The logic in there looks screwy. I didn't like it when they added the -1 to the above, but after looking at it sideways for a while I convinced myself that it was ok. so I thought it be good to add the parentheses. > > > static char *fault_reason_strings[] = > { > "Software", > "Present bit in root entry is clear", > "Present bit in context entry is clear", > "Invalid context entry", > "Access beyond MGAW", > "PTE Write access is not set", > "PTE Read access is not set", > "Next page table ptr is invalid", > "Root table address invalid", > "Context table ptr is invalid", > "non-zero reserved fields in RTP", > "non-zero reserved fields in CTP", > "non-zero reserved fields in PTE", > "Unknown" > }; > #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) > > char *dmar_get_fault_reason(u8 fault_reason) > { > if (fault_reason >= MAX_FAULT_REASON_IDX) > return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; > else > return fault_reason_strings[fault_reason]; > } > > > so all invalid fault_reasons will cause us to display "non-zero reserved > fields in PTE". GAH! Thats wrong. I also don't like the use of >= here, its harder for me to parse in my head than just >. > > Why not just do > > if (fault_reason >= ARRAY_SIZE(fault_reason_strings)) > return "Unknown"; > return fault_reason_strings[fault_reason]; > > ? Why not indeed. :) > > (might as well make fault_reason_strings[] const, too). Thats a good thing too. I'll put together a patch this morning to address these think-ohs. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]intel-iommu-PMEN support
On Mon, Nov 19, 2007 at 04:38:02PM -0800, Andrew Morton wrote: On Fri, 16 Nov 2007 14:39:57 -0800 mark gross [EMAIL PROTECTED] wrote: -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) hm. The logic in there looks screwy. I didn't like it when they added the -1 to the above, but after looking at it sideways for a while I convinced myself that it was ok. so I thought it be good to add the parentheses. static char *fault_reason_strings[] = { Software, Present bit in root entry is clear, Present bit in context entry is clear, Invalid context entry, Access beyond MGAW, PTE Write access is not set, PTE Read access is not set, Next page table ptr is invalid, Root table address invalid, Context table ptr is invalid, non-zero reserved fields in RTP, non-zero reserved fields in CTP, non-zero reserved fields in PTE, Unknown }; #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) char *dmar_get_fault_reason(u8 fault_reason) { if (fault_reason = MAX_FAULT_REASON_IDX) return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; else return fault_reason_strings[fault_reason]; } so all invalid fault_reasons will cause us to display non-zero reserved fields in PTE. GAH! Thats wrong. I also don't like the use of = here, its harder for me to parse in my head than just . Why not just do if (fault_reason = ARRAY_SIZE(fault_reason_strings)) return Unknown; return fault_reason_strings[fault_reason]; ? Why not indeed. :) (might as well make fault_reason_strings[] const, too). Thats a good thing too. I'll put together a patch this morning to address these think-ohs. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] intel-iommu-fault_reason_index_cleanup.patch
The following patch fixes an off by one bug in the fault reason string reporting function, and cleans up some of the code around this buglet. please apply. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c 2007-11-20 10:04:39.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-20 10:10:48.0 -0800 @@ -745,7 +745,7 @@ /* iommu interrupt handling. Most stuff are MSI-like. */ -static char *fault_reason_strings[] = +static const char *fault_reason_strings[] = { Software, Present bit in root entry is clear, @@ -759,15 +759,14 @@ Context table ptr is invalid, non-zero reserved fields in RTP, non-zero reserved fields in CTP, - non-zero reserved fields in PTE, - Unknown + non-zero reserved fields in PTE }; #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) -char *dmar_get_fault_reason(u8 fault_reason) +const char *dmar_get_fault_reason(u8 fault_reason) { - if (fault_reason = MAX_FAULT_REASON_IDX) - return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; + if (fault_reason MAX_FAULT_REASON_IDX) + return Unknown; else return fault_reason_strings[fault_reason]; } @@ -825,7 +824,7 @@ static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u16 source_id, u64 addr) { - char *reason; + const char *reason; reason = dmar_get_fault_reason(fault_reason); Index: linux-2.6.23-rc2-iommu/include/linux/dmar.h === --- linux-2.6.23-rc2-iommu.orig/include/linux/dmar.h2007-11-20 10:12:06.0 -0800 +++ linux-2.6.23-rc2-iommu/include/linux/dmar.h 2007-11-20 10:13:43.0 -0800 @@ -28,7 +28,7 @@ #ifdef CONFIG_DMAR struct intel_iommu; -extern char *dmar_get_fault_reason(u8 fault_reason); +extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions * since DMAR is not a pci device - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] More Sanity checks for DMAR
On Mon, Nov 19, 2007 at 03:03:48PM -0800, Fenghua Yu wrote: This patch adds and changes a few sanity checks in dmar.c. 1. The haw field in ACPI DMAR table in VT-d spec doesn't describe the range of haw. But since DMA page size is 4KB in DMA remapping, haw should be at least 4KB. The current VT-d code in dmar.c returns failure when haw==0. This sanity check is not accurate and execution can pass when haw is less than one page size 4KB. This patch changes the haw sanity check to validate if haw is less than 4KB. 2. Add dmar_rmrr_units verification. 3. Add parse_dmar_table() verification. Thanks. -Fenghua Signed-off-by: Fenghua Yu [EMAIL PROTECTED] --- dmar.c | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5dfdfda..a5b6ed0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -25,6 +25,7 @@ #include linux/pci.h #include linux/dmar.h +#include iova.h #undef PREFIX #define PREFIX DMAR: @@ -263,8 +264,8 @@ parse_dmar_table(void) if (!dmar) return -ENODEV; - if (!dmar-width) { - printk (KERN_WARNING PREFIX Zero: Invalid DMAR haw\n); + if (dmar-width PAGE_SHIFT_4K - 1) { + printk (KERN_WARNING PREFIX Invalid DMAR haw\n); return -EINVAL; } @@ -301,11 +302,24 @@ parse_dmar_table(void) int __init dmar_table_init(void) { - parse_dmar_table(); + int ret; + + ret = parse_dmar_table(); + if (ret) { + printk(KERN_INFO PREFIX parse DMAR table failure.\n); + return ret; + } + if (list_empty(dmar_drhd_units)) { printk(KERN_INFO PREFIX No DMAR devices found\n); return -ENODEV; } + + if (list_empty(dmar_rmrr_units)) { + printk(KERN_INFO PREFIX No RMRR found\n); + return -ENODEV; + } + return 0; } - Ack : mark gross [EMAIL PROTECTED] Thanks, --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH]intel-iommu-PMEN support
The following patch adds support for protected memory enable bits by clearing them if they are set at startup time. Some future boot loaders or firmware could have this bit set after it loads the kernel, and it needs to be cleared if DMA's are going to happen effectively. please apply --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c 2007-11-16 13:25:14.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-16 13:26:07.0 -0800 @@ -692,6 +692,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(>register_lock, flags); + pmen = readl(iommu->reg + DMAR_PMEN_REG); + pmen &= ~DMA_PMEN_EPM; + writel(pmen, iommu->reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen & DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(>register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -745,7 +762,7 @@ "non-zero reserved fields in PTE", "Unknown" }; -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) char *dmar_get_fault_reason(u8 fault_reason) { @@ -1730,6 +1747,8 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.h 2007-11-16 13:26:01.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h2007-11-16 13:26:36.0 -0800 @@ -71,7 +71,7 @@ hi = readl(dmar + reg + 4); \ (((u64) hi) << 32) + lo; }) */ -static inline u64 dmar_readq(void *addr) +static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; lo = readl(addr); @@ -139,6 +139,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) << 31) #define DMA_GCMD_SRTP (((u32)1) << 30) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH]intel-iommu-PMEN support
The following patch adds support for protected memory enable bits by clearing them if they are set at startup time. Some future boot loaders or firmware could have this bit set after it loads the kernel, and it needs to be cleared if DMA's are going to happen effectively. please apply --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.c 2007-11-16 13:25:14.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.c2007-11-16 13:26:07.0 -0800 @@ -692,6 +692,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(iommu-register_lock, flags); + pmen = readl(iommu-reg + DMAR_PMEN_REG); + pmen = ~DMA_PMEN_EPM; + writel(pmen, iommu-reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(iommu-register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -745,7 +762,7 @@ non-zero reserved fields in PTE, Unknown }; -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) char *dmar_get_fault_reason(u8 fault_reason) { @@ -1730,6 +1747,8 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h === --- linux-2.6.23-rc2-iommu.orig/drivers/pci/intel-iommu.h 2007-11-16 13:26:01.0 -0800 +++ linux-2.6.23-rc2-iommu/drivers/pci/intel-iommu.h2007-11-16 13:26:36.0 -0800 @@ -71,7 +71,7 @@ hi = readl(dmar + reg + 4); \ (((u64) hi) 32) + lo; }) */ -static inline u64 dmar_readq(void *addr) +static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; lo = readl(addr); @@ -139,6 +139,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)31) +#define DMA_PMEN_PRS (((u32)1)0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) 31) #define DMA_GCMD_SRTP (((u32)1) 30) - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] pm-qos-remove-locks-around-blocking-notifier.patch ... was Re: 2.6.24-rc2-mm1
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote: > On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: > > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote: > > > > > > > [ 102.366932] === > > > > > [ 108.552031] printk: 31 messages suppressed. > > > > > > > > > > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 > > > > patches makes the BUGs go away : > > > > > > > > latencyc-use-qos-infrastructure.patch > > > > pm-qos-infrastructure-and-interface.patch > > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch > > > > > > > > > > > > Gabriel > > > > > > > > > > > This looks like the same issue Rafael saw. > > > > > > Try the patch in the following post: > > > > > > http://marc.info/?l=linux-kernel=119265627228498=2 > > > > > > > Well that's not very good. _I_ can go fishing in my lkml archives for > > random > > patches but not everyone is set up to do that. And the diff to which you > > refer gets 100% rejects against rc2-mm1 anyway. > > > > Please prepare a tested, changelogged patch against rc2-mm1 asap. > > pm-qos-remove-locks-around-blocking-notifier.patch I have done some testing with this fix, I think it addresses all the sleep within a held lock warnings reported. please apply. Changelog: Remove spin locking around all blocking notifier calls that can sleep. I had to re-structure some of the code to avoid locking issues. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c === --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 11:09:27.0 -0800 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 14:10:09.0 -0800 @@ -135,13 +135,14 @@ } - -/* assumes pm_qos_lock is held */ static void update_target(int target) { s32 extreme_value; struct requirement_list *node; + unsigned long flags; + int call_notifier = 0; + spin_lock_irqsave(_qos_lock, flags); extreme_value = pm_qos_array[target]->default_value; list_for_each_entry(node, _qos_array[target]->requirements.list, list) { @@ -149,13 +150,16 @@ extreme_value, node->value); } if (pm_qos_array[target]->target_value != extreme_value) { + call_notifier = 1; pm_qos_array[target]->target_value = extreme_value; pr_debug(KERN_ERR "new target for qos %d is %d\n", target, pm_qos_array[target]->target_value); - blocking_notifier_call_chain(pm_qos_array[target]->notifiers, - (unsigned long) pm_qos_array[target]->target_value, - NULL); } + spin_unlock_irqrestore(_qos_lock, flags); + + if (call_notifier) + blocking_notifier_call_chain(pm_qos_array[target]->notifiers, + (unsigned long) extreme_value, NULL); } static int register_pm_qos_misc(struct pm_qos_object *qos) @@ -227,8 +231,8 @@ spin_lock_irqsave(_qos_lock, flags); list_add(>list, _qos_array[pm_qos_class]->requirements.list); - update_target(pm_qos_class); spin_unlock_irqrestore(_qos_lock, flags); + update_target(pm_qos_class); return 0; } @@ -269,11 +273,10 @@ break; } } + spin_unlock_irqrestore(_qos_lock, flags); if (pending_update) update_target(pm_qos_class); - spin_unlock_irqrestore(_qos_lock, flags); - return 0; } EXPORT_SYMBOL_GPL(pm_qos_update_requirement); @@ -303,9 +306,9 @@ break; } } + spin_unlock_irqrestore(_qos_lock, flags); if (pending_update) update_target(pm_qos_class); - spin_unlock_irqrestore(_qos_lock, flags); } EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); @@ -319,13 +322,10 @@ */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(_qos_lock, flags); retval = blocking_notifier_chain_register( pm_qos_array[pm_qos_class]->notifiers, notifier); - spin_unlock_irqrestore(_qos_lock, flags); return retval; } @@ -341,13 +341,10
Re: 2.6.24-rc2-mm1
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote: > On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: > > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote: > > > > > > > [ 102.366932] === > > > > > [ 108.552031] printk: 31 messages suppressed. > > > > > > > > > > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 > > > > patches makes the BUGs go away : > > > > > > > > latencyc-use-qos-infrastructure.patch > > > > pm-qos-infrastructure-and-interface.patch > > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch > > > > > > > > > > > > Gabriel > > > > > > > > > > > This looks like the same issue Rafael saw. > > > > > > Try the patch in the following post: > > > > > > http://marc.info/?l=linux-kernel=119265627228498=2 > > > > > > > Well that's not very good. _I_ can go fishing in my lkml archives for > > random > > patches but not everyone is set up to do that. And the diff to which you > > refer gets 100% rejects against rc2-mm1 anyway. > > > > Please prepare a tested, changelogged patch against rc2-mm1 asap. > > I'm having difficulty coming up with a .config that boots, I'll continue > working on this but the following is what I'm pretty confident will fix > the warnings. > > You should hold off until I get a system booting 2.6.24-rc2-mm1 before > taking this. I'm able to boot and have done a bit of testing and this patch wasn't good enough. I'm sending a new one in a min. --mgross > > pm-qos-remove-locks-around-blocking-notifier-registration.patch > > Changelog: > Remove spin locking around blocking notifier calls that can sleep. > > --mgross > > Signed-off-by: mark gross <[EMAIL PROTECTED]> > > > > > Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c > === > --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c 2007-11-15 > 11:09:27.0 -0800 > +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 > 11:10:08.0 -0800 > @@ -319,13 +319,10 @@ > */ > int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) > { > - unsigned long flags; > int retval; > > - spin_lock_irqsave(_qos_lock, flags); > retval = blocking_notifier_chain_register( > pm_qos_array[pm_qos_class]->notifiers, notifier); > - spin_unlock_irqrestore(_qos_lock, flags); > > return retval; > } > @@ -341,13 +338,10 @@ > */ > int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) > { > - unsigned long flags; > int retval; > > - spin_lock_irqsave(_qos_lock, flags); > retval = blocking_notifier_chain_unregister( > pm_qos_array[pm_qos_class]->notifiers, notifier); > - spin_unlock_irqrestore(_qos_lock, flags); > > return retval; > } > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote: > > > > > [ 102.366932] === > > > > [ 108.552031] printk: 31 messages suppressed. > > > > > > > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 > > > patches makes the BUGs go away : > > > > > > latencyc-use-qos-infrastructure.patch > > > pm-qos-infrastructure-and-interface.patch > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch > > > > > > > > > Gabriel > > > > > > > > This looks like the same issue Rafael saw. > > > > Try the patch in the following post: > > > > http://marc.info/?l=linux-kernel=119265627228498=2 > > > > Well that's not very good. _I_ can go fishing in my lkml archives for random > patches but not everyone is set up to do that. And the diff to which you > refer gets 100% rejects against rc2-mm1 anyway. > > Please prepare a tested, changelogged patch against rc2-mm1 asap. I'm having difficulty coming up with a .config that boots, I'll continue working on this but the following is what I'm pretty confident will fix the warnings. You should hold off until I get a system booting 2.6.24-rc2-mm1 before taking this. pm-qos-remove-locks-around-blocking-notifier-registration.patch Changelog: Remove spin locking around blocking notifier calls that can sleep. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c === --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 11:09:27.0 -0800 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 11:10:08.0 -0800 @@ -319,13 +319,10 @@ */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(_qos_lock, flags); retval = blocking_notifier_chain_register( pm_qos_array[pm_qos_class]->notifiers, notifier); - spin_unlock_irqrestore(_qos_lock, flags); return retval; } @@ -341,13 +338,10 @@ */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(_qos_lock, flags); retval = blocking_notifier_chain_unregister( pm_qos_array[pm_qos_class]->notifiers, notifier); - spin_unlock_irqrestore(_qos_lock, flags); return retval; } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote: > > > > > [ 102.366932] === > > > > [ 108.552031] printk: 31 messages suppressed. > > > > > > > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 > > > patches makes the BUGs go away : > > > > > > latencyc-use-qos-infrastructure.patch > > > pm-qos-infrastructure-and-interface.patch > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch > > > > > > > > > Gabriel > > > > > > > > This looks like the same issue Rafael saw. > > > > Try the patch in the following post: > > > > http://marc.info/?l=linux-kernel=119265627228498=2 > > > > Well that's not very good. _I_ can go fishing in my lkml archives for random > patches but not everyone is set up to do that. And the diff to which you > refer gets 100% rejects against rc2-mm1 anyway. > > Please prepare a tested, changelogged patch against rc2-mm1 asap. my bad. I didn't catch the 24-rc2-mm1 baseline for this issue. I'll look at this right way. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: > On Wed, 14 Nov 2007 12:29:59 -0800 mark gross <[EMAIL PROTECTED]> wrote: > > > > > [ 102.366932] === > > > > [ 108.552031] printk: 31 messages suppressed. > > > > > > > > > All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 > > > patches makes the BUGs go away : > > > > > > latencyc-use-qos-infrastructure.patch > > > pm-qos-infrastructure-and-interface.patch > > > pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch > > > > > > > > > Gabriel > > > > > > > > This looks like the same issue Rafael saw. > > > > Try the patch in the following post: > > > > http://marc.info/?l=linux-kernel=119265627228498=2 > > > > Well that's not very good. _I_ can go fishing in my lkml archives for random > patches but not everyone is set up to do that. And the diff to which you > refer gets 100% rejects against rc2-mm1 anyway. > > Please prepare a tested, changelogged patch against rc2-mm1 asap. I'm sorry for not being clear, this patch is already in your tree from a few weeks ago. I was pointing Gabriel at the fix from back then. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote: [ 102.366932] === [ 108.552031] printk: 31 messages suppressed. All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 patches makes the BUGs go away : latencyc-use-qos-infrastructure.patch pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch Gabriel This looks like the same issue Rafael saw. Try the patch in the following post: http://marc.info/?l=linux-kernelm=119265627228498w=2 Well that's not very good. _I_ can go fishing in my lkml archives for random patches but not everyone is set up to do that. And the diff to which you refer gets 100% rejects against rc2-mm1 anyway. Please prepare a tested, changelogged patch against rc2-mm1 asap. I'm sorry for not being clear, this patch is already in your tree from a few weeks ago. I was pointing Gabriel at the fix from back then. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote: [ 102.366932] === [ 108.552031] printk: 31 messages suppressed. All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 patches makes the BUGs go away : latencyc-use-qos-infrastructure.patch pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch Gabriel This looks like the same issue Rafael saw. Try the patch in the following post: http://marc.info/?l=linux-kernelm=119265627228498w=2 Well that's not very good. _I_ can go fishing in my lkml archives for random patches but not everyone is set up to do that. And the diff to which you refer gets 100% rejects against rc2-mm1 anyway. Please prepare a tested, changelogged patch against rc2-mm1 asap. my bad. I didn't catch the 24-rc2-mm1 baseline for this issue. I'll look at this right way. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote: [ 102.366932] === [ 108.552031] printk: 31 messages suppressed. All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 patches makes the BUGs go away : latencyc-use-qos-infrastructure.patch pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch Gabriel This looks like the same issue Rafael saw. Try the patch in the following post: http://marc.info/?l=linux-kernelm=119265627228498w=2 Well that's not very good. _I_ can go fishing in my lkml archives for random patches but not everyone is set up to do that. And the diff to which you refer gets 100% rejects against rc2-mm1 anyway. Please prepare a tested, changelogged patch against rc2-mm1 asap. I'm having difficulty coming up with a .config that boots, I'll continue working on this but the following is what I'm pretty confident will fix the warnings. You should hold off until I get a system booting 2.6.24-rc2-mm1 before taking this. pm-qos-remove-locks-around-blocking-notifier-registration.patch Changelog: Remove spin locking around blocking notifier calls that can sleep. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c === --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 11:09:27.0 -0800 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 11:10:08.0 -0800 @@ -319,13 +319,10 @@ */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_register( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } @@ -341,13 +338,10 @@ */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_unregister( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc2-mm1
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote: On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote: [ 102.366932] === [ 108.552031] printk: 31 messages suppressed. All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 patches makes the BUGs go away : latencyc-use-qos-infrastructure.patch pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch Gabriel This looks like the same issue Rafael saw. Try the patch in the following post: http://marc.info/?l=linux-kernelm=119265627228498w=2 Well that's not very good. _I_ can go fishing in my lkml archives for random patches but not everyone is set up to do that. And the diff to which you refer gets 100% rejects against rc2-mm1 anyway. Please prepare a tested, changelogged patch against rc2-mm1 asap. I'm having difficulty coming up with a .config that boots, I'll continue working on this but the following is what I'm pretty confident will fix the warnings. You should hold off until I get a system booting 2.6.24-rc2-mm1 before taking this. I'm able to boot and have done a bit of testing and this patch wasn't good enough. I'm sending a new one in a min. --mgross pm-qos-remove-locks-around-blocking-notifier-registration.patch Changelog: Remove spin locking around blocking notifier calls that can sleep. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c === --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c 2007-11-15 11:09:27.0 -0800 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 11:10:08.0 -0800 @@ -319,13 +319,10 @@ */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_register( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } @@ -341,13 +338,10 @@ */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_unregister( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] pm-qos-remove-locks-around-blocking-notifier.patch ... was Re: 2.6.24-rc2-mm1
On Thu, Nov 15, 2007 at 11:19:50AM -0800, mark gross wrote: On Wed, Nov 14, 2007 at 12:40:08PM -0800, Andrew Morton wrote: On Wed, 14 Nov 2007 12:29:59 -0800 mark gross [EMAIL PROTECTED] wrote: [ 102.366932] === [ 108.552031] printk: 31 messages suppressed. All this BUG / WARNINGS are caused by *-qos* patches. Reverting this 3 patches makes the BUGs go away : latencyc-use-qos-infrastructure.patch pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-static-initialization-with-blocking-notifiers.patch Gabriel This looks like the same issue Rafael saw. Try the patch in the following post: http://marc.info/?l=linux-kernelm=119265627228498w=2 Well that's not very good. _I_ can go fishing in my lkml archives for random patches but not everyone is set up to do that. And the diff to which you refer gets 100% rejects against rc2-mm1 anyway. Please prepare a tested, changelogged patch against rc2-mm1 asap. pm-qos-remove-locks-around-blocking-notifier.patch I have done some testing with this fix, I think it addresses all the sleep within a held lock warnings reported. please apply. Changelog: Remove spin locking around all blocking notifier calls that can sleep. I had to re-structure some of the code to avoid locking issues. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] Index: linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c === --- linux-2.6.24-rc2-mm1.orig/kernel/pm_qos_params.c2007-11-15 11:09:27.0 -0800 +++ linux-2.6.24-rc2-mm1/kernel/pm_qos_params.c 2007-11-15 14:10:09.0 -0800 @@ -135,13 +135,14 @@ } - -/* assumes pm_qos_lock is held */ static void update_target(int target) { s32 extreme_value; struct requirement_list *node; + unsigned long flags; + int call_notifier = 0; + spin_lock_irqsave(pm_qos_lock, flags); extreme_value = pm_qos_array[target]-default_value; list_for_each_entry(node, pm_qos_array[target]-requirements.list, list) { @@ -149,13 +150,16 @@ extreme_value, node-value); } if (pm_qos_array[target]-target_value != extreme_value) { + call_notifier = 1; pm_qos_array[target]-target_value = extreme_value; pr_debug(KERN_ERR new target for qos %d is %d\n, target, pm_qos_array[target]-target_value); - blocking_notifier_call_chain(pm_qos_array[target]-notifiers, - (unsigned long) pm_qos_array[target]-target_value, - NULL); } + spin_unlock_irqrestore(pm_qos_lock, flags); + + if (call_notifier) + blocking_notifier_call_chain(pm_qos_array[target]-notifiers, + (unsigned long) extreme_value, NULL); } static int register_pm_qos_misc(struct pm_qos_object *qos) @@ -227,8 +231,8 @@ spin_lock_irqsave(pm_qos_lock, flags); list_add(dep-list, pm_qos_array[pm_qos_class]-requirements.list); - update_target(pm_qos_class); spin_unlock_irqrestore(pm_qos_lock, flags); + update_target(pm_qos_class); return 0; } @@ -269,11 +273,10 @@ break; } } + spin_unlock_irqrestore(pm_qos_lock, flags); if (pending_update) update_target(pm_qos_class); - spin_unlock_irqrestore(pm_qos_lock, flags); - return 0; } EXPORT_SYMBOL_GPL(pm_qos_update_requirement); @@ -303,9 +306,9 @@ break; } } + spin_unlock_irqrestore(pm_qos_lock, flags); if (pending_update) update_target(pm_qos_class); - spin_unlock_irqrestore(pm_qos_lock, flags); } EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); @@ -319,13 +322,10 @@ */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_register( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } @@ -341,13 +341,10 @@ */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { - unsigned long flags; int retval; - spin_lock_irqsave(pm_qos_lock, flags); retval = blocking_notifier_chain_unregister( pm_qos_array[pm_qos_class]-notifiers, notifier); - spin_unlock_irqrestore(pm_qos_lock, flags); return retval; } - To unsubscribe from this list: send the line unsubscribe
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 05:18:02AM +0100, Gabriel C wrote: > Gabriel C wrote: > > Andrew Morton wrote: > >> ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc2/2.6.24-rc2-mm1/ > > > > I got it to boot but .. > > > > > ... > > [ 45.030261] input: Power Button (CM) as /devices/virtual/input/input4 > > [ 45.031331] BUG: sleeping function called from invalid context at > > kernel/rwsem.c:47 > > [ 45.031560] in_atomic():0, irqs_disabled():1 > > [ 45.031569] 1 lock held by modprobe/2105: > > [ 45.031574] #0: (pm_qos_lock){}, at: [] > > pm_qos_add_notifier+0x14/0x3c > > [ 45.031606] irq event stamp: 4036 > > [ 45.031612] hardirqs last enabled at (4035): [] > > debug_check_no_locks_freed+0xf9/0x105 > > [ 45.031632] hardirqs last disabled at (4036): [] > > _spin_lock_irqsave+0x10/0x55 > > [ 45.031653] softirqs last enabled at (3710): [] > > __do_softirq+0xe9/0xf1 > > [ 45.031670] softirqs last disabled at (3703): [] > > do_softirq+0x3a/0x52 > > [ 45.031685] [] show_trace_log_lvl+0x12/0x25 > > [ 45.031702] [] show_trace+0xd/0x10 > > [ 45.031717] [] dump_stack+0x16/0x18 > > [ 45.031728] [] __might_sleep+0xc2/0xc9 > > [ 45.031740] [] down_write+0x17/0x6f > > [ 45.031754] [] blocking_notifier_chain_register+0x26/0x3f > > [ 45.031766] [] pm_qos_add_notifier+0x27/0x3c > > [ 45.031778] [] acpi_processor_power_init+0x4d/0x164 > > [processor] > > [ 45.031802] [] acpi_processor_start+0x503/0x556 [processor] > > [ 45.031820] [] acpi_start_single_object+0x20/0x3d > > [ 45.031837] [] acpi_device_probe+0x78/0x88 > > [ 45.031850] [] driver_probe_device+0xb2/0x12d > > [ 45.031866] [] __driver_attach+0x76/0xaf > > [ 45.031878] [] bus_for_each_dev+0x3e/0x60 > > [ 45.031889] [] driver_attach+0x14/0x16 > > [ 45.031899] [] bus_add_driver+0x7a/0x180 > > [ 45.031909] [] driver_register+0x57/0x5c > > [ 45.031918] [] acpi_bus_register_driver+0x3a/0x3c > > [ 45.031929] [] acpi_processor_init+0x73/0xc5 [processor] > > [ 45.031943] [] sys_init_module+0x14e3/0x15ae > > [ 45.031958] [] sysenter_past_esp+0x5f/0xa5 > > [ 45.031969] === > > [ 45.032213] ACPI: Invalid PBLK length [0] > > [ 45.032465] ACPI: Invalid PBLK length [0] > > [ 45.032656] ACPI: Invalid PBLK length [0] > > > > > > ... > > > > ... > > [ 102.331554] BUG: sleeping function called from invalid context at > > kernel/rwsem.c:20 > > [ 102.331575] in_atomic():0, irqs_disabled():1 > > [ 102.331583] 1 lock held by artsd/4385: > > [ 102.331589] #0: (pm_qos_lock){}, at: [] > > pm_qos_add_requirement+0x5a/0x98 > > [ 102.331619] irq event stamp: 19022 > > [ 102.331624] hardirqs last enabled at (19021): [] > > _spin_unlock_irqrestore+0x36/0x3c > > [ 102.331641] hardirqs last disabled at (19022): [] > > _spin_lock_irqsave+0x10/0x55 > > [ 102.331655] softirqs last enabled at (18846): [] > > __do_softirq+0xe9/0xf1 > > [ 102.331672] softirqs last disabled at (18839): [] > > do_softirq+0x3a/0x52 > > [ 102.331688] [] show_trace_log_lvl+0x12/0x25 > > [ 102.331704] [] show_trace+0xd/0x10 > > [ 102.331715] [] dump_stack+0x16/0x18 > > [ 102.331727] [] __might_sleep+0xc2/0xc9 > > [ 102.331739] [] down_read+0x16/0x6a > > [ 102.331750] [] __blocking_notifier_call_chain+0x24/0x4c > > [ 102.331761] [] blocking_notifier_call_chain+0xc/0xe > > [ 102.331773] [] update_target+0x3e/0x43 > > [ 102.331784] [] pm_qos_add_requirement+0x76/0x98 > > [ 102.331795] [] snd_pcm_hw_params_user+0x2b1/0x302 > > [ 102.331811] [] snd_pcm_common_ioctl1+0x17a/0xda3 > > [ 102.331825] [] snd_pcm_playback_ioctl1+0x3ab/0x3c2 > > [ 102.331840] [] snd_pcm_playback_ioctl+0x27/0x35 > > [ 102.331853] [] vfs_ioctl+0x22/0x67 > > [ 102.331867] [] do_vfs_ioctl+0x25a/0x268 > > [ 102.331878] [] sys_ioctl+0x2c/0x45 > > [ 102.331889] [] sysenter_past_esp+0x5f/0xa5 > > [ 102.331901] === > > [ 102.331911] WARNING: at arch/x86/kernel/smp_32.c:561 > > native_smp_call_function_mask() > > [ 102.331920] [] show_trace_log_lvl+0x12/0x25 > > [ 102.331932] [] show_trace+0xd/0x10 > > [ 102.331944] [] dump_stack+0x16/0x18 > > [ 102.331955] [] native_smp_call_function_mask+0x39/0x11d > > [ 102.331970] [] smp_call_function+0x18/0x1d > > [ 102.331984] [] acpi_processor_latency_notify+0x13/0x1a > > [processor] > > [ 102.332011] [] notifier_call_chain+0x2b/0x4a > > [ 102.332023] [] __blocking_notifier_call_chain+0x37/0x4c > > [ 102.332035] [] blocking_notifier_call_chain+0xc/0xe > > [ 102.332047] [] update_target+0x3e/0x43 > > [ 102.332058] [] pm_qos_add_requirement+0x76/0x98 > > [ 102.332070] [] snd_pcm_hw_params_user+0x2b1/0x302 > > [ 102.332085] [] snd_pcm_common_ioctl1+0x17a/0xda3 > > [ 102.332097] [] snd_pcm_playback_ioctl1+0x3ab/0x3c2 > > [ 102.332111] [] snd_pcm_playback_ioctl+0x27/0x35 > > [
Re: 2.6.24-rc2-mm1
On Wed, Nov 14, 2007 at 05:18:02AM +0100, Gabriel C wrote: Gabriel C wrote: Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.24-rc2/2.6.24-rc2-mm1/ I got it to boot but .. ... [ 45.030261] input: Power Button (CM) as /devices/virtual/input/input4 [ 45.031331] BUG: sleeping function called from invalid context at kernel/rwsem.c:47 [ 45.031560] in_atomic():0, irqs_disabled():1 [ 45.031569] 1 lock held by modprobe/2105: [ 45.031574] #0: (pm_qos_lock){}, at: [c0139b1c] pm_qos_add_notifier+0x14/0x3c [ 45.031606] irq event stamp: 4036 [ 45.031612] hardirqs last enabled at (4035): [c0140cc3] debug_check_no_locks_freed+0xf9/0x105 [ 45.031632] hardirqs last disabled at (4036): [c0333bf5] _spin_lock_irqsave+0x10/0x55 [ 45.031653] softirqs last enabled at (3710): [c0128e96] __do_softirq+0xe9/0xf1 [ 45.031670] softirqs last disabled at (3703): [c0128ed8] do_softirq+0x3a/0x52 [ 45.031685] [c0104e01] show_trace_log_lvl+0x12/0x25 [ 45.031702] [c010562d] show_trace+0xd/0x10 [ 45.031717] [c0105704] dump_stack+0x16/0x18 [ 45.031728] [c011db69] __might_sleep+0xc2/0xc9 [ 45.031740] [c0139093] down_write+0x17/0x6f [ 45.031754] [c013991e] blocking_notifier_chain_register+0x26/0x3f [ 45.031766] [c0139b2f] pm_qos_add_notifier+0x27/0x3c [ 45.031778] [e885bc77] acpi_processor_power_init+0x4d/0x164 [processor] [ 45.031802] [e885a5b3] acpi_processor_start+0x503/0x556 [processor] [ 45.031820] [c022125c] acpi_start_single_object+0x20/0x3d [ 45.031837] [c0222392] acpi_device_probe+0x78/0x88 [ 45.031850] [c0244700] driver_probe_device+0xb2/0x12d [ 45.031866] [c02448a2] __driver_attach+0x76/0xaf [ 45.031878] [c0243cb0] bus_for_each_dev+0x3e/0x60 [ 45.031889] [c0244588] driver_attach+0x14/0x16 [ 45.031899] [c0243f9a] bus_add_driver+0x7a/0x180 [ 45.031909] [c0244a9d] driver_register+0x57/0x5c [ 45.031918] [c022267e] acpi_bus_register_driver+0x3a/0x3c [ 45.031929] [e882f073] acpi_processor_init+0x73/0xc5 [processor] [ 45.031943] [c0148ec5] sys_init_module+0x14e3/0x15ae [ 45.031958] [c0103d86] sysenter_past_esp+0x5f/0xa5 [ 45.031969] === [ 45.032213] ACPI: Invalid PBLK length [0] [ 45.032465] ACPI: Invalid PBLK length [0] [ 45.032656] ACPI: Invalid PBLK length [0] ... ... [ 102.331554] BUG: sleeping function called from invalid context at kernel/rwsem.c:20 [ 102.331575] in_atomic():0, irqs_disabled():1 [ 102.331583] 1 lock held by artsd/4385: [ 102.331589] #0: (pm_qos_lock){}, at: [c0139d4a] pm_qos_add_requirement+0x5a/0x98 [ 102.331619] irq event stamp: 19022 [ 102.331624] hardirqs last enabled at (19021): [c0333d1a] _spin_unlock_irqrestore+0x36/0x3c [ 102.331641] hardirqs last disabled at (19022): [c0333bf5] _spin_lock_irqsave+0x10/0x55 [ 102.331655] softirqs last enabled at (18846): [c0128e96] __do_softirq+0xe9/0xf1 [ 102.331672] softirqs last disabled at (18839): [c0128ed8] do_softirq+0x3a/0x52 [ 102.331688] [c0104e01] show_trace_log_lvl+0x12/0x25 [ 102.331704] [c010562d] show_trace+0xd/0x10 [ 102.331715] [c0105704] dump_stack+0x16/0x18 [ 102.331727] [c011db69] __might_sleep+0xc2/0xc9 [ 102.331739] [c0138fe7] down_read+0x16/0x6a [ 102.331750] [c0139872] __blocking_notifier_call_chain+0x24/0x4c [ 102.331761] [c01398a6] blocking_notifier_call_chain+0xc/0xe [ 102.331773] [c0139b82] update_target+0x3e/0x43 [ 102.331784] [c0139d66] pm_qos_add_requirement+0x76/0x98 [ 102.331795] [c02bd8a0] snd_pcm_hw_params_user+0x2b1/0x302 [ 102.331811] [c02bdad9] snd_pcm_common_ioctl1+0x17a/0xda3 [ 102.331825] [c02bee6b] snd_pcm_playback_ioctl1+0x3ab/0x3c2 [ 102.331840] [c02bef06] snd_pcm_playback_ioctl+0x27/0x35 [ 102.331853] [c01795ce] vfs_ioctl+0x22/0x67 [ 102.331867] [c017986d] do_vfs_ioctl+0x25a/0x268 [ 102.331878] [c01798a7] sys_ioctl+0x2c/0x45 [ 102.331889] [c0103d86] sysenter_past_esp+0x5f/0xa5 [ 102.331901] === [ 102.331911] WARNING: at arch/x86/kernel/smp_32.c:561 native_smp_call_function_mask() [ 102.331920] [c0104e01] show_trace_log_lvl+0x12/0x25 [ 102.331932] [c010562d] show_trace+0xd/0x10 [ 102.331944] [c0105704] dump_stack+0x16/0x18 [ 102.331955] [c0112f35] native_smp_call_function_mask+0x39/0x11d [ 102.331970] [c01142ef] smp_call_function+0x18/0x1d [ 102.331984] [e885b449] acpi_processor_latency_notify+0x13/0x1a [processor] [ 102.332011] [c01396e2] notifier_call_chain+0x2b/0x4a [ 102.332023] [c0139885] __blocking_notifier_call_chain+0x37/0x4c [ 102.332035] [c01398a6] blocking_notifier_call_chain+0xc/0xe [ 102.332047] [c0139b82] update_target+0x3e/0x43 [ 102.332058] [c0139d66]
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Fri, Nov 09, 2007 at 03:24:44PM -0500, [EMAIL PROTECTED] wrote: > On Thu, 08 Nov 2007 14:30:07 PST, Mark Gross said: > > > wing patch fixes up the cpuidle / pm-qos integration. > > > > I suspect that this is folded into another mm patch but it should fix > > C-state issue identified. > > Confirming that patch left my CPUs mostly in C3 again. Thanks. > > I'll have to let Mark and Andrew figure out this code's status in the -mm > queue. I checked on this last week, I'm pretty sure its in Andrew's current tree. Thanks again for looking at this. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Fri, Nov 09, 2007 at 03:24:44PM -0500, [EMAIL PROTECTED] wrote: On Thu, 08 Nov 2007 14:30:07 PST, Mark Gross said: wing patch fixes up the cpuidle / pm-qos integration. I suspect that this is folded into another mm patch but it should fix C-state issue identified. Confirming that patch left my CPUs mostly in C3 again. Thanks. I'll have to let Mark and Andrew figure out this code's status in the -mm queue. I checked on this last week, I'm pretty sure its in Andrew's current tree. Thanks again for looking at this. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote: > (Sorry for not reporting this sooner - I haven't been running off battery > much in the last 3 weeks, so I didn't notice it till now...) > > Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. > > As reported by 'powertop' on a basically idle machine: > > 2.6.23-mm1: > > CnAvg residency P-states (frequencies) > C0 (cpu running)(100.0%)2.00 Ghz 0.8% > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > C20.0ms ( 0.0%) 1333 Mhz 0.0% > C30.0ms ( 0.0%) 1000 Mhz99.2% > > 2.6.23-rc8-mm2: > > CnAvg residency P-states (frequencies) > C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > C20.0ms ( 0.0%) 1333 Mhz 0.0% > C3 31.5ms (99.7%) 1000 Mhz 100.0% > > In addition, the ACPI power estimate reported about 25 watts for 23-mm1, > but only 21 watts for -rc8-mm2, a significant regression. > > I bisected this down to this set of patches: > > pm-qos-infrastructure-and-interface.patch > pm-qos-infrastructure-and-interface-fix.patch > pm-qos-infrastructure-and-interface-vs-git-acpi.patch > pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch > latencyc-use-qos-infrastructure.patch > > The patch says: > > To register the default pm_qos target for the specific parameter, the > process must open one of /dev/[cpu_dma_latency, network_latency, > network_throughput] > > As long as the device node is held open that process has a registered > requirement on the parameter. The name of the requirement is > "process_" derived from the current->pid from within the open system > call. > > I shouldn't have to have a process open a /dev/file, write a number, and then > stay around forever so the file doesn't close in order to get the same > behavior > I was getting by default before. What needs to happen to get this to not > be a behavior regression/change? > > > > wing patch fixes up the cpuidle / pm-qos integration. I suspect that this is folded into another mm patch but it should fix C-state issue identified. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> - Index: linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/cpuidle.c 2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-11-08 13:25:13.0 -0800 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ Index: linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/ladder.c2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-11-08 13:11:30.0 -0800 @@ -82,7 +82,7 @@ if (last_idx < dev->state_count - 1 && last_residency > last_state->threshold.promotion_time && dev->states[last_idx + 1].exit_latency <= - pm_qos_requirement(PM_QOS_CPUIDLE)) { + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { Index: linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/menu.c 2007-11-08 13:12:11.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 2007-11-08 13:24:03.0 -0800 @@ -48,7 +48,8 @@ break; if (s->target_residency > data->predicted_us) break; - if (s->exit_latency > pm_qos_requirement(PM_QOS_CPUIDLE)) + if (s->exit_latency > + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) break; } Index: linux-2.6.23-mm1/include/linux/pm_qos_params.h === --- linux-2.6.23-mm1.orig/include/linux/pm_qos_params.h 2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/include/linux/pm_qos_params.h 2007-11-08 13:14:05.0 -0800 @@ -6,23 +6,12 @
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 10:02:12AM -0800, Andrew Morton wrote: > > On Thu, 08 Nov 2007 12:19:44 -0500 [EMAIL PROTECTED] wrote: > > (Sorry for not reporting this sooner - I haven't been running off battery > > much in the last 3 weeks, so I didn't notice it till now...) > > > > Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. > > > > As reported by 'powertop' on a basically idle machine: > > > > 2.6.23-mm1: > > > > CnAvg residency P-states (frequencies) > > C0 (cpu running)(100.0%)2.00 Ghz 0.8% > > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > > C20.0ms ( 0.0%) 1333 Mhz 0.0% > > C30.0ms ( 0.0%) 1000 Mhz99.2% > > > > 2.6.23-rc8-mm2: > > > > CnAvg residency P-states (frequencies) > > C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% > > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > > C20.0ms ( 0.0%) 1333 Mhz 0.0% > > C3 31.5ms (99.7%) 1000 Mhz 100.0% > > > > In addition, the ACPI power estimate reported about 25 watts for 23-mm1, > > but only 21 watts for -rc8-mm2, a significant regression. > > > > I bisected this down to this set of patches: > > > > pm-qos-infrastructure-and-interface.patch > > pm-qos-infrastructure-and-interface-fix.patch > > pm-qos-infrastructure-and-interface-vs-git-acpi.patch > > pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch > > latencyc-use-qos-infrastructure.patch > > > > The patch says: > > > > To register the default pm_qos target for the specific parameter, the > > process must open one of /dev/[cpu_dma_latency, network_latency, > > network_throughput] > > > > As long as the device node is held open that process has a registered > > requirement on the parameter. The name of the requirement is > > "process_" derived from the current->pid from within the open system > > call. > > > > I shouldn't have to have a process open a /dev/file, write a number, and > > then > > stay around forever so the file doesn't close in order to get the same > > behavior > > I was getting by default before. What needs to happen to get this to not > > be a behavior regression/change? > > > > That's a great report, thanks. Over to you, Mark ;) > > btw, I also have a note here that these patches caused Rafael to see an > smp_call_function() inside local_irq_save(). Did that get fixed? Ah, I see the problem. I think I posted a fix to this. The problem is that what's in the mm1 tree has a parameter PM_QOS_IDLE that needed to be PM_QOS_CPU_DMA_LATENCY. I'm not sure what's in the current MM tree at this point so I can't say its been fixed. Is there an easy way from me to see what's currently in MM? FWIW I think I fixed this when I fixed up Rafael's issue. Would you like me to send out a re-fresh patch against 2.6.23-mm1? --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote: > (Sorry for not reporting this sooner - I haven't been running off battery > much in the last 3 weeks, so I didn't notice it till now...) > > Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. > > As reported by 'powertop' on a basically idle machine: > > 2.6.23-mm1: > > CnAvg residency P-states (frequencies) > C0 (cpu running)(100.0%)2.00 Ghz 0.8% > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > C20.0ms ( 0.0%) 1333 Mhz 0.0% > C30.0ms ( 0.0%) 1000 Mhz99.2% > > 2.6.23-rc8-mm2: > > CnAvg residency P-states (frequencies) > C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% > C10.0ms ( 0.0%) 1.67 Ghz 0.0% > C20.0ms ( 0.0%) 1333 Mhz 0.0% > C3 31.5ms (99.7%) 1000 Mhz 100.0% > > In addition, the ACPI power estimate reported about 25 watts for 23-mm1, > but only 21 watts for -rc8-mm2, a significant regression. well, thats because you burn less watts if you get into C3. > > I bisected this down to this set of patches: > > pm-qos-infrastructure-and-interface.patch > pm-qos-infrastructure-and-interface-fix.patch > pm-qos-infrastructure-and-interface-vs-git-acpi.patch > pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch > latencyc-use-qos-infrastructure.patch yipes! I'll look at it right away. It looks like an integration issue with CPU-IDLE patches (those control the C-state entry). I'll get it fixed up. > > The patch says: > > To register the default pm_qos target for the specific parameter, the > process must open one of /dev/[cpu_dma_latency, network_latency, > network_throughput] > > As long as the device node is held open that process has a registered > requirement on the parameter. The name of the requirement is > "process_" derived from the current->pid from within the open system > call. > > I shouldn't have to have a process open a /dev/file, write a number, and then > stay around forever so the file doesn't close in order to get the same > behavior > I was getting by default before. What needs to happen to get this to not > be a behavior regression/change? > you won't have such a process (at least I highly doubt you do) I need to fix this. Thanks for taking the time to bisect it and reporting it to me! --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote: (Sorry for not reporting this sooner - I haven't been running off battery much in the last 3 weeks, so I didn't notice it till now...) Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. As reported by 'powertop' on a basically idle machine: 2.6.23-mm1: CnAvg residency P-states (frequencies) C0 (cpu running)(100.0%)2.00 Ghz 0.8% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C30.0ms ( 0.0%) 1000 Mhz99.2% 2.6.23-rc8-mm2: CnAvg residency P-states (frequencies) C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C3 31.5ms (99.7%) 1000 Mhz 100.0% In addition, the ACPI power estimate reported about 25 watts for 23-mm1, but only 21 watts for -rc8-mm2, a significant regression. well, thats because you burn less watts if you get into C3. I bisected this down to this set of patches: pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-fix.patch pm-qos-infrastructure-and-interface-vs-git-acpi.patch pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch latencyc-use-qos-infrastructure.patch yipes! I'll look at it right away. It looks like an integration issue with CPU-IDLE patches (those control the C-state entry). I'll get it fixed up. The patch says: To register the default pm_qos target for the specific parameter, the process must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] As long as the device node is held open that process has a registered requirement on the parameter. The name of the requirement is process_PID derived from the current-pid from within the open system call. I shouldn't have to have a process open a /dev/file, write a number, and then stay around forever so the file doesn't close in order to get the same behavior I was getting by default before. What needs to happen to get this to not be a behavior regression/change? you won't have such a process (at least I highly doubt you do) I need to fix this. Thanks for taking the time to bisect it and reporting it to me! --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 10:02:12AM -0800, Andrew Morton wrote: On Thu, 08 Nov 2007 12:19:44 -0500 [EMAIL PROTECTED] wrote: (Sorry for not reporting this sooner - I haven't been running off battery much in the last 3 weeks, so I didn't notice it till now...) Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. As reported by 'powertop' on a basically idle machine: 2.6.23-mm1: CnAvg residency P-states (frequencies) C0 (cpu running)(100.0%)2.00 Ghz 0.8% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C30.0ms ( 0.0%) 1000 Mhz99.2% 2.6.23-rc8-mm2: CnAvg residency P-states (frequencies) C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C3 31.5ms (99.7%) 1000 Mhz 100.0% In addition, the ACPI power estimate reported about 25 watts for 23-mm1, but only 21 watts for -rc8-mm2, a significant regression. I bisected this down to this set of patches: pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-fix.patch pm-qos-infrastructure-and-interface-vs-git-acpi.patch pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch latencyc-use-qos-infrastructure.patch The patch says: To register the default pm_qos target for the specific parameter, the process must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] As long as the device node is held open that process has a registered requirement on the parameter. The name of the requirement is process_PID derived from the current-pid from within the open system call. I shouldn't have to have a process open a /dev/file, write a number, and then stay around forever so the file doesn't close in order to get the same behavior I was getting by default before. What needs to happen to get this to not be a behavior regression/change? That's a great report, thanks. Over to you, Mark ;) btw, I also have a note here that these patches caused Rafael to see an smp_call_function() inside local_irq_save(). Did that get fixed? Ah, I see the problem. I think I posted a fix to this. The problem is that what's in the mm1 tree has a parameter PM_QOS_IDLE that needed to be PM_QOS_CPU_DMA_LATENCY. I'm not sure what's in the current MM tree at this point so I can't say its been fixed. Is there an easy way from me to see what's currently in MM? FWIW I think I fixed this when I fixed up Rafael's issue. Would you like me to send out a re-fresh patch against 2.6.23-mm1? --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1 breaks C-state support on Intel T7200 x86_64
On Thu, Nov 08, 2007 at 12:19:44PM -0500, [EMAIL PROTECTED] wrote: (Sorry for not reporting this sooner - I haven't been running off battery much in the last 3 weeks, so I didn't notice it till now...) Dell Latitude D820 laptop, T7200 Core2 Duo CPU, x86_64 kernel. As reported by 'powertop' on a basically idle machine: 2.6.23-mm1: CnAvg residency P-states (frequencies) C0 (cpu running)(100.0%)2.00 Ghz 0.8% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C30.0ms ( 0.0%) 1000 Mhz99.2% 2.6.23-rc8-mm2: CnAvg residency P-states (frequencies) C0 (cpu running)( 0.3%) 2.00 Ghz 0.0% C10.0ms ( 0.0%) 1.67 Ghz 0.0% C20.0ms ( 0.0%) 1333 Mhz 0.0% C3 31.5ms (99.7%) 1000 Mhz 100.0% In addition, the ACPI power estimate reported about 25 watts for 23-mm1, but only 21 watts for -rc8-mm2, a significant regression. I bisected this down to this set of patches: pm-qos-infrastructure-and-interface.patch pm-qos-infrastructure-and-interface-fix.patch pm-qos-infrastructure-and-interface-vs-git-acpi.patch pm-qos-infrastructure-and-interface-vs-git-acpi-2.patch latencyc-use-qos-infrastructure.patch The patch says: To register the default pm_qos target for the specific parameter, the process must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] As long as the device node is held open that process has a registered requirement on the parameter. The name of the requirement is process_PID derived from the current-pid from within the open system call. I shouldn't have to have a process open a /dev/file, write a number, and then stay around forever so the file doesn't close in order to get the same behavior I was getting by default before. What needs to happen to get this to not be a behavior regression/change? wing patch fixes up the cpuidle / pm-qos integration. I suspect that this is folded into another mm patch but it should fix C-state issue identified. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] - Index: linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/cpuidle.c 2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-11-08 13:25:13.0 -0800 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ Index: linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/ladder.c2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-11-08 13:11:30.0 -0800 @@ -82,7 +82,7 @@ if (last_idx dev-state_count - 1 last_residency last_state-threshold.promotion_time dev-states[last_idx + 1].exit_latency = - pm_qos_requirement(PM_QOS_CPUIDLE)) { + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state-stats.promotion_count++; last_state-stats.demotion_count = 0; if (last_state-stats.promotion_count = last_state-threshold.promotion_count) { Index: linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c === --- linux-2.6.23-mm1.orig/drivers/cpuidle/governors/menu.c 2007-11-08 13:12:11.0 -0800 +++ linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 2007-11-08 13:24:03.0 -0800 @@ -48,7 +48,8 @@ break; if (s-target_residency data-predicted_us) break; - if (s-exit_latency pm_qos_requirement(PM_QOS_CPUIDLE)) + if (s-exit_latency + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) break; } Index: linux-2.6.23-mm1/include/linux/pm_qos_params.h === --- linux-2.6.23-mm1.orig/include/linux/pm_qos_params.h 2007-11-08 13:09:53.0 -0800 +++ linux-2.6.23-mm1/include/linux/pm_qos_params.h 2007-11-08 13:14:05.0 -0800 @@ -6,23 +6,12 @@ #include linux/notifier.h #include linux/miscdevice.h -struct requirement_list { - struct list_head list; - union { - s32 value; - s32 usec; - s32 kbps; - }; - char *name; -}; - #define PM_QOS_RESERVED 0 #define PM_QOS_CPU_DMA_LATENCY 1 #define PM_QOS_NETWORK_LATENCY
Re: [PATCH] iommu-PMEN_REG boot up support
On Sat, Oct 27, 2007 at 02:19:38AM +0200, Muli Ben-Yehuda wrote: > On Fri, Oct 26, 2007 at 11:18:49AM -0700, Mark Gross wrote: > > > The following patch clears the portect memory region enable bit at > > boot time by default. It also provides a kernel parrameter for > > disabling this behavior and leave the PMEN_REG untouched if so > > wanted. > > > > If the boot loader or platform has protected memory regions enabled > > at boot time it could prevent DMA's from happening as drivers are > > loaded and used. > > What's the value of adding a command line option for this? Under what > circumstances would we want to not clear this bit? umm, /me asks around and finds out that at this time there isn't much point to having a command line to preserve any protected areas set up at boot time. > > > + } else if (!strncmp(str, "no_epm_clear", 12)) { > > + printk(KERN_INFO > > + "Intel-IOMMU: subress clearing of Enable " > > + "Protected Memory bit\n"); > > + clear_pmen_epm = 0; > > `suppress', I assume. Rest looks fine, if the configuration option is > really needed. > > Cheers, > Muli > -- > SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007 > http://www.haifa.il.ibm.com/Workshops/systor2007/ > > Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007 > - Signed-off-by: mark gross <[EMAIL PROTECTED]> --mgross Index: linux-2.6/drivers/pci/intel-iommu.c === --- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-29 13:30:29.0 -0700 @@ -692,6 +692,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(>register_lock, flags); + pmen = readl(iommu->reg + DMAR_PMEN_REG); + pmen &= ~DMA_PMEN_EPM; + writel(pmen, iommu->reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen & DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(>register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -1731,6 +1748,8 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6/drivers/pci/intel-iommu.h === --- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-29 10:30:24.0 -0700 @@ -126,6 +126,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) << 31) #define DMA_GCMD_SRTP (((u32)1) << 30) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] intel-iommu fixes
On Mon, Oct 29, 2007 at 04:51:16AM +, Al Viro wrote: > * off by one in dmar_get_fault_reason() (maximal index in > array is ARRAY_SIZE()-1, not ARRAY_SIZE()) > * NULL noise removal > * __iomem annotation fix > > Signed-off-by: Al Viro <[EMAIL PROTECTED]> > --- > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c > index 0c4ab3b..9b35259 100644 > --- a/drivers/pci/intel-iommu.c > +++ b/drivers/pci/intel-iommu.c > @@ -745,7 +745,7 @@ static char *fault_reason_strings[] = > "non-zero reserved fields in PTE", > "Unknown" > }; > -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) > +#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 Probably should be +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) > > char *dmar_get_fault_reason(u8 fault_reason) > { > @@ -995,7 +995,6 @@ static struct intel_iommu *alloc_iommu(struct > dmar_drhd_unit *drhd) > return iommu; > error_unmap: > iounmap(iommu->reg); > - iommu->reg = 0; > error: > kfree(iommu); > return NULL; > @@ -1808,7 +1807,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev) > if (!domain) { > printk(KERN_ERR > "Allocating domain for %s failed", pci_name(pdev)); > - return 0; > + return NULL; > } > > /* make sure context mapping is ok */ > @@ -1818,7 +1817,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev) > printk(KERN_ERR > "Domain context map for %s failed", > pci_name(pdev)); > - return 0; > + return NULL; > } > } > > diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h > index ee88dd2..459ad1f 100644 > --- a/drivers/pci/intel-iommu.h > +++ b/drivers/pci/intel-iommu.h > @@ -58,7 +58,7 @@ > hi = readl(dmar + reg + 4); \ > (((u64) hi) << 32) + lo; }) > */ > -static inline u64 dmar_readq(void *addr) > +static inline u64 dmar_readq(void __iomem *addr) > { > u32 lo, hi; > lo = readl(addr); Looks good to me. ACK --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] intel-iommu fixes
On Mon, Oct 29, 2007 at 04:51:16AM +, Al Viro wrote: * off by one in dmar_get_fault_reason() (maximal index in array is ARRAY_SIZE()-1, not ARRAY_SIZE()) * NULL noise removal * __iomem annotation fix Signed-off-by: Al Viro [EMAIL PROTECTED] --- diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 0c4ab3b..9b35259 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -745,7 +745,7 @@ static char *fault_reason_strings[] = non-zero reserved fields in PTE, Unknown }; -#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) +#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings) - 1 Probably should be +#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) char *dmar_get_fault_reason(u8 fault_reason) { @@ -995,7 +995,6 @@ static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) return iommu; error_unmap: iounmap(iommu-reg); - iommu-reg = 0; error: kfree(iommu); return NULL; @@ -1808,7 +1807,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev) if (!domain) { printk(KERN_ERR Allocating domain for %s failed, pci_name(pdev)); - return 0; + return NULL; } /* make sure context mapping is ok */ @@ -1818,7 +1817,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev) printk(KERN_ERR Domain context map for %s failed, pci_name(pdev)); - return 0; + return NULL; } } diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index ee88dd2..459ad1f 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -58,7 +58,7 @@ hi = readl(dmar + reg + 4); \ (((u64) hi) 32) + lo; }) */ -static inline u64 dmar_readq(void *addr) +static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; lo = readl(addr); Looks good to me. ACK --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] iommu-PMEN_REG boot up support
On Sat, Oct 27, 2007 at 02:19:38AM +0200, Muli Ben-Yehuda wrote: On Fri, Oct 26, 2007 at 11:18:49AM -0700, Mark Gross wrote: The following patch clears the portect memory region enable bit at boot time by default. It also provides a kernel parrameter for disabling this behavior and leave the PMEN_REG untouched if so wanted. If the boot loader or platform has protected memory regions enabled at boot time it could prevent DMA's from happening as drivers are loaded and used. What's the value of adding a command line option for this? Under what circumstances would we want to not clear this bit? umm, /me asks around and finds out that at this time there isn't much point to having a command line to preserve any protected areas set up at boot time. + } else if (!strncmp(str, no_epm_clear, 12)) { + printk(KERN_INFO + Intel-IOMMU: subress clearing of Enable + Protected Memory bit\n); + clear_pmen_epm = 0; `suppress', I assume. Rest looks fine, if the configuration option is really needed. Cheers, Muli -- SYSTOR 2007 --- 1st Annual Haifa Systems and Storage Conference 2007 http://www.haifa.il.ibm.com/Workshops/systor2007/ Virtualization workshop: Oct 29th, 2007 | Storage workshop: Oct 30th, 2007 - Signed-off-by: mark gross [EMAIL PROTECTED] --mgross Index: linux-2.6/drivers/pci/intel-iommu.c === --- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-29 13:30:29.0 -0700 @@ -692,6 +692,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(iommu-register_lock, flags); + pmen = readl(iommu-reg + DMAR_PMEN_REG); + pmen = ~DMA_PMEN_EPM; + writel(pmen, iommu-reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(iommu-register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -1731,6 +1748,8 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6/drivers/pci/intel-iommu.h === --- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-29 10:30:24.0 -0700 @@ -126,6 +126,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)31) +#define DMA_PMEN_PRS (((u32)1)0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) 31) #define DMA_GCMD_SRTP (((u32)1) 30) - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] iommu-PMEN_REG boot up support
The following patch clears the portect memory region enable bit at boot time by default. It also provides a kernel parrameter for disabling this behavior and leave the PMEN_REG untouched if so wanted. If the boot loader or platform has protected memory regions enabled at boot time it could prevent DMA's from happening as drivers are loaded and used. Singed-off-by: mark gross <[EMAIL PROTECTED]> --mgross Index: linux-2.6/drivers/pci/intel-iommu.c === --- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-26 11:18:55.0 -0700 @@ -55,6 +55,7 @@ static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int clear_pmen_epm = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -76,8 +77,14 @@ printk (KERN_INFO "Intel-IOMMU: Forcing DAC for PCI devices\n"); dmar_forcedac = 1; + } else if (!strncmp(str, "no_epm_clear", 12)) { + printk(KERN_INFO + "Intel-IOMMU: subress clearing of Enable " + "Protected Memory bit\n"); + clear_pmen_epm = 0; } + str += strcspn(str, ","); while (*str == ',') str++; @@ -692,6 +699,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(>register_lock, flags); + pmen = readl(iommu->reg + DMAR_PMEN_REG); + pmen &= ~DMA_PMEN_EPM; + writel(pmen, iommu->reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen & DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(>register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -1731,6 +1755,9 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + if (clear_pmen_epm) + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6/drivers/pci/intel-iommu.h === --- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-26 11:11:44.0 -0700 @@ -126,6 +126,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) << 31) #define DMA_GCMD_SRTP (((u32)1) << 30) Index: linux-2.6/Documentation/kernel-parameters.txt === --- linux-2.6.orig/Documentation/kernel-parameters.txt 2007-10-24 09:31:30.0 -0700 +++ linux-2.6/Documentation/kernel-parameters.txt 2007-10-26 11:11:44.0 -0700 @@ -789,6 +789,10 @@ than 32 bit addressing. The default is to look for translation below 32 bit and if not available then look in the higher range. + no_epm_clear + This will override the default of disabling protected + memory regions from DMA to leave the PMEM_REG settings + as they where set up by the boot loader / platform. io7=[HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] iommu-PMEN_REG boot up support
The following patch clears the portect memory region enable bit at boot time by default. It also provides a kernel parrameter for disabling this behavior and leave the PMEN_REG untouched if so wanted. If the boot loader or platform has protected memory regions enabled at boot time it could prevent DMA's from happening as drivers are loaded and used. Singed-off-by: mark gross [EMAIL PROTECTED] --mgross Index: linux-2.6/drivers/pci/intel-iommu.c === --- linux-2.6.orig/drivers/pci/intel-iommu.c2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.c 2007-10-26 11:18:55.0 -0700 @@ -55,6 +55,7 @@ static int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; +static int clear_pmen_epm = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -76,8 +77,14 @@ printk (KERN_INFO Intel-IOMMU: Forcing DAC for PCI devices\n); dmar_forcedac = 1; + } else if (!strncmp(str, no_epm_clear, 12)) { + printk(KERN_INFO + Intel-IOMMU: subress clearing of Enable + Protected Memory bit\n); + clear_pmen_epm = 0; } + str += strcspn(str, ,); while (*str == ',') str++; @@ -692,6 +699,23 @@ DMA_TLB_PSI_FLUSH, non_present_entry_flush); } +static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) +{ + u32 pmen; + unsigned long flags; + + spin_lock_irqsave(iommu-register_lock, flags); + pmen = readl(iommu-reg + DMAR_PMEN_REG); + pmen = ~DMA_PMEN_EPM; + writel(pmen, iommu-reg + DMAR_PMEN_REG); + + /* wait for the protected region status bit to clear */ + IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, + readl, (pmen DMA_PMEN_PRS), pmen); + + spin_unlock_irqrestore(iommu-register_lock, flags); +} + static int iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; @@ -1731,6 +1755,9 @@ iommu_flush_context_global(iommu, 0); iommu_flush_iotlb_global(iommu, 0); + if (clear_pmen_epm) + iommu_disable_protect_mem_regions(iommu); + ret = iommu_enable_translation(iommu); if (ret) goto error; Index: linux-2.6/drivers/pci/intel-iommu.h === --- linux-2.6.orig/drivers/pci/intel-iommu.h2007-10-24 09:31:32.0 -0700 +++ linux-2.6/drivers/pci/intel-iommu.h 2007-10-26 11:11:44.0 -0700 @@ -126,6 +126,10 @@ #define DMA_TLB_IH_NONLEAF (((u64)1) 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)31) +#define DMA_PMEN_PRS (((u32)1)0) + /* GCMD_REG */ #define DMA_GCMD_TE (((u32)1) 31) #define DMA_GCMD_SRTP (((u32)1) 30) Index: linux-2.6/Documentation/kernel-parameters.txt === --- linux-2.6.orig/Documentation/kernel-parameters.txt 2007-10-24 09:31:30.0 -0700 +++ linux-2.6/Documentation/kernel-parameters.txt 2007-10-26 11:11:44.0 -0700 @@ -789,6 +789,10 @@ than 32 bit addressing. The default is to look for translation below 32 bit and if not available then look in the higher range. + no_epm_clear + This will override the default of disabling protected + memory regions from DMA to leave the PMEM_REG settings + as they where set up by the boot loader / platform. io7=[HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] intel-iommu: Fix array overflow
On Tue, Oct 23, 2007 at 10:57:51AM +0200, Takashi Iwai wrote: > Fix possible array overflow: > > drivers/pci/intel-iommu.c: In function ‘dmar_get_fault_reason’: > drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds > drivers/pci/intel-iommu.c: In function ‘iommu_page_fault’: > drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds > > Signed-off-by: Takashi Iwai <[EMAIL PROTECTED]> > > --- > drivers/pci/intel-iommu.c |4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c > index b3d7031..e4b0a0d 100644 > --- a/drivers/pci/intel-iommu.c > +++ b/drivers/pci/intel-iommu.c > @@ -749,8 +749,8 @@ static char *fault_reason_strings[] = > > char *dmar_get_fault_reason(u8 fault_reason) > { > - if (fault_reason > MAX_FAULT_REASON_IDX) > - return fault_reason_strings[MAX_FAULT_REASON_IDX]; > + if (fault_reason >= MAX_FAULT_REASON_IDX) > + return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; This looks like what the code meant to implement. I guess future hardware may be able to generate more types of faults, otherwise I'd put a BUG here. --mgross > else > return fault_reason_strings[fault_reason]; > } > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] intel-iommu: Fix array overflow
On Tue, Oct 23, 2007 at 10:57:51AM +0200, Takashi Iwai wrote: Fix possible array overflow: drivers/pci/intel-iommu.c: In function ‘dmar_get_fault_reason’: drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds drivers/pci/intel-iommu.c: In function ‘iommu_page_fault’: drivers/pci/intel-iommu.c:753: warning: array subscript is above array bounds Signed-off-by: Takashi Iwai [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b3d7031..e4b0a0d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -749,8 +749,8 @@ static char *fault_reason_strings[] = char *dmar_get_fault_reason(u8 fault_reason) { - if (fault_reason MAX_FAULT_REASON_IDX) - return fault_reason_strings[MAX_FAULT_REASON_IDX]; + if (fault_reason = MAX_FAULT_REASON_IDX) + return fault_reason_strings[MAX_FAULT_REASON_IDX - 1]; This looks like what the code meant to implement. I guess future hardware may be able to generate more types of faults, otherwise I'd put a BUG here. --mgross else return fault_reason_strings[fault_reason]; } - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/9] Remove 'irq' argument from all irq handlers
On Fri, Oct 19, 2007 at 03:54:43AM -0400, Jeff Garzik wrote: > > WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE then whats the point ? > > This posting is just to demonstrate something that I have been keeping > alive in the background. I have no urge to push it upstream anytime > soon. why not? --mgross > > The overwhelming majority of drivers do not ever bother with the 'irq' > argument that is passed to each driver's irq handler. > > Of the minority of drivers that do use the arg, the majority of those > have the irq number stored in their private-info structure somewhere. > > There are a tiny few -- a couple Mac drivers -- which do weird things > with that argument, but that's it. > > For the large sweeps through the tree, these patches are grouped into > "trivial" changes -- simply removing the unused irq arg -- or all other > changes. > >[IRQ ARG REMOVAL] core interrupt delivery infrastructure updates >[IRQ ARG REMOVAL] various non-trivial arch updates >[IRQ ARG REMOVAL] trivial arch updates >[IRQ ARG REMOVAL] non-trivial driver updates >[IRQ ARG REMOVAL] trivial net driver updates >[IRQ ARG REMOVAL] trivial sound driver updates >[IRQ ARG REMOVAL] trivial scsi driver updates >[IRQ ARG REMOVAL] trivial driver updates >[IRQ ARG REMOVAL] x86-64 build fixes, cleanups > > WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/9] Remove 'irq' argument from all irq handlers
On Fri, Oct 19, 2007 at 03:54:43AM -0400, Jeff Garzik wrote: WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE then whats the point ? This posting is just to demonstrate something that I have been keeping alive in the background. I have no urge to push it upstream anytime soon. why not? --mgross The overwhelming majority of drivers do not ever bother with the 'irq' argument that is passed to each driver's irq handler. Of the minority of drivers that do use the arg, the majority of those have the irq number stored in their private-info structure somewhere. There are a tiny few -- a couple Mac drivers -- which do weird things with that argument, but that's it. For the large sweeps through the tree, these patches are grouped into trivial changes -- simply removing the unused irq arg -- or all other changes. [IRQ ARG REMOVAL] core interrupt delivery infrastructure updates [IRQ ARG REMOVAL] various non-trivial arch updates [IRQ ARG REMOVAL] trivial arch updates [IRQ ARG REMOVAL] non-trivial driver updates [IRQ ARG REMOVAL] trivial net driver updates [IRQ ARG REMOVAL] trivial sound driver updates [IRQ ARG REMOVAL] trivial scsi driver updates [IRQ ARG REMOVAL] trivial driver updates [IRQ ARG REMOVAL] x86-64 build fixes, cleanups WARNING NOT FOR MERGE WARNING NOT FOR MERGE WARNING NOT FOR MERGE - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] static initialization with blocking notifiers. was :wqRe: 2.6.23-mm1
I didn't see my patch show up on the list so I'm resending it. On Wed, Oct 17, 2007 at 01:53:48AM +0200, Rafael J. Wysocki wrote: > On Wednesday, 17 October 2007 01:31, Mark Gross wrote: > > On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote: > > > On Tuesday, 16 October 2007 21:58, Mark Gross wrote: > > > > On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: > > > > > On Monday, 15 October 2007 18:09, Mark Gross wrote: > > > > > > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: > > > > > > > On Friday, 12 October 2007 06:31, Andrew Morton wrote: > > > > > > > > > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ > > > > > > > > > > > > > > > > - I've been largely avoiding applying anything since rc8-mm2 in > > > > > > > > an attempt > > > > > > > > to stabilise things for the 2.6.23 merge. > > > > > > > > > > > > > > > > But that didn't stop all the subsystem maintainers from going > > > > > > > > nuts, with > > > > > > > > the usual accuracy. We're up to a 37MB diff now, but it > > > > > > > > seems to be working > > > > > > > > a bit better. > > > > > > > > > > > > > > I get many traces similar to the one below from it (w/ hotfixes): > > > > > > > > > > > > > > WARNING: at > > > > > > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 > > > > > > > smp_call_function_mask() > > > > > > > > > > > > This is from : WARN_ON(irqs_disabled()) in the > > > > > > cmp_call_function_mask > > > > > > processor_idle.c is registering a acpi_processor_latency_notify > > > > > > > > > > > > my code changed the notifier call from blocking_notifier_call_chain > > > > > > to > > > > > > srcu_notifier_call_chain, because dynamic creation of notifier > > > > > > chains at > > > > > > runtime where easier with the srcu_notifier_call_chain than the > > > > > > blocking_notifier_call_chain. > > > > > > > > > > > > As dynamic creation of PM_QOS parameters are no longer needed I can > > > > > > change the notifiers back to match what was in lanency.c > > > > > > > > > > > > However; looking at the call tree differences between > > > > > > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot > > > > > > see a > > > > > > difference in irq enabling / disabling. I'm not confident this will > > > > > > address this yet. > > > > > > > > > > Well, you can send me a patch to check. :-) The following is a patch to update the pm_qos code in the mm1 tree. It removes the PM_QOS_CPUIDLE parameter (replacing it with PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking in hopes of fixing the WARNS reported by xxx, and it changes the initialization to me largely static to avoid initialization race with cpu-idle. I think we will have to re-visit the static vrs dynamic initialization and this init race in a while to support pm_qos parameters per power domain (i.e. per cpu-socket) based on platform information (ACPI) but for now lets see if this fixes the warning's reported. Thanks, Signed-off-by: mark gross <[EMAIL PROTECTED]> Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c --- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 09:26:21.0 -0700 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); +pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ dif
Re: WANTED: kernel projects for CS students
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: > The kernel newbies community often gets inquiries from CS students who > need a project for their studies and would like to do something with > the Linux kernel, but would also like their code to be useful to the > community afterwards. > > In order to make it easier for them, I am trying to put together a > page with projects that: > - Are self contained enough that the students can implement the > project by themselves, since that is often a university requirement. > - Are self contained enough that Linux could merge the code (maybe > with additional changes) after the student has been working on it > for a few months. > - Are large enough to qualify as a student project, luckily there is > flexibility here since we get inquiries for anything from 6 week > projects to 6 month projects. > > If you have ideas on what projects would be useful, please add them > to this page (or email me): > > http://kernelnewbies.org/KernelProjects > How about a static code tool that will check for initialization races? yesterday I found a lurker bug in some of my code that wouldn't have been exposed had not tripped over it. I wrote some infrastructure code that initializes its lists and notification trees in late_init. Then I found out that there was as client of my infrastructure calling my register API at core_init time. It didn't crash / fail noticeably, but wasn't correct, because at that time I was using a static array. When I changed my code to use an array of pointers instead it went boom! (FWIW I've fixed this issue for now...) It made me feel uneasy how that issue got by un-noticed and I worry that there could be more like it. A tool to scan the code for boot up init calls and check for any callers into any module for entry before the module is fully initialized. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1
On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: > On Friday, 12 October 2007 06:31, Andrew Morton wrote: > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ > > > > - I've been largely avoiding applying anything since rc8-mm2 in an attempt > > to stabilise things for the 2.6.23 merge. > > > > But that didn't stop all the subsystem maintainers from going nuts, with > > the usual accuracy. We're up to a 37MB diff now, but it seems to be > > working > > a bit better. > > I get many traces similar to the one below from it (w/ hotfixes): > > WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 > smp_call_function_mask() > > Call Trace: > [] smp_call_function_mask+0x4b/0x82 > [] smp_call_function+0x23/0x25 > [] :processor:acpi_processor_latency_notify+0x19/0x20 > [] notifier_call_chain+0x33/0x65 > [] __srcu_notifier_call_chain+0x4b/0x69 > [] pm_qos_add_requirement+0x24/0xd2 > [] srcu_notifier_call_chain+0xf/0x11 > [] update_target+0x71/0x76 > [] pm_qos_add_requirement+0xa9/0xd2 > [] :snd_pcm:snd_pcm_hw_params+0x349/0x382 > [] kmem_cache_alloc+0x8a/0xbc > [] :snd_pcm:snd_pcm_hw_params_user+0x50/0x87 > [] :snd_pcm:snd_pcm_common_ioctl1+0x1ae/0xd4f > [] :snd_pcm:snd_pcm_open+0xd6/0x1f2 > [] cache_alloc_debugcheck_after+0x11a/0x199 > [] remove_wait_queue+0x40/0x45 > [] :snd_pcm:snd_pcm_open+0x13e/0x1f2 > [] default_wake_function+0x0/0xf > [] prio_tree_insert+0x18c/0x231 > [] vma_prio_tree_insert+0x23/0x39 > [] vma_link+0xdd/0x10b > [] :snd_pcm:snd_pcm_playback_ioctl1+0x24d/0x26a > [] :snd_pcm:snd_pcm_playback_ioctl+0x2e/0x36 > [] do_ioctl+0x2a/0x77 > [] vfs_ioctl+0x251/0x26e > [] sys_ioctl+0x57/0x7b > [] system_call+0x7e/0x83 > > Full dmesg attached. > ubject: [PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1 please try this patch and let me know if the warnings go away. (I have not been able to reproduce your issue.) The following is a patch to update the pm_qos code in the mm1 tree. It removes the PM_QOS_CPUIDLE parameter (replacing it with PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking in hopes of fixing the WARNS reported by xxx, and it changes the initialization to me largely static to avoid initialization race with cpu-idle. I think we will have to re-visit the static vrs dynamic initialization and this init race in a while to support pm_qos parameters per power domain (i.e. per cpu-socket) based on platform information (ACPI) but for now lets see if this fixes the warning's reported. Thanks, Signed-off-by: mark gross <[EMAIL PROTECTED]> Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c --- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 09:26:21.0 -0700 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); +pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c --- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c 2007-10-17 09:26:21.0 -0700 @@ -82,7 +82,7 @@ if (last_idx < dev->state_count - 1 && last_residency > last_state->threshold.promotion_time && dev->states[last_idx + 1].exit_latency <= - pm_qos_requirement(PM_QOS_CPUIDLE)) { + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c --- linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 2007
[PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1
On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: On Friday, 12 October 2007 06:31, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ - I've been largely avoiding applying anything since rc8-mm2 in an attempt to stabilise things for the 2.6.23 merge. But that didn't stop all the subsystem maintainers from going nuts, with the usual accuracy. We're up to a 37MB diff now, but it seems to be working a bit better. I get many traces similar to the one below from it (w/ hotfixes): WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 smp_call_function_mask() Call Trace: [8021b290] smp_call_function_mask+0x4b/0x82 [8021b2ea] smp_call_function+0x23/0x25 [884a0b80] :processor:acpi_processor_latency_notify+0x19/0x20 [80437ace] notifier_call_chain+0x33/0x65 [8024f32f] __srcu_notifier_call_chain+0x4b/0x69 [8024f07c] pm_qos_add_requirement+0x24/0xd2 [8024f35c] srcu_notifier_call_chain+0xf/0x11 [8024ee6d] update_target+0x71/0x76 [8024f101] pm_qos_add_requirement+0xa9/0xd2 [88160bf9] :snd_pcm:snd_pcm_hw_params+0x349/0x382 [80291110] kmem_cache_alloc+0x8a/0xbc [88160d75] :snd_pcm:snd_pcm_hw_params_user+0x50/0x87 [88160fe1] :snd_pcm:snd_pcm_common_ioctl1+0x1ae/0xd4f [8815f755] :snd_pcm:snd_pcm_open+0xd6/0x1f2 [8028fc17] cache_alloc_debugcheck_after+0x11a/0x199 [8024b514] remove_wait_queue+0x40/0x45 [8815f7bd] :snd_pcm:snd_pcm_open+0x13e/0x1f2 [8022f18e] default_wake_function+0x0/0xf [8030b24d] prio_tree_insert+0x18c/0x231 [8027b5fb] vma_prio_tree_insert+0x23/0x39 [80282e91] vma_link+0xdd/0x10b [8816206f] :snd_pcm:snd_pcm_playback_ioctl1+0x24d/0x26a [8816292c] :snd_pcm:snd_pcm_playback_ioctl+0x2e/0x36 [802a0896] do_ioctl+0x2a/0x77 [802a0b34] vfs_ioctl+0x251/0x26e [802a0ba8] sys_ioctl+0x57/0x7b [8020bfde] system_call+0x7e/0x83 Full dmesg attached. ubject: [PATCH] static initialization and blocking notification for pm_qos... was Re: 2.6.23-mm1 please try this patch and let me know if the warnings go away. (I have not been able to reproduce your issue.) The following is a patch to update the pm_qos code in the mm1 tree. It removes the PM_QOS_CPUIDLE parameter (replacing it with PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking in hopes of fixing the WARNS reported by xxx, and it changes the initialization to me largely static to avoid initialization race with cpu-idle. I think we will have to re-visit the static vrs dynamic initialization and this init race in a while to support pm_qos parameters per power domain (i.e. per cpu-socket) based on platform information (ACPI) but for now lets see if this fixes the warning's reported. Thanks, Signed-off-by: mark gross [EMAIL PROTECTED] Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c --- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 09:26:21.0 -0700 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); +pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c --- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c 2007-10-17 09:26:21.0 -0700 @@ -82,7 +82,7 @@ if (last_idx dev-state_count - 1 last_residency last_state-threshold.promotion_time dev-states[last_idx + 1].exit_latency = - pm_qos_requirement(PM_QOS_CPUIDLE)) { + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state-stats.promotion_count++; last_state-stats.demotion_count = 0; if (last_state-stats.promotion_count = last_state-threshold.promotion_count) { diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c
Re: WANTED: kernel projects for CS students
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: The kernel newbies community often gets inquiries from CS students who need a project for their studies and would like to do something with the Linux kernel, but would also like their code to be useful to the community afterwards. In order to make it easier for them, I am trying to put together a page with projects that: - Are self contained enough that the students can implement the project by themselves, since that is often a university requirement. - Are self contained enough that Linux could merge the code (maybe with additional changes) after the student has been working on it for a few months. - Are large enough to qualify as a student project, luckily there is flexibility here since we get inquiries for anything from 6 week projects to 6 month projects. If you have ideas on what projects would be useful, please add them to this page (or email me): http://kernelnewbies.org/KernelProjects How about a static code tool that will check for initialization races? yesterday I found a lurker bug in some of my code that wouldn't have been exposed had not tripped over it. I wrote some infrastructure code that initializes its lists and notification trees in late_init. Then I found out that there was as client of my infrastructure calling my register API at core_init time. It didn't crash / fail noticeably, but wasn't correct, because at that time I was using a static array. When I changed my code to use an array of pointers instead it went boom! (FWIW I've fixed this issue for now...) It made me feel uneasy how that issue got by un-noticed and I worry that there could be more like it. A tool to scan the code for boot up init calls and check for any callers into any module for entry before the module is fully initialized. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] static initialization with blocking notifiers. was :wqRe: 2.6.23-mm1
I didn't see my patch show up on the list so I'm resending it. On Wed, Oct 17, 2007 at 01:53:48AM +0200, Rafael J. Wysocki wrote: On Wednesday, 17 October 2007 01:31, Mark Gross wrote: On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote: On Tuesday, 16 October 2007 21:58, Mark Gross wrote: On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: On Monday, 15 October 2007 18:09, Mark Gross wrote: On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: On Friday, 12 October 2007 06:31, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ - I've been largely avoiding applying anything since rc8-mm2 in an attempt to stabilise things for the 2.6.23 merge. But that didn't stop all the subsystem maintainers from going nuts, with the usual accuracy. We're up to a 37MB diff now, but it seems to be working a bit better. I get many traces similar to the one below from it (w/ hotfixes): WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 smp_call_function_mask() This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask processor_idle.c is registering a acpi_processor_latency_notify my code changed the notifier call from blocking_notifier_call_chain to srcu_notifier_call_chain, because dynamic creation of notifier chains at runtime where easier with the srcu_notifier_call_chain than the blocking_notifier_call_chain. As dynamic creation of PM_QOS parameters are no longer needed I can change the notifiers back to match what was in lanency.c However; looking at the call tree differences between blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a difference in irq enabling / disabling. I'm not confident this will address this yet. Well, you can send me a patch to check. :-) The following is a patch to update the pm_qos code in the mm1 tree. It removes the PM_QOS_CPUIDLE parameter (replacing it with PM_CPU_DMA_LATENCY), It changes the notifications from srcu to blocking in hopes of fixing the WARNS reported by xxx, and it changes the initialization to me largely static to avoid initialization race with cpu-idle. I think we will have to re-visit the static vrs dynamic initialization and this init race in a while to support pm_qos parameters per power domain (i.e. per cpu-socket) based on platform information (ACPI) but for now lets see if this fixes the warning's reported. Thanks, Signed-off-by: mark gross [EMAIL PROTECTED] Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-syscall.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-syscall.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/ia32/vsyscall-sysenter.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/ia32/vsyscall-sysenter.so.dbg differ Binary files linux-2.6.23-mm1/arch/x86_64/vdso/vdso.so.dbg and linux-2.6.23-mm1-pmqos/arch/x86_64/vdso/vdso.so.dbg differ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c --- linux-2.6.23-mm1/drivers/cpuidle/cpuidle.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/cpuidle.c2007-10-17 09:26:21.0 -0700 @@ -268,7 +268,7 @@ static inline void latency_notifier_init(struct notifier_block *n) { -pm_qos_add_notifier(PM_QOS_CPUIDLE, n); +pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); } #else /* CONFIG_SMP */ diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c --- linux-2.6.23-mm1/drivers/cpuidle/governors/ladder.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/ladder.c 2007-10-17 09:26:21.0 -0700 @@ -82,7 +82,7 @@ if (last_idx dev-state_count - 1 last_residency last_state-threshold.promotion_time dev-states[last_idx + 1].exit_latency = - pm_qos_requirement(PM_QOS_CPUIDLE)) { + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { last_state-stats.promotion_count++; last_state-stats.demotion_count = 0; if (last_state-stats.promotion_count = last_state-threshold.promotion_count) { diff -urN -X linux-2.6.23-mm1/Documentation/dontdiff linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c --- linux-2.6.23-mm1/drivers/cpuidle/governors/menu.c 2007-10-16 15:03:30.0 -0700 +++ linux-2.6.23-mm1-pmqos/drivers/cpuidle/governors/menu.c 2007-10-17
Re: 2.6.23-mm1
On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote: > On Tuesday, 16 October 2007 21:58, Mark Gross wrote: > > On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: > > > On Monday, 15 October 2007 18:09, Mark Gross wrote: > > > > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: > > > > > On Friday, 12 October 2007 06:31, Andrew Morton wrote: > > > > > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ > > > > > > > > > > > > - I've been largely avoiding applying anything since rc8-mm2 in an > > > > > > attempt > > > > > > to stabilise things for the 2.6.23 merge. > > > > > > > > > > > > But that didn't stop all the subsystem maintainers from going > > > > > > nuts, with > > > > > > the usual accuracy. We're up to a 37MB diff now, but it seems to > > > > > > be working > > > > > > a bit better. > > > > > > > > > > I get many traces similar to the one below from it (w/ hotfixes): > > > > > > > > > > WARNING: at > > > > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 > > > > > smp_call_function_mask() > > > > > > > > This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask > > > > processor_idle.c is registering a acpi_processor_latency_notify > > > > > > > > my code changed the notifier call from blocking_notifier_call_chain to > > > > srcu_notifier_call_chain, because dynamic creation of notifier chains at > > > > runtime where easier with the srcu_notifier_call_chain than the > > > > blocking_notifier_call_chain. > > > > > > > > As dynamic creation of PM_QOS parameters are no longer needed I can > > > > change the notifiers back to match what was in lanency.c > > > > > > > > However; looking at the call tree differences between > > > > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a > > > > difference in irq enabling / disabling. I'm not confident this will > > > > address this yet. > > > > > > Well, you can send me a patch to check. :-) > > > > I think I'll have to send you a patch that changes the notifiers but I > > doubt it will fix it. > > > > After a bit of messing around I have the 2.6.23-mm1 running on my core-2 > > box note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that > > wouldn't boot past the intrd. Looks like the pivot root failed or > > something. > > > > Anyway, I'm not reproducing your experience, snd_pcm is loaded. I don't > > know none of the WARN's are not hitting on my box. > > > > do you have some configuration information that could help me reproduce > > the issue? > > Well, I can send you the .config, but the box is AMD-based (Turion 64 X2), > with an ATI chipset and an HP BIOS, so it seems to be much different from > yours. it may be worth a shot anyway. BTW while changing my code to use the blocking notifiers I found that there is a initialization race between cpu-idle and pm_qos I have to fix. I need to re factor my start up code to handle cpuidle registering itself in as a notifier at core_initcall time. I'll have a patch ready tomorrow. thanks, --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 11:17:32PM +0400, Alexey Dobriyan wrote: > On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: > > On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: > > > http://kernelnewbies.org/KernelProjects > > > > Is there already a make config option that will do a good job at setting > > a default .config file based on what is already running on a system? > > > > I get tiered of trimming down my .config for my laptop build so it takes > > less than 30min to build a kernel. > > Ehh? You do it once, then leave it aside or in /proc/config.gz, on new > kernel copy it back, "make oldconfig", answer several questions and here > we go. yeah I know that. Its a lot more than a few questions, and as we are talking about a linear search for a fully tweaked .config where each pass takes 30 min to know if things work this isn't how I want to spend my time. --mgross > > > Bonus credit to additional "expert" options (like those powertop puts > > out) for target uses, laptop, HPC, home file share, embedded targets > > > > Oh, and lets make the expert configs easily extensible. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 09:54:42PM +0200, Giacomo Catenazzi wrote: > Sam Ravnborg wrote: > > On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: > >> On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: > >>> The kernel newbies community often gets inquiries from CS students who > >>> need a project for their studies and would like to do something with > >>> the Linux kernel, but would also like their code to be useful to the > >>> community afterwards. > >>> > >>> In order to make it easier for them, I am trying to put together a > >>> page with projects that: > >>> - Are self contained enough that the students can implement the > >>> project by themselves, since that is often a university requirement. > >>> - Are self contained enough that Linux could merge the code (maybe > >>> with additional changes) after the student has been working on it > >>> for a few months. > >>> - Are large enough to qualify as a student project, luckily there is > >>> flexibility here since we get inquiries for anything from 6 week > >>> projects to 6 month projects. > >>> > >>> If you have ideas on what projects would be useful, please add them > >>> to this page (or email me): > >>> > >>> http://kernelnewbies.org/KernelProjects > >> Is there already a make config option that will do a good job at setting > >> a default .config file based on what is already running on a system? > > I have discussed this briefly with Kay Sievers. > > What udev can provide is the list of modules needed, so what the kernel > > need to provide is a simple module to CONFIG option(s) converter + a base > > config to start out with. > > Nothing particular difficult but needs a few days work to do. > > could you explain better what you need? I think I've already such > tools ;-) > base function: Starting from a stock distro (FC, Ubuntu, OpenSuSE...) and put down a kernel.org tree and automatically create a .config with all the drivers needed for the platform I'm building on. expert configs for different applications: laptop battery, vitalization, HPC, tiny, multi-media, testing --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1
On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: > On Monday, 15 October 2007 18:09, Mark Gross wrote: > > On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: > > > On Friday, 12 October 2007 06:31, Andrew Morton wrote: > > > > > > > > ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ > > > > > > > > - I've been largely avoiding applying anything since rc8-mm2 in an > > > > attempt > > > > to stabilise things for the 2.6.23 merge. > > > > > > > > But that didn't stop all the subsystem maintainers from going nuts, > > > > with > > > > the usual accuracy. We're up to a 37MB diff now, but it seems to be > > > > working > > > > a bit better. > > > > > > I get many traces similar to the one below from it (w/ hotfixes): > > > > > > WARNING: at > > > /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 > > > smp_call_function_mask() > > > > This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask > > processor_idle.c is registering a acpi_processor_latency_notify > > > > my code changed the notifier call from blocking_notifier_call_chain to > > srcu_notifier_call_chain, because dynamic creation of notifier chains at > > runtime where easier with the srcu_notifier_call_chain than the > > blocking_notifier_call_chain. > > > > As dynamic creation of PM_QOS parameters are no longer needed I can > > change the notifiers back to match what was in lanency.c > > > > However; looking at the call tree differences between > > blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a > > difference in irq enabling / disabling. I'm not confident this will > > address this yet. > > Well, you can send me a patch to check. :-) I think I'll have to send you a patch that changes the notifiers but I doubt it will fix it. After a bit of messing around I have the 2.6.23-mm1 running on my core-2 box note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that wouldn't boot past the intrd. Looks like the pivot root failed or something. Anyway, I'm not reproducing your experience, snd_pcm is loaded. I don't know none of the WARN's are not hitting on my box. do you have some configuration information that could help me reproduce the issue? > > > I'll change the PM_QOS params patch to use blocking notifiers and test > > on a 64bit boot and see what happens. I've been needing to setup my > > x86_64 dev box for a while now anyway. > > OK, thanks. well its booting but I'm not reproducing the trace messages. I'll do the patch for you to test. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1
On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: On Monday, 15 October 2007 18:09, Mark Gross wrote: On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: On Friday, 12 October 2007 06:31, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ - I've been largely avoiding applying anything since rc8-mm2 in an attempt to stabilise things for the 2.6.23 merge. But that didn't stop all the subsystem maintainers from going nuts, with the usual accuracy. We're up to a 37MB diff now, but it seems to be working a bit better. I get many traces similar to the one below from it (w/ hotfixes): WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 smp_call_function_mask() This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask processor_idle.c is registering a acpi_processor_latency_notify my code changed the notifier call from blocking_notifier_call_chain to srcu_notifier_call_chain, because dynamic creation of notifier chains at runtime where easier with the srcu_notifier_call_chain than the blocking_notifier_call_chain. As dynamic creation of PM_QOS parameters are no longer needed I can change the notifiers back to match what was in lanency.c However; looking at the call tree differences between blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a difference in irq enabling / disabling. I'm not confident this will address this yet. Well, you can send me a patch to check. :-) I think I'll have to send you a patch that changes the notifiers but I doubt it will fix it. After a bit of messing around I have the 2.6.23-mm1 running on my core-2 box note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that wouldn't boot past the intrd. Looks like the pivot root failed or something. Anyway, I'm not reproducing your experience, snd_pcm is loaded. I don't know none of the WARN's are not hitting on my box. do you have some configuration information that could help me reproduce the issue? I'll change the PM_QOS params patch to use blocking notifiers and test on a 64bit boot and see what happens. I've been needing to setup my x86_64 dev box for a while now anyway. OK, thanks. well its booting but I'm not reproducing the trace messages. I'll do the patch for you to test. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 09:54:42PM +0200, Giacomo Catenazzi wrote: Sam Ravnborg wrote: On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: The kernel newbies community often gets inquiries from CS students who need a project for their studies and would like to do something with the Linux kernel, but would also like their code to be useful to the community afterwards. In order to make it easier for them, I am trying to put together a page with projects that: - Are self contained enough that the students can implement the project by themselves, since that is often a university requirement. - Are self contained enough that Linux could merge the code (maybe with additional changes) after the student has been working on it for a few months. - Are large enough to qualify as a student project, luckily there is flexibility here since we get inquiries for anything from 6 week projects to 6 month projects. If you have ideas on what projects would be useful, please add them to this page (or email me): http://kernelnewbies.org/KernelProjects Is there already a make config option that will do a good job at setting a default .config file based on what is already running on a system? I have discussed this briefly with Kay Sievers. What udev can provide is the list of modules needed, so what the kernel need to provide is a simple module to CONFIG option(s) converter + a base config to start out with. Nothing particular difficult but needs a few days work to do. could you explain better what you need? I think I've already such tools ;-) base function: Starting from a stock distro (FC, Ubuntu, OpenSuSE...) and put down a kernel.org tree and automatically create a .config with all the drivers needed for the platform I'm building on. expert configs for different applications: laptop battery, vitalization, HPC, tiny, multi-media, testing --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 11:17:32PM +0400, Alexey Dobriyan wrote: On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: http://kernelnewbies.org/KernelProjects Is there already a make config option that will do a good job at setting a default .config file based on what is already running on a system? I get tiered of trimming down my .config for my laptop build so it takes less than 30min to build a kernel. Ehh? You do it once, then leave it aside or in /proc/config.gz, on new kernel copy it back, make oldconfig, answer several questions and here we go. yeah I know that. Its a lot more than a few questions, and as we are talking about a linear search for a fully tweaked .config where each pass takes 30 min to know if things work this isn't how I want to spend my time. --mgross Bonus credit to additional expert options (like those powertop puts out) for target uses, laptop, HPC, home file share, embedded targets Oh, and lets make the expert configs easily extensible. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.23-mm1
On Tue, Oct 16, 2007 at 10:28:13PM +0200, Rafael J. Wysocki wrote: On Tuesday, 16 October 2007 21:58, Mark Gross wrote: On Mon, Oct 15, 2007 at 10:40:02PM +0200, Rafael J. Wysocki wrote: On Monday, 15 October 2007 18:09, Mark Gross wrote: On Fri, Oct 12, 2007 at 11:32:40PM +0200, Rafael J. Wysocki wrote: On Friday, 12 October 2007 06:31, Andrew Morton wrote: ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.23/2.6.23-mm1/ - I've been largely avoiding applying anything since rc8-mm2 in an attempt to stabilise things for the 2.6.23 merge. But that didn't stop all the subsystem maintainers from going nuts, with the usual accuracy. We're up to a 37MB diff now, but it seems to be working a bit better. I get many traces similar to the one below from it (w/ hotfixes): WARNING: at /home/rafael/src/mm/linux-2.6.23-mm1/arch/x86_64/kernel/smp.c:397 smp_call_function_mask() This is from : WARN_ON(irqs_disabled()) in the cmp_call_function_mask processor_idle.c is registering a acpi_processor_latency_notify my code changed the notifier call from blocking_notifier_call_chain to srcu_notifier_call_chain, because dynamic creation of notifier chains at runtime where easier with the srcu_notifier_call_chain than the blocking_notifier_call_chain. As dynamic creation of PM_QOS parameters are no longer needed I can change the notifiers back to match what was in lanency.c However; looking at the call tree differences between blockin_notifier_call_chain and srcu_notifier_call_chain I cannot see a difference in irq enabling / disabling. I'm not confident this will address this yet. Well, you can send me a patch to check. :-) I think I'll have to send you a patch that changes the notifiers but I doubt it will fix it. After a bit of messing around I have the 2.6.23-mm1 running on my core-2 box note: Ubuntu's make-kpkg on the mm1 tree resulted in a system that wouldn't boot past the intrd. Looks like the pivot root failed or something. Anyway, I'm not reproducing your experience, snd_pcm is loaded. I don't know none of the WARN's are not hitting on my box. do you have some configuration information that could help me reproduce the issue? Well, I can send you the .config, but the box is AMD-based (Turion 64 X2), with an ATI chipset and an HP BIOS, so it seems to be much different from yours. it may be worth a shot anyway. BTW while changing my code to use the blocking notifiers I found that there is a initialization race between cpu-idle and pm_qos I have to fix. I need to re factor my start up code to handle cpuidle registering itself in as a notifier at core_initcall time. I'll have a patch ready tomorrow. thanks, --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 08:30:17PM +0200, Sam Ravnborg wrote: > On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: > > On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: > > > The kernel newbies community often gets inquiries from CS students who > > > need a project for their studies and would like to do something with > > > the Linux kernel, but would also like their code to be useful to the > > > community afterwards. > > > > > > In order to make it easier for them, I am trying to put together a > > > page with projects that: > > > - Are self contained enough that the students can implement the > > > project by themselves, since that is often a university requirement. > > > - Are self contained enough that Linux could merge the code (maybe > > > with additional changes) after the student has been working on it > > > for a few months. > > > - Are large enough to qualify as a student project, luckily there is > > > flexibility here since we get inquiries for anything from 6 week > > > projects to 6 month projects. > > > > > > If you have ideas on what projects would be useful, please add them > > > to this page (or email me): > > > > > > http://kernelnewbies.org/KernelProjects > > > > Is there already a make config option that will do a good job at setting > > a default .config file based on what is already running on a system? > I have discussed this briefly with Kay Sievers. > What udev can provide is the list of modules needed, so what the kernel > need to provide is a simple module to CONFIG option(s) converter + a base > config to start out with. > Nothing particular difficult but needs a few days work to do. > that would be cool. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: > The kernel newbies community often gets inquiries from CS students who > need a project for their studies and would like to do something with > the Linux kernel, but would also like their code to be useful to the > community afterwards. > > In order to make it easier for them, I am trying to put together a > page with projects that: > - Are self contained enough that the students can implement the > project by themselves, since that is often a university requirement. > - Are self contained enough that Linux could merge the code (maybe > with additional changes) after the student has been working on it > for a few months. > - Are large enough to qualify as a student project, luckily there is > flexibility here since we get inquiries for anything from 6 week > projects to 6 month projects. > > If you have ideas on what projects would be useful, please add them > to this page (or email me): > > http://kernelnewbies.org/KernelProjects Is there already a make config option that will do a good job at setting a default .config file based on what is already running on a system? I get tiered of trimming down my .config for my laptop build so it takes less than 30min to build a kernel. Bonus credit to additional "expert" options (like those powertop puts out) for target uses, laptop, HPC, home file share, embedded targets Oh, and lets make the expert configs easily extensible. --mgross > > thanks, > > Rik > -- > "Debugging is twice as hard as writing the code in the first place. > Therefore, if you write the code as cleverly as possible, you are, > by definition, not smart enough to debug it." - Brian W. Kernighan > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: The kernel newbies community often gets inquiries from CS students who need a project for their studies and would like to do something with the Linux kernel, but would also like their code to be useful to the community afterwards. In order to make it easier for them, I am trying to put together a page with projects that: - Are self contained enough that the students can implement the project by themselves, since that is often a university requirement. - Are self contained enough that Linux could merge the code (maybe with additional changes) after the student has been working on it for a few months. - Are large enough to qualify as a student project, luckily there is flexibility here since we get inquiries for anything from 6 week projects to 6 month projects. If you have ideas on what projects would be useful, please add them to this page (or email me): http://kernelnewbies.org/KernelProjects Is there already a make config option that will do a good job at setting a default .config file based on what is already running on a system? I get tiered of trimming down my .config for my laptop build so it takes less than 30min to build a kernel. Bonus credit to additional expert options (like those powertop puts out) for target uses, laptop, HPC, home file share, embedded targets Oh, and lets make the expert configs easily extensible. --mgross thanks, Rik -- Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it. - Brian W. Kernighan - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: WANTED: kernel projects for CS students
On Mon, Oct 15, 2007 at 08:30:17PM +0200, Sam Ravnborg wrote: On Mon, Oct 15, 2007 at 10:04:11AM -0700, Mark Gross wrote: On Sun, Oct 14, 2007 at 07:01:28PM -0400, Rik van Riel wrote: The kernel newbies community often gets inquiries from CS students who need a project for their studies and would like to do something with the Linux kernel, but would also like their code to be useful to the community afterwards. In order to make it easier for them, I am trying to put together a page with projects that: - Are self contained enough that the students can implement the project by themselves, since that is often a university requirement. - Are self contained enough that Linux could merge the code (maybe with additional changes) after the student has been working on it for a few months. - Are large enough to qualify as a student project, luckily there is flexibility here since we get inquiries for anything from 6 week projects to 6 month projects. If you have ideas on what projects would be useful, please add them to this page (or email me): http://kernelnewbies.org/KernelProjects Is there already a make config option that will do a good job at setting a default .config file based on what is already running on a system? I have discussed this briefly with Kay Sievers. What udev can provide is the list of modules needed, so what the kernel need to provide is a simple module to CONFIG option(s) converter + a base config to start out with. Nothing particular difficult but needs a few days work to do. that would be cool. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: pm qos infrastructure and interface
On Wed, Oct 10, 2007 at 10:17:04PM -0700, Andrew Morton wrote: > On Thu, 4 Oct 2007 14:51:39 -0700 Mark Gross <[EMAIL PROTECTED]> wrote: > > > The following patch is a generalization of the latency.c implementation > > done by Arjan last year. It provides infrastructure for more than one > > parameter, and exposes a user mode interface for processes to register > > pm_qos expectations of processes. > > > > > > This interface provides a kernel and user mode interface for registering > > performance expectations by drivers, subsystems and user space > > applications on one of the parameters. > > > > Currently we have {cpu_dma_latency, network_latency, network_throughput} > > as the initial set of pm_qos parameters. > > > > The infrastructure exposes multiple misc device nodes one per > > implemented parameter. The set of parameters implement is defined by > > pm_qos_power_init() and pm_qos_params.h. This is done because having > > the available parameters being runtime configurable or changeable from a > > driver was seen as too easy to abuse. > > I'm a bit surprised that this change appears to have no configurability. > If one has set CONFIG_PM=n (for example), shouldn't it all go away? We considered that as an option but as latency.c didn't offer it I didn't either. I could see the user mode interface portion of the implementation be made as a compile time option but the kernel infrastructure will continue to be needed by at least cpu-idel, pcm_native.c and ipw2100. You know it could make sense to have the user mode interface part of the patch as configurable or a build time dependent of sysfs and misc device support for the linux-tiny guys. Is it practical to make a linux-tiny without the sysfs infrastructure needed to make a misc device? I'll ask on the linux-tiny list. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: pm qos infrastructure and interface
On Wed, Oct 10, 2007 at 10:17:04PM -0700, Andrew Morton wrote: On Thu, 4 Oct 2007 14:51:39 -0700 Mark Gross [EMAIL PROTECTED] wrote: The following patch is a generalization of the latency.c implementation done by Arjan last year. It provides infrastructure for more than one parameter, and exposes a user mode interface for processes to register pm_qos expectations of processes. This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on one of the parameters. Currently we have {cpu_dma_latency, network_latency, network_throughput} as the initial set of pm_qos parameters. The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters being runtime configurable or changeable from a driver was seen as too easy to abuse. I'm a bit surprised that this change appears to have no configurability. If one has set CONFIG_PM=n (for example), shouldn't it all go away? We considered that as an option but as latency.c didn't offer it I didn't either. I could see the user mode interface portion of the implementation be made as a compile time option but the kernel infrastructure will continue to be needed by at least cpu-idel, pcm_native.c and ipw2100. You know it could make sense to have the user mode interface part of the patch as configurable or a build time dependent of sysfs and misc device support for the linux-tiny guys. Is it practical to make a linux-tiny without the sysfs infrastructure needed to make a misc device? I'll ask on the linux-tiny list. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: gigabit ethernet power consumption
On Tue, Oct 09, 2007 at 11:41:17AM -0700, Kok, Auke wrote: > Lennart Sorensen wrote: > > On Mon, Oct 08, 2007 at 03:31:51PM -0700, Kok, Auke wrote: > >> you most certainly want to do this in userspace I think. > >> > >> One of the biggest problems is that link negotiation can take a > >> significant amount > >> of time, well over several seconds (1 to 3 seconds typical) with gigabit, > >> and > >> having your ethernet connection go offline for 3 seconds may not be the > >> desired > >> effect for when you want to get more bandwidth in the first place. > >> > >> However, when a laptop is in battery mode, switching down from gigabit to > >> 100mbit > >> makes a lot more sense, so this is something I would recommend. This can > >> be as > >> easy as changing the advertisement mask of the interface and renegotiating > >> the > >> link. Userspace could handle that very easily. > > > > Now if you were trying to transfer a lot of data to the laptop, would it > > be more power efficient to do it at gigabit speeds so you can finish > > sooner and shut down the machine entirely, or to slow to 100mbit and > > take longer to do it, and hence spend more time powering the cpu and > > ram? > > my suspicion is that the cost of switching is much higher than what you would > consume running at 100mbit, even if the amount of data is quite large. going > offline to renegotiate the link would already cost you 3W typically. > > I definately think that userspace is the right field to solve this problem: > let > the users decide how to use the available power on their sytems through a > decent > power profile tool (perhaps gnome-power-manager or something like that). This > way > the user can choose. > Auke, I was wondering if we could use PM-QOS to have the driver drop to the 100Mb speed, when requests for bandwidth and latency are not in effect? --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: gigabit ethernet power consumption
On Tue, Oct 09, 2007 at 11:41:17AM -0700, Kok, Auke wrote: Lennart Sorensen wrote: On Mon, Oct 08, 2007 at 03:31:51PM -0700, Kok, Auke wrote: you most certainly want to do this in userspace I think. One of the biggest problems is that link negotiation can take a significant amount of time, well over several seconds (1 to 3 seconds typical) with gigabit, and having your ethernet connection go offline for 3 seconds may not be the desired effect for when you want to get more bandwidth in the first place. However, when a laptop is in battery mode, switching down from gigabit to 100mbit makes a lot more sense, so this is something I would recommend. This can be as easy as changing the advertisement mask of the interface and renegotiating the link. Userspace could handle that very easily. Now if you were trying to transfer a lot of data to the laptop, would it be more power efficient to do it at gigabit speeds so you can finish sooner and shut down the machine entirely, or to slow to 100mbit and take longer to do it, and hence spend more time powering the cpu and ram? my suspicion is that the cost of switching is much higher than what you would consume running at 100mbit, even if the amount of data is quite large. going offline to renegotiate the link would already cost you 3W typically. I definately think that userspace is the right field to solve this problem: let the users decide how to use the available power on their sytems through a decent power profile tool (perhaps gnome-power-manager or something like that). This way the user can choose. Auke, I was wondering if we could use PM-QOS to have the driver drop to the 100Mb speed, when requests for bandwidth and latency are not in effect? --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: RFC: reviewer's statement of oversight
On Mon, Oct 08, 2007 at 11:24:45AM -0600, Jonathan Corbet wrote: > Last month, at the kernel summit, there was discussion of putting a > Reviewed-by: tag onto patches to document the oversight they had > received on their way into the mainline. That tag has made an > occasional appearance since then, but there has not yet been a > discussion of what it really means. So it has not yet brought a whole > lot of value to the process. > > As I was trying to sleep last night, it occurred to me that what we > might need is an equivalent of the DCO for the Reviewed-by tag. To that > end, I dedicated a few minutes of my life to the following bit of text. > It's really just meant to be a starting point for the discussion. Is > the following something close to what we understand Reviewed-by to mean? > > jon > > > Reviewer's statement of oversight v0.01 > > By offering my Reviewed-by: tag, I state that: > > (a) I have carried out a technical review of this patch to evaluate its > appropriateness and readiness for inclusion into the mainline kernel. > > (b) Any problems, concerns, or questions relating to the patch have been > communicated back to the submitter. I am satisfied with how the > submitter has responded to my comments. > > (c) While there may (or may not) be things which could be improved with > this submission, I believe that it is, at this time, (1) a > worthwhile addition to the kernel, and (2) free of serious known > issues which would argue against its inclusion. C-1 "worthwhile addition..." Probably shouldn't be part of this. That's what additional Signed off by ACK's provide. I think reviewed by should limit its scope to code correctness leaving the subjective "worthwhile" statements are better expressed with other tags. > > (d) While I have reviewed the patch and believe it to be sound, I can not > (unless explicitly stated elsewhere) make any warranties or guarantees > that it will achieve its stated purpose or function properly in any > given situation. > > (e) I understand and agree that this project and the contribution are > public and that a record of the contribution (including my Reviewed-by > tag and any associated public communications) is maintained > indefinitely and may be redistributed consistent with this project or > the open source license(s) involved. > - I think this is a good thing to have, although recruiting reviews remains an open issue. I think it would be easier to recruit patch testers than reviewers should a Tested-by: tag be considered as well? --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: RFC: reviewer's statement of oversight
On Mon, Oct 08, 2007 at 11:24:45AM -0600, Jonathan Corbet wrote: Last month, at the kernel summit, there was discussion of putting a Reviewed-by: tag onto patches to document the oversight they had received on their way into the mainline. That tag has made an occasional appearance since then, but there has not yet been a discussion of what it really means. So it has not yet brought a whole lot of value to the process. As I was trying to sleep last night, it occurred to me that what we might need is an equivalent of the DCO for the Reviewed-by tag. To that end, I dedicated a few minutes of my life to the following bit of text. It's really just meant to be a starting point for the discussion. Is the following something close to what we understand Reviewed-by to mean? jon Reviewer's statement of oversight v0.01 By offering my Reviewed-by: tag, I state that: (a) I have carried out a technical review of this patch to evaluate its appropriateness and readiness for inclusion into the mainline kernel. (b) Any problems, concerns, or questions relating to the patch have been communicated back to the submitter. I am satisfied with how the submitter has responded to my comments. (c) While there may (or may not) be things which could be improved with this submission, I believe that it is, at this time, (1) a worthwhile addition to the kernel, and (2) free of serious known issues which would argue against its inclusion. C-1 worthwhile addition... Probably shouldn't be part of this. That's what additional Signed off by ACK's provide. I think reviewed by should limit its scope to code correctness leaving the subjective worthwhile statements are better expressed with other tags. (d) While I have reviewed the patch and believe it to be sound, I can not (unless explicitly stated elsewhere) make any warranties or guarantees that it will achieve its stated purpose or function properly in any given situation. (e) I understand and agree that this project and the contribution are public and that a record of the contribution (including my Reviewed-by tag and any associated public communications) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. - I think this is a good thing to have, although recruiting reviews remains an open issue. I think it would be easier to recruit patch testers than reviewers should a Tested-by: tag be considered as well? --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
pm qos infrastructure and interface
The following patch is a generalization of the latency.c implementation done by Arjan last year. It provides infrastructure for more than one parameter, and exposes a user mode interface for processes to register pm_qos expectations of processes. This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on one of the parameters. Currently we have {cpu_dma_latency, network_latency, network_throughput} as the initial set of pm_qos parameters. The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters being runtime configurable or changeable from a driver was seen as too easy to abuse. For each parameter a list of performance requirements is maintained along with an aggregated target value. The aggregated target value is updated with changes to the requirement list or elements of the list. Typically the aggregated target value is simply the max or min of the requirement values held in the parameter list elements. >From kernel mode the use of this interface is simple: pm_qos_add_requirement(param_id, name, target_value): Will insert a named element in the list for that identified PM_QOS parameter with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. pm_qos_update_requirement(param_id, name, new_target_value): Will search the list identified by the param_id for the named list element and then update its target value, calling the notification tree if the aggregated target is changed. with that name is already registered. pm_qos_remove_requirement(param_id, name): Will search the identified list for the named element and remove it, after removal it will update the aggregate target and call the notification tree if the target was changed as a result of removing the named requirement. >From user mode: Only processes can register a pm_qos requirement. To provide for automatic cleanup for process the interface requires the process to register its parameter requirements in the following way: To register the default pm_qos target for the specific parameter, the process must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] As long as the device node is held open that process has a registered requirement on the parameter. The name of the requirement is "process_" derived from the current->pid from within the open system call. To change the requested target value the process needs to write a s32 value to the open device node. This translates to a pm_qos_update_requirement call. To remove the user mode request for a target value simply close the device node. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/Documentation/pm_qos_interface.txt linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt --- linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt 2007-10-04 14:26:58.0 -0700 @@ -0,0 +1,59 @@ +PM quality of Service interface. + +This interface provides a kernel and user mode interface for registering +performance expectations by drivers, subsystems and user space applications on +one of the parameters. + +Currently we have {cpu_dma_latency, network_latency, network_throughput} as the +initial set of pm_qos parameters. + +The infrastructure exposes multiple misc device nodes one per implemented +parameter. The set of parameters implement is defined by pm_qos_power_init() +and pm_qos_params.h. This is done because having the available parameters +being runtime configurable or changeable from a driver was seen as too easy to +abuse. + +For each parameter a list of performance requirements is maintained along with +an aggregated target value. The aggregated target value is updated with +changes to the requirement list or elements of the list. Typically the +aggregated target value is simply the max or min of the requirement values held +in the parameter list elements. + +From kernel mode the use of this interface is simple: +pm_qos_add_requirement(param_id, name, target_value): +Will insert a named element in the list for that identified PM_QOS parameter +with the target value. Upon change to this list the new target is recomputed +and any registered notifiers are called only if the target value is now +different. + +pm_qos_update_requirement(param_id, name, new_target_value): +Will search the list identified by the param_id for the named list element and +then update its target value, calling the notification tree if the aggregated +target is changed. with that n
Re: [PATCH] PM_QOS 1 of 2
On Thu, Oct 04, 2007 at 12:53:56PM -0700, Andrew Morton wrote: > On Mon, 1 Oct 2007 16:45:28 -0700 > Mark Gross <[EMAIL PROTECTED]> wrote: > > > The following is the cleaned up patch implementing the power management > > quality of service infrastructure discussed at the pm summit last June. > > > > It is a genralization of the latency code put into the kernel last year > > by Arjan. > > > > I would like to get this code included in the MM tree and to get some > > milage on it. > > > > One thing to note about this implementation is that it exposes an > > interface to user space for registering pm_qos constraints in addition > > to the kernel exports. Its a file based interface where a module can > > register a constraint and the constraint is valid only as long as the > > device node is held open. Upon closing of the device node that > > constraint is cleaned up. > > > > The patch set is in two postings. > > 1) the base parameter code (this email) > > 2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h > > I wouldn't really view this as an adequate changelog. > > - The Subject:s are pretty pathetic (please see my suggesed replacements) uhg. Your right. > > - There is no description of the proposed new kernel<->userspace > interfaces. > the above description is light on specific details. > As you are proposing new and permanent enhancements to the Linux API, > this is something which should be spelled out in some detail. Because we > can change the implementation, but we can not ever change your interface. > > It would be nice to get that interface described in Documentation/ > somewhere, but it is *critical* that the design be fully revealed right > now, during review. I'll provide this. > > > Anyway, I am not a suitable person to review this submission. > > I'll put the patches in -mm for a bit of eyeball-and-test (not that anyone > will know how to test it, due to the secret interfaces) but I do not want > to move this code into mainline until someone who is familiar with the PM > code has performed a detailed review of both the implementation and the > design (whatever that is!). > > Please send new, complete descriptions of these patches. I don't think > they can be effectively reviewed without that information. Except perhaps > by someone who was at the PM summit, but that's cheating. > I will do this. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] PM_QOS 1 of 2
On Thu, Oct 04, 2007 at 12:53:56PM -0700, Andrew Morton wrote: On Mon, 1 Oct 2007 16:45:28 -0700 Mark Gross [EMAIL PROTECTED] wrote: The following is the cleaned up patch implementing the power management quality of service infrastructure discussed at the pm summit last June. It is a genralization of the latency code put into the kernel last year by Arjan. I would like to get this code included in the MM tree and to get some milage on it. One thing to note about this implementation is that it exposes an interface to user space for registering pm_qos constraints in addition to the kernel exports. Its a file based interface where a module can register a constraint and the constraint is valid only as long as the device node is held open. Upon closing of the device node that constraint is cleaned up. The patch set is in two postings. 1) the base parameter code (this email) 2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h I wouldn't really view this as an adequate changelog. - The Subject:s are pretty pathetic (please see my suggesed replacements) uhg. Your right. - There is no description of the proposed new kernel-userspace interfaces. the above description is light on specific details. As you are proposing new and permanent enhancements to the Linux API, this is something which should be spelled out in some detail. Because we can change the implementation, but we can not ever change your interface. It would be nice to get that interface described in Documentation/ somewhere, but it is *critical* that the design be fully revealed right now, during review. I'll provide this. Anyway, I am not a suitable person to review this submission. I'll put the patches in -mm for a bit of eyeball-and-test (not that anyone will know how to test it, due to the secret interfaces) but I do not want to move this code into mainline until someone who is familiar with the PM code has performed a detailed review of both the implementation and the design (whatever that is!). Please send new, complete descriptions of these patches. I don't think they can be effectively reviewed without that information. Except perhaps by someone who was at the PM summit, but that's cheating. I will do this. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
pm qos infrastructure and interface
The following patch is a generalization of the latency.c implementation done by Arjan last year. It provides infrastructure for more than one parameter, and exposes a user mode interface for processes to register pm_qos expectations of processes. This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on one of the parameters. Currently we have {cpu_dma_latency, network_latency, network_throughput} as the initial set of pm_qos parameters. The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters being runtime configurable or changeable from a driver was seen as too easy to abuse. For each parameter a list of performance requirements is maintained along with an aggregated target value. The aggregated target value is updated with changes to the requirement list or elements of the list. Typically the aggregated target value is simply the max or min of the requirement values held in the parameter list elements. From kernel mode the use of this interface is simple: pm_qos_add_requirement(param_id, name, target_value): Will insert a named element in the list for that identified PM_QOS parameter with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. pm_qos_update_requirement(param_id, name, new_target_value): Will search the list identified by the param_id for the named list element and then update its target value, calling the notification tree if the aggregated target is changed. with that name is already registered. pm_qos_remove_requirement(param_id, name): Will search the identified list for the named element and remove it, after removal it will update the aggregate target and call the notification tree if the target was changed as a result of removing the named requirement. From user mode: Only processes can register a pm_qos requirement. To provide for automatic cleanup for process the interface requires the process to register its parameter requirements in the following way: To register the default pm_qos target for the specific parameter, the process must open one of /dev/[cpu_dma_latency, network_latency, network_throughput] As long as the device node is held open that process has a registered requirement on the parameter. The name of the requirement is process_PID derived from the current-pid from within the open system call. To change the requested target value the process needs to write a s32 value to the open device node. This translates to a pm_qos_update_requirement call. To remove the user mode request for a target value simply close the device node. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/Documentation/pm_qos_interface.txt linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt --- linux-2.6.23-rc8/Documentation/pm_qos_interface.txt 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/Documentation/pm_qos_interface.txt 2007-10-04 14:26:58.0 -0700 @@ -0,0 +1,59 @@ +PM quality of Service interface. + +This interface provides a kernel and user mode interface for registering +performance expectations by drivers, subsystems and user space applications on +one of the parameters. + +Currently we have {cpu_dma_latency, network_latency, network_throughput} as the +initial set of pm_qos parameters. + +The infrastructure exposes multiple misc device nodes one per implemented +parameter. The set of parameters implement is defined by pm_qos_power_init() +and pm_qos_params.h. This is done because having the available parameters +being runtime configurable or changeable from a driver was seen as too easy to +abuse. + +For each parameter a list of performance requirements is maintained along with +an aggregated target value. The aggregated target value is updated with +changes to the requirement list or elements of the list. Typically the +aggregated target value is simply the max or min of the requirement values held +in the parameter list elements. + +From kernel mode the use of this interface is simple: +pm_qos_add_requirement(param_id, name, target_value): +Will insert a named element in the list for that identified PM_QOS parameter +with the target value. Upon change to this list the new target is recomputed +and any registered notifiers are called only if the target value is now +different. + +pm_qos_update_requirement(param_id, name, new_target_value): +Will search the list identified by the param_id for the named list element and +then update its target value, calling the notification tree if the aggregated +target is changed. with that name is already registered. + +pm_qos_remove_requirement
Re: [linux-pm] [PATCH] PM_QOS 2 of 2
this is the second part of the patch to replace latency.c use with pm_qos_params use. --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c --- linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c 2007-09-26 13:54:28.0 -0700 +++ linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c 2007-10-01 11:32:13.0 -0700 @@ -38,7 +38,7 @@ #include #include #include/* need_resched() */ -#include +#include #include /* @@ -605,7 +605,8 @@ if (cx->promotion.state && ((cx->promotion.state - pr->power.states) <= max_cstate)) { if (sleep_ticks > cx->promotion.threshold.ticks && - cx->promotion.state->latency <= system_latency_constraint()) { + cx->promotion.state->latency <= + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { cx->promotion.count++; cx->demotion.count = 0; if (cx->promotion.count >= @@ -649,7 +650,8 @@ * or if the latency of the current state is unacceptable */ if ((pr->power.state - pr->power.states) > max_cstate || - pr->power.state->latency > system_latency_constraint()) { + pr->power.state->latency > + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { if (cx->demotion.state) next_state = cx->demotion.state; } @@ -1173,7 +1175,7 @@ "maximum allowed latency: %d usec\n", pr->power.state ? pr->power.state - pr->power.states : 0, max_cstate, (unsigned)pr->power.bm_activity, - system_latency_constraint()); + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)); seq_puts(seq, "states:\n"); @@ -1280,7 +1282,8 @@ max_cstate); first_run++; #ifdef CONFIG_SMP - register_latency_notifier(_processor_latency_notifier); + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, + _processor_latency_notifier); #endif } @@ -1354,7 +1357,8 @@ */ cpu_idle_wait(); #ifdef CONFIG_SMP - unregister_latency_notifier(_processor_latency_notifier); + pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, + _processor_latency_notifier); #endif } diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c --- linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 2007-09-26 13:54:34.0 -0700 +++ linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c 2007-10-01 12:32:56.0 -0700 @@ -162,7 +162,7 @@ #include #include #include -#include +#include #include "ipw2100.h" @@ -1701,7 +1701,7 @@ /* the ipw2100 hardware really doesn't want power management delays * longer than 175usec */ - modify_acceptable_latency("ipw2100", 175); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", 175); /* If the interrupt is enabled, turn it off... */ spin_lock_irqsave(>low_lock, flags); @@ -1856,7 +1856,8 @@ ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(>low_lock, flags); - modify_acceptable_latency("ipw2100", INFINITE_LATENCY); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", + PM_QOS_DEFAULT_VALUE); #ifdef ACPI_CSTATE_LIMIT_DEFINED if (priv->config & CFG_C3_DISABLED) { @@ -6544,7 +6545,8 @@ if (ret) goto out; - set_acceptable_latency("ipw2100", INFINITE_LATENCY); + pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", + PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(_pci_driver.driver, @@ -6566,7 +6568,7 @@ _attr_debug_level); #endif pci_unregister_driver(_pci_driver); - remove_acceptable_latency("ipw2100"); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100"); } module_init(ipw2100_init); diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/include/linux/latency.h linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h --- linux-2.6.23-rc8-qos/include/linux/latency.h2007-07-08 1
Re: [linux-pm] [PATCH] PM_QOS 2 of 2
this is the second part of the patch to replace latency.c use with pm_qos_params use. --mgross Signed-off-by: mark gross [EMAIL PROTECTED] --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c --- linux-2.6.23-rc8-qos/drivers/acpi/processor_idle.c 2007-09-26 13:54:28.0 -0700 +++ linux-2.6.23-rc8-qos-nolatency.c/drivers/acpi/processor_idle.c 2007-10-01 11:32:13.0 -0700 @@ -38,7 +38,7 @@ #include linux/dmi.h #include linux/moduleparam.h #include linux/sched.h /* need_resched() */ -#include linux/latency.h +#include linux/pm_qos_params.h #include linux/clockchips.h /* @@ -605,7 +605,8 @@ if (cx-promotion.state ((cx-promotion.state - pr-power.states) = max_cstate)) { if (sleep_ticks cx-promotion.threshold.ticks - cx-promotion.state-latency = system_latency_constraint()) { + cx-promotion.state-latency = + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { cx-promotion.count++; cx-demotion.count = 0; if (cx-promotion.count = @@ -649,7 +650,8 @@ * or if the latency of the current state is unacceptable */ if ((pr-power.state - pr-power.states) max_cstate || - pr-power.state-latency system_latency_constraint()) { + pr-power.state-latency + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { if (cx-demotion.state) next_state = cx-demotion.state; } @@ -1173,7 +1175,7 @@ maximum allowed latency: %d usec\n, pr-power.state ? pr-power.state - pr-power.states : 0, max_cstate, (unsigned)pr-power.bm_activity, - system_latency_constraint()); + pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)); seq_puts(seq, states:\n); @@ -1280,7 +1282,8 @@ max_cstate); first_run++; #ifdef CONFIG_SMP - register_latency_notifier(acpi_processor_latency_notifier); + pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, + acpi_processor_latency_notifier); #endif } @@ -1354,7 +1357,8 @@ */ cpu_idle_wait(); #ifdef CONFIG_SMP - unregister_latency_notifier(acpi_processor_latency_notifier); + pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, + acpi_processor_latency_notifier); #endif } diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c --- linux-2.6.23-rc8-qos/drivers/net/wireless/ipw2100.c 2007-09-26 13:54:34.0 -0700 +++ linux-2.6.23-rc8-qos-nolatency.c/drivers/net/wireless/ipw2100.c 2007-10-01 12:32:56.0 -0700 @@ -162,7 +162,7 @@ #include linux/firmware.h #include linux/acpi.h #include linux/ctype.h -#include linux/latency.h +#include linux/pm_qos_params.h #include ipw2100.h @@ -1701,7 +1701,7 @@ /* the ipw2100 hardware really doesn't want power management delays * longer than 175usec */ - modify_acceptable_latency(ipw2100, 175); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100, 175); /* If the interrupt is enabled, turn it off... */ spin_lock_irqsave(priv-low_lock, flags); @@ -1856,7 +1856,8 @@ ipw2100_disable_interrupts(priv); spin_unlock_irqrestore(priv-low_lock, flags); - modify_acceptable_latency(ipw2100, INFINITE_LATENCY); + pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100, + PM_QOS_DEFAULT_VALUE); #ifdef ACPI_CSTATE_LIMIT_DEFINED if (priv-config CFG_C3_DISABLED) { @@ -6544,7 +6545,8 @@ if (ret) goto out; - set_acceptable_latency(ipw2100, INFINITE_LATENCY); + pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100, + PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(ipw2100_pci_driver.driver, @@ -6566,7 +6568,7 @@ driver_attr_debug_level); #endif pci_unregister_driver(ipw2100_pci_driver); - remove_acceptable_latency(ipw2100); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, ipw2100); } module_init(ipw2100_init); diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8-qos/include/linux/latency.h linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h --- linux-2.6.23-rc8-qos/include/linux/latency.h2007-07-08 16:32:17.0 -0700 +++ linux-2.6.23-rc8-qos-nolatency.c/include/linux/latency.h
[PATCH] PM_QOS 1 of 2
The following is the cleaned up patch implementing the power management quality of service infrastructure discussed at the pm summit last June. It is a genralization of the latency code put into the kernel last year by Arjan. I would like to get this code included in the MM tree and to get some milage on it. One thing to note about this implementation is that it exposes an interface to user space for registering pm_qos constraints in addition to the kernel exports. Its a file based interface where a module can register a constraint and the constraint is valid only as long as the device node is held open. Upon closing of the device node that constraint is cleaned up. The patch set is in two postings. 1) the base parameter code (this email) 2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h thanks, --mgross Signed-off-by: mark gross <[EMAIL PROTECTED]> --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/include/linux/pm_qos_params.h linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h --- linux-2.6.23-rc8/include/linux/pm_qos_params.h 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h 2007-10-01 11:19:13.0 -0700 @@ -0,0 +1,35 @@ +/* interface for the pm_qos_power infrastructure of the linux kernel. + * + * Mark Gross + */ +#include +#include +#include + +struct requirement_list { + struct list_head list; + union { + s32 value; + s32 usec; + s32 kbps; + }; + char *name; +}; + +#define PM_QOS_RESERVED 0 +#define PM_QOS_CPU_DMA_LATENCY 1 +#define PM_QOS_NETWORK_LATENCY 2 +#define PM_QOS_NETWORK_THROUGHPUT 3 + +#define PM_QOS_NUM_CLASSES 4 +#define PM_QOS_DEFAULT_VALUE -1 + +int pm_qos_add_requirement(int qos, char *name, s32 value); +int pm_qos_update_requirement(int qos, char *name, s32 new_value); +void pm_qos_remove_requirement(int qos, char *name); + +int pm_qos_requirement(int qos); + +int pm_qos_add_notifier(int qos, struct notifier_block *notifier); +int pm_qos_remove_notifier(int qos, struct notifier_block *notifier); + diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile --- linux-2.6.23-rc8/kernel/Makefile2007-09-26 13:54:54.0 -0700 +++ linux-2.6.23-rc8-qos/kernel/Makefile2007-10-01 11:13:35.0 -0700 @@ -9,7 +9,7 @@ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ - utsname.o + utsname.o pm_qos_params.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/pm_qos_params.c linux-2.6.23-rc8-qos/kernel/pm_qos_params.c --- linux-2.6.23-rc8/kernel/pm_qos_params.c 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/kernel/pm_qos_params.c 2007-10-01 16:03:31.0 -0700 @@ -0,0 +1,417 @@ +/* + * This module exposes the interface to kernel space for specifying + * QoS dependencies. It provides infrastructure for registration of: + * + * Dependents on a QoS value : register requirements + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * There are 3 basic classes of QoS parameter: latency, timeout, throughput + * each have defined units: + * latency: usec + * timeout: usec <-- currently not used. + * throughput: kbs (kilo byte / sec) + * + * There are lists of pm_qos_objects each one wrapping requirements, notifiers + * + * User mode requirements on a QOS parameter register themselves to the + * subsystem by opening the device node /dev/... and writing there request to + * the node. As long as the process holds a file handle open to the node the + * client continues to be accounted for. Upon file release the usermode + * requirement is removed and a new qos target is computed. This way when the + * requirement that the application has is cleaned up when closes the file + * pointer or exits the pm_qos_object will get an opportunity to clean up. + * + * mark gross [EMAIL PROTECTED] + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * locking rule: all changes to target_value or requirements or notifiers lists + * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock + * held, taken with _irqsave. One lock to rule them all + */ + +struct pm_qos_object { + struct requirement_list requirements; + struct srcu_notifier_head notifiers; + struct miscdevice pm_qos_power_miscdev; + char *name; + s32 default_value; +
Re: [RFC] QoS params patch
On Fri, Sep 28, 2007 at 11:51:41AM -0700, Andrew Morton wrote: > On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross <[EMAIL PROTECTED]> wrote: > > > On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote: > > > On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross <[EMAIL PROTECTED]> wrote: > > > > > > > +#define QOS_RESERVED 0 > > > > +#define QOS_CPU_DMA_LATENCY 1 > > > > +#define QOS_NETWORK_LATENCY 2 > > > > +#define QOS_NETWORK_THROUGHPUT 3 > > > > + > > > > +#define QOS_NUM_CLASSES 4 > > > > +#define QOS_DEFAULT_VALUE -1 > > > > + > > > > +int qos_add_requirement(int qos, char *name, s32 value); > > > > +int qos_update_requirement(int qos, char *name, s32 new_value); > > > > +void qos_remove_requirement(int qos, char *name); > > > > > > It's a bit rude stealing the entire "qos" namespace like this - there are > > > many different forms of QoS, some already in-kernel. > > > > > > s/qos/pm_qos/g ? > > > > I suppose it is a bit inconiderate. I could grow to like pm_qos, > > performance_throttling_constraint_hint_infrastructure is a bit too > > wordy. > > > > I suppose I should use qospm as thats the way it was put up on that > > lesswatts.org web page. > > > > Would qospm be good enough? > > > > Don't think it matters a lot, but kernel naming tends to be big-endian (ie: > we have net_ratelimit, not ratelimit_net), so the major part (pm) would > come first under that scheme. this makes sense. pm_qos it is. --mgross - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] QoS params patch
On Fri, Sep 28, 2007 at 11:51:41AM -0700, Andrew Morton wrote: On Fri, 28 Sep 2007 10:19:21 -0700 Mark Gross [EMAIL PROTECTED] wrote: On Thu, Sep 27, 2007 at 11:25:01PM -0700, Andrew Morton wrote: On Wed, 26 Sep 2007 15:40:26 -0700 Mark Gross [EMAIL PROTECTED] wrote: +#define QOS_RESERVED 0 +#define QOS_CPU_DMA_LATENCY 1 +#define QOS_NETWORK_LATENCY 2 +#define QOS_NETWORK_THROUGHPUT 3 + +#define QOS_NUM_CLASSES 4 +#define QOS_DEFAULT_VALUE -1 + +int qos_add_requirement(int qos, char *name, s32 value); +int qos_update_requirement(int qos, char *name, s32 new_value); +void qos_remove_requirement(int qos, char *name); It's a bit rude stealing the entire qos namespace like this - there are many different forms of QoS, some already in-kernel. s/qos/pm_qos/g ? I suppose it is a bit inconiderate. I could grow to like pm_qos, performance_throttling_constraint_hint_infrastructure is a bit too wordy. I suppose I should use qospm as thats the way it was put up on that lesswatts.org web page. Would qospm be good enough? Don't think it matters a lot, but kernel naming tends to be big-endian (ie: we have net_ratelimit, not ratelimit_net), so the major part (pm) would come first under that scheme. this makes sense. pm_qos it is. --mgross - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] PM_QOS 1 of 2
The following is the cleaned up patch implementing the power management quality of service infrastructure discussed at the pm summit last June. It is a genralization of the latency code put into the kernel last year by Arjan. I would like to get this code included in the MM tree and to get some milage on it. One thing to note about this implementation is that it exposes an interface to user space for registering pm_qos constraints in addition to the kernel exports. Its a file based interface where a module can register a constraint and the constraint is valid only as long as the device node is held open. Upon closing of the device node that constraint is cleaned up. The patch set is in two postings. 1) the base parameter code (this email) 2) replacing of latency.c/latenc.h with pm_qos_params.c/pm_qos_params.h thanks, --mgross Signed-off-by: mark gross [EMAIL PROTECTED] --- diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/include/linux/pm_qos_params.h linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h --- linux-2.6.23-rc8/include/linux/pm_qos_params.h 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/include/linux/pm_qos_params.h 2007-10-01 11:19:13.0 -0700 @@ -0,0 +1,35 @@ +/* interface for the pm_qos_power infrastructure of the linux kernel. + * + * Mark Gross + */ +#include linux/list.h +#include linux/notifier.h +#include linux/miscdevice.h + +struct requirement_list { + struct list_head list; + union { + s32 value; + s32 usec; + s32 kbps; + }; + char *name; +}; + +#define PM_QOS_RESERVED 0 +#define PM_QOS_CPU_DMA_LATENCY 1 +#define PM_QOS_NETWORK_LATENCY 2 +#define PM_QOS_NETWORK_THROUGHPUT 3 + +#define PM_QOS_NUM_CLASSES 4 +#define PM_QOS_DEFAULT_VALUE -1 + +int pm_qos_add_requirement(int qos, char *name, s32 value); +int pm_qos_update_requirement(int qos, char *name, s32 new_value); +void pm_qos_remove_requirement(int qos, char *name); + +int pm_qos_requirement(int qos); + +int pm_qos_add_notifier(int qos, struct notifier_block *notifier); +int pm_qos_remove_notifier(int qos, struct notifier_block *notifier); + diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/Makefile linux-2.6.23-rc8-qos/kernel/Makefile --- linux-2.6.23-rc8/kernel/Makefile2007-09-26 13:54:54.0 -0700 +++ linux-2.6.23-rc8-qos/kernel/Makefile2007-10-01 11:13:35.0 -0700 @@ -9,7 +9,7 @@ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ - utsname.o + utsname.o pm_qos_params.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff -urN -X linux-2.6.23-rc8/Documentation/dontdiff linux-2.6.23-rc8/kernel/pm_qos_params.c linux-2.6.23-rc8-qos/kernel/pm_qos_params.c --- linux-2.6.23-rc8/kernel/pm_qos_params.c 1969-12-31 16:00:00.0 -0800 +++ linux-2.6.23-rc8-qos/kernel/pm_qos_params.c 2007-10-01 16:03:31.0 -0700 @@ -0,0 +1,417 @@ +/* + * This module exposes the interface to kernel space for specifying + * QoS dependencies. It provides infrastructure for registration of: + * + * Dependents on a QoS value : register requirements + * Watchers of QoS value : get notified when target QoS value changes + * + * This QoS design is best effort based. Dependents register their QoS needs. + * Watchers register to keep track of the current QoS needs of the system. + * + * There are 3 basic classes of QoS parameter: latency, timeout, throughput + * each have defined units: + * latency: usec + * timeout: usec -- currently not used. + * throughput: kbs (kilo byte / sec) + * + * There are lists of pm_qos_objects each one wrapping requirements, notifiers + * + * User mode requirements on a QOS parameter register themselves to the + * subsystem by opening the device node /dev/... and writing there request to + * the node. As long as the process holds a file handle open to the node the + * client continues to be accounted for. Upon file release the usermode + * requirement is removed and a new qos target is computed. This way when the + * requirement that the application has is cleaned up when closes the file + * pointer or exits the pm_qos_object will get an opportunity to clean up. + * + * mark gross [EMAIL PROTECTED] + */ + +#include linux/pm_qos_params.h +#include linux/sched.h +#include linux/spinlock.h +#include linux/slab.h +#include linux/time.h +#include linux/fs.h +#include linux/device.h +#include linux/miscdevice.h +#include linux/string.h +#include linux/platform_device.h +#include linux/init.h + +#include linux/uaccess.h + +/* + * locking rule: all changes to target_value or requirements or notifiers lists + * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock + * held, taken with _irqsave. One lock to rule them all