The patch titled

     x86_64: Tell VM about holes in nodes

has been added to the -mm tree.  Its filename is

     x86_64-tell-vm-about-holes-in-nodes.patch

Patches currently in -mm which might be from [EMAIL PROTECTED] are

x86_64-dont-oops-at-boot-when-empty-opteron-node-has-io.patch
x86_64-tell-vm-about-holes-in-nodes.patch
x86_64-remove-duplicated-sys_time64.patch
x86_64-fix-off-by-one-in-e820_mapped.patch



From: Andi Kleen <[EMAIL PROTECTED]>

Some nodes can have large holes on x86-64.

This fixes problems with the VM allowing too many dirty pages because it
overestimates the amount of available RAM in a node.  In extreme cases you
can end up with all RAM filled with dirty pages which can lead to deadlocks
and other nasty behaviour.

This patch just tells the VM about the known holes from e820.  Reserved
memory (like the kernel text or mem_map) is still not taken into account,
but that should be only a few percent error now.

A small detail is that the flat setup now uses the NUMA
free_area_init_node() too, because it offers more flexibility.

(akpm: lotsa thanks to Martin for working this problem out)

Cc: Martin Bligh <[EMAIL PROTECTED]>
Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---

 arch/x86_64/kernel/e820.c |   34 ++++++++++++++++++++++++++++++++++
 arch/x86_64/mm/init.c     |   16 ++++++++++++----
 arch/x86_64/mm/numa.c     |    8 +++++++-
 include/asm-x86_64/e820.h |    2 ++
 4 files changed, 55 insertions(+), 5 deletions(-)

diff -puN arch/x86_64/kernel/e820.c~x86_64-tell-vm-about-holes-in-nodes arch/x86_64/kernel/e820.c
--- devel/arch/x86_64/kernel/e820.c~x86_64-tell-vm-about-holes-in-nodes 2005-08-26 12:51:04.000000000 -0700
+++ devel-akpm/arch/x86_64/kernel/e820.c        2005-08-26 12:51:04.000000000 -0700
@@ -185,6 +185,40 @@ unsigned long __init e820_end_of_ram(voi
 }
 
 /* 
+ * Compute how much memory is missing in a range.
+ * Unlike the other functions in this file the arguments are in page numbers.
+ */
+unsigned long __init
+e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
+{
+       unsigned long ram = 0; /* bytes of E820_RAM found in range */
+       unsigned long start = start_pfn << PAGE_SHIFT; /* pfn -> bytes */
+       unsigned long end = end_pfn << PAGE_SHIFT;     /* pfn -> bytes */
+       int i;
+       for (i = 0; i < e820.nr_map; i++) {
+               struct e820entry *ei = &e820.map[i];
+               unsigned long last, addr;
+
+               if (ei->type != E820_RAM ||
+                   ei->addr+ei->size <= start ||
+                   ei->addr >= end)
+                       continue; /* not RAM or outside range */
+
+               addr = round_up(ei->addr, PAGE_SIZE); /* whole pages only */
+               if (addr < start)
+                       addr = start; /* clip to range start */
+
+               last = round_down(ei->addr + ei->size, PAGE_SIZE);
+               if (last >= end)
+                       last = end; /* clip to range end */
+
+               if (last > addr) /* entry may hold no whole page */
+                       ram += last - addr;
+       }
+       return ((end - start) - ram) >> PAGE_SHIFT; /* hole size in pages */
+}
+
+/*
  * Mark e820 reserved areas as busy for the resource manager.
  */
 void __init e820_reserve_resources(void)
diff -puN arch/x86_64/mm/init.c~x86_64-tell-vm-about-holes-in-nodes arch/x86_64/mm/init.c
--- devel/arch/x86_64/mm/init.c~x86_64-tell-vm-about-holes-in-nodes     2005-08-26 12:51:04.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/init.c    2005-08-26 12:51:04.000000000 -0700
@@ -322,18 +322,26 @@ void zap_low_mappings(void)
 void __init paging_init(void)
 {
        {
-               unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+               unsigned long zones_size[MAX_NR_ZONES];
+               unsigned long holes[MAX_NR_ZONES];
                unsigned int max_dma;
 
+               memset(zones_size, 0, sizeof(zones_size));
+               memset(holes, 0, sizeof(holes));
+
                max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-               if (end_pfn < max_dma)
+               if (end_pfn < max_dma) {
                        zones_size[ZONE_DMA] = end_pfn;
-               else {
+                       holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
+               } else {
                        zones_size[ZONE_DMA] = max_dma;
+                       holes[ZONE_DMA] = e820_hole_size(0, max_dma);
                        zones_size[ZONE_NORMAL] = end_pfn - max_dma;
+                       holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
                }
-               free_area_init(zones_size);
+               free_area_init_node(0, NODE_DATA(0), zones_size,
+                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
        }
        return;
 }
diff -puN arch/x86_64/mm/numa.c~x86_64-tell-vm-about-holes-in-nodes arch/x86_64/mm/numa.c
--- devel/arch/x86_64/mm/numa.c~x86_64-tell-vm-about-holes-in-nodes     2005-08-26 12:51:04.000000000 -0700
+++ devel-akpm/arch/x86_64/mm/numa.c    2005-08-26 12:51:04.000000000 -0700
@@ -126,9 +126,11 @@ void __init setup_node_zones(int nodeid)
 { 
        unsigned long start_pfn, end_pfn; 
        unsigned long zones[MAX_NR_ZONES];
+       unsigned long holes[MAX_NR_ZONES];
        unsigned long dma_end_pfn;
 
        memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
+       memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
 
        start_pfn = node_start_pfn(nodeid);
        end_pfn = node_end_pfn(nodeid);
@@ -139,13 +141,17 @@ void __init setup_node_zones(int nodeid)
        dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
        if (start_pfn < dma_end_pfn) { 
                zones[ZONE_DMA] = dma_end_pfn - start_pfn;
+               holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
                zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
+               holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
+
        } else { 
                zones[ZONE_NORMAL] = end_pfn - start_pfn; 
+               holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
        } 
     
        free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
-                           start_pfn, NULL); 
+                           start_pfn, holes);
 } 
 
 void __init numa_init_array(void)
diff -puN include/asm-x86_64/e820.h~x86_64-tell-vm-about-holes-in-nodes include/asm-x86_64/e820.h
--- devel/include/asm-x86_64/e820.h~x86_64-tell-vm-about-holes-in-nodes 2005-08-26 12:51:04.000000000 -0700
+++ devel-akpm/include/asm-x86_64/e820.h        2005-08-26 12:51:04.000000000 -0700
@@ -51,6 +51,8 @@ extern int e820_mapped(unsigned long sta
 
 extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
 extern void e820_setup_gap(void);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
 
 extern void __init parse_memopt(char *p, char **end);
 
_
-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to