We now provide an option for users who don't want to specify physical
memory addresses on the kernel command line.

        /*
         * For movablemem_map=acpi:
         *
         * SRAT:                |_____| |_____| |_________| |_________| ......
         * node id:                0       1         1           2
         * hotpluggable:           n       y         y           n
         * movablemem_map:              |_____| |_________|
         *
         * Using movablemem_map, we can prevent memblock from allocating memory
         * on ZONE_MOVABLE at boot time.
         */

So the user just specifies movablemem_map=acpi, and the kernel will use the
hotpluggable info in SRAT to determine which memory ranges should be set as
ZONE_MOVABLE.

NOTE: Using this option will degrade NUMA performance because the whole node
      will be set as ZONE_MOVABLE, and the kernel cannot use memory on it.
      Users who don't want to lose NUMA performance should not use it.

Signed-off-by: Tang Chen <tangc...@cn.fujitsu.com>
---
 Documentation/kernel-parameters.txt |   15 +++++++
 arch/x86/mm/srat.c                  |   74 +++++++++++++++++++++++++++++++++--
 include/linux/mm.h                  |    2 +
 mm/page_alloc.c                     |   22 ++++++++++-
 4 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dd3a36a..40387a2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1649,6 +1649,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        that the amount of memory usable for all allocations
                        is not too small.
 
+       movablemem_map=acpi
+                       [KNL,X86,IA-64,PPC] This parameter is similar to
+                       memmap except it specifies the memory map of
+                       ZONE_MOVABLE.
+                       This option informs the kernel to use the Hot Pluggable
+                       bit in the SRAT flags from the ACPI BIOS to determine
+                       which memory devices can be hotplugged. The
+                       corresponding memory ranges will be set as ZONE_MOVABLE.
+                       NOTE: Whatever node the kernel resides in will always
+                             be un-hotpluggable.
+
        movablemem_map=nn[KMG]@ss[KMG]
                        [KNL,X86,IA-64,PPC] This parameter is similar to
                        memmap except it specifies the memory map of
@@ -1669,6 +1680,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        satisfied. So the administrator should be careful that
                        the amount of movablemem_map areas are not too large.
                        Otherwise kernel won't have enough memory to start.
+                       NOTE: We don't stop users specifying the node the
+                             kernel resides in as hotpluggable so that this
+                             option can be used as a workaround of firmware
+                             bugs.
 
        MTD_Partition=  [MTD]
                        Format: <name>,<region-number>,<size>,<offset>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 44a9b9b..4f443de 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -142,15 +142,78 @@ static inline int save_add_info(void) {return 0;}
 #endif
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static void __init sanitize_movablemem_map(int nid, u64 start, u64 end)
+static void __init sanitize_movablemem_map(int nid, u64 start, u64 end,
+                                          bool hotpluggable)
 {
-       int overlap;
+       int overlap, i;
        unsigned long start_pfn, end_pfn;
 
        start_pfn = PFN_DOWN(start);
        end_pfn = PFN_UP(end);
 
        /*
+        * For movablemem_map=acpi:
+        *
+        * SRAT:                |_____| |_____| |_________| |_________| ......
+        * node id:                0       1         1           2
+        * hotpluggable:           n       y         y           n
+        * movablemem_map:              |_____| |_________|
+        *
+        * Using movablemem_map, we can prevent memblock from allocating memory
+        * on ZONE_MOVABLE at boot time.
+        *
+        * Before parsing SRAT, memblock has already reserved some memory ranges
+        * for other purposes, such as for the kernel image. We cannot prevent
+        * the kernel from using this memory, so we need to exclude it
+        * even if it is hotpluggable.
+        * Furthermore, to ensure the kernel has enough memory to boot, we make
+        * all the memory on the node which the kernel resides in
+        * un-hotpluggable.
+        */
+       if (hotpluggable && movablemem_map.acpi) {
+               /* Exclude ranges reserved by memblock. */
+               struct memblock_type *rgn = &memblock.reserved;
+
+               for (i = 0; i < rgn->cnt; i++) {
+                       if (end <= rgn->regions[i].base ||
+                           start >= rgn->regions[i].base +
+                           rgn->regions[i].size)
+                               continue;
+
+                       /*
+                        * If the memory range overlaps the memory reserved by
+                        * memblock, then the kernel resides in this node.
+                        */
+                       node_set(nid, movablemem_map.numa_nodes_kernel);
+                       zone_movable_limit[nid] = 0;
+
+                       return;
+               }
+
+               /*
+                * If the kernel resides in this node, then the whole node
+                * should not be hotpluggable.
+                */
+               if (node_isset(nid, movablemem_map.numa_nodes_kernel)) {
+                       zone_movable_limit[nid] = 0;
+                       return;
+               }
+
+               /*
+                * Otherwise, if the range is hotpluggable, and the kernel is
+                * not on this node, insert it into movablemem_map.
+                */
+               insert_movablemem_map(start_pfn, end_pfn);
+               if (zone_movable_limit[nid])
+                       zone_movable_limit[nid] = min(zone_movable_limit[nid],
+                                                     start_pfn);
+               else
+                       zone_movable_limit[nid] = start_pfn;
+
+               return;
+       }
+
+       /*
         * For movablemem_map=nn[KMG]@ss[KMG]:
         *
         * SRAT:                |_____| |_____| |_________| |_________| ......
@@ -160,6 +223,8 @@ static void __init sanitize_movablemem_map(int nid, u64 start, u64 end)
         *
         * Using movablemem_map, we can prevent memblock from allocating memory
         * on ZONE_MOVABLE at boot time.
+        *
+        * NOTE: In this case, SRAT info will be ignored.
         */
        overlap = movablemem_map_overlap(start_pfn, end_pfn);
        if (overlap >= 0) {
@@ -189,7 +254,8 @@ static void __init sanitize_movablemem_map(int nid, u64 start, u64 end)
        }
 }
 #else          /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline void sanitize_movablemem_map(int nid, u64 start, u64 end)
+static inline void sanitize_movablemem_map(int nid, u64 start, u64 end,
+                                          bool hotpluggable)
 {
 }
 #endif         /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
@@ -234,7 +300,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
               (unsigned long long) start, (unsigned long long) end - 1,
               hotpluggable ? "Hot Pluggable" : "");
 
-       sanitize_movablemem_map(node, start, end);
+       sanitize_movablemem_map(node, start, end, hotpluggable);
 
        return 0;
 out_err_bad_srat:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d2c5fec..37cf1d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1339,8 +1339,10 @@ struct movablemem_entry {
 };
 
 struct movablemem_map {
+       bool acpi;      /* True if using SRAT info. */
        int nr_map;
        struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
+       nodemask_t numa_nodes_kernel;   /* nodes on which the kernel resides */
 };
 
 extern struct movablemem_map movablemem_map;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f451ded..31d27af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -203,7 +203,10 @@ static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /* Movable memory ranges, will also be used by memblock subsystem. */
-struct movablemem_map movablemem_map;
+struct movablemem_map movablemem_map = {
+       .acpi = false,
+       .nr_map = 0,
+};
 
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
@@ -5204,6 +5207,23 @@ static int __init cmdline_parse_movablemem_map(char *p)
        if (!p)
                goto err;
 
+       if (!strcmp(p, "acpi"))
+               movablemem_map.acpi = true;
+
+       /*
+        * If the user decides to use info from the BIOS, all other
+        * user-specified ranges will be ignored.
+        */
+       if (movablemem_map.acpi) {
+               if (movablemem_map.nr_map) {
+                       memset(movablemem_map.map, 0,
+                              sizeof(struct movablemem_entry) *
+                              movablemem_map.nr_map);
+                       movablemem_map.nr_map = 0;
+               }
+               return 0;
+       }
+
        oldp = p;
        mem_size = memparse(p, &p);
        if (p == oldp)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to