Today, the MM subsystem uses the buddy 'Page Allocator' to manage memory
at a 'page' granularity. But this allocator has no notion of the physical
topology of the underlying memory hardware, which makes it hard to
influence memory allocation decisions according to the constraints of
the platform.

So we need to augment the page-allocator with a new entity to manage
memory (at a much larger granularity) keeping the underlying platform
characteristics and the memory hardware topology in mind.

To that end, introduce a "Memory Region Allocator" as a backend to the
existing "Page Allocator".


Splitting the memory allocator into a Page-Allocator frontend and a
Region-Allocator backend:


                 Page Allocator          |      Memory Region Allocator
                                         -
           __    __    __                |    ________    ________
          |__|--|__|--|__|-- ...         -   |        |  |        |
           ____    ____    ____          |   |        |  |        |
          |____|--|____|--|____|-- ...   -   |        |--|        |-- ...
                                         |   |        |  |        |
                                         -   |________|  |________|
                                         |
                                         -
             Manages pages using         |     Manages memory regions
              buddy freelists            -  (allocates and frees entire
                                         |   memory regions, i.e., at a
                                         -   memory-region granularity)


The flow of memory allocations/frees between entities requesting memory
(applications/kernel) and the MM subsystem:

                  pages               regions
  Applications <========>   Page    <========>  Memory Region
   and Kernel             Allocator               Allocator



Because the region allocator is meant to function as a backend to the
page allocator, we implement it on a per-zone basis (the page allocator
is also per-zone).

Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com>
---

 include/linux/mmzone.h |   17 +++++++++++++++++
 mm/page_alloc.c        |   19 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 155c1a1..7c87518 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -112,6 +112,21 @@ struct free_area {
        unsigned long           nr_free;
 };
 
+/* A simplified free_area (a single list plus a count) for managing entire memory regions */
+struct free_area_region {
+       struct list_head        list;           /* initialized empty by init_zone_region_allocator() */
+       unsigned long           nr_free;        /* starts at 0; presumably the number of entries on 'list' -- TODO confirm */
+};
+
+struct mem_region {
+       struct free_area_region region_area[MAX_ORDER]; /* indices 0..MAX_ORDER-1; presumably one per page order -- TODO confirm */
+};
+
+struct region_allocator {
+       struct mem_region       region[MAX_NR_ZONE_REGIONS];    /* one slot per possible zone region */
+       int                     next_region;    /* set to -1 at init; NOTE(review): exact meaning not visible here */
+};
+
 struct pglist_data;
 
 /*
@@ -405,6 +420,8 @@ struct zone {
        struct zone_mem_region  zone_regions[MAX_NR_ZONE_REGIONS];
        int                     nr_zone_regions;
 
+       struct region_allocator region_allocator;
+
 #ifndef CONFIG_SPARSEMEM
        /*
         * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc02a80..876c231 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5240,6 +5240,23 @@ static void __meminit init_node_memory_regions(struct pglist_data *pgdat)
        pgdat->nr_node_regions = idx;
 }
 
+static void __meminit init_zone_region_allocator(struct zone *zone)
+{
+       struct free_area_region *area;
+       int i, j;
+       /* Reset the region allocator's lists for each of this zone's nr_zone_regions regions. */
+       for (i = 0; i < zone->nr_zone_regions; i++) {
+               area = zone->region_allocator.region[i].region_area;
+               /* Every one of the MAX_ORDER entries starts as an empty list with a zero count. */
+               for (j = 0; j < MAX_ORDER; j++) {
+                       INIT_LIST_HEAD(&area[j].list);
+                       area[j].nr_free = 0;
+               }
+       }
+       /* NOTE(review): -1 presumably means "no region selected yet" -- confirm against users of next_region. */
+       zone->region_allocator.next_region = -1;
+}
+
 static void __meminit zone_init_free_lists_late(struct zone *zone)
 {
        struct mem_region_list *mr_list;
@@ -5326,6 +5343,8 @@ static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 
                zone_init_free_lists_late(z);
 
+               init_zone_region_allocator(z);
+
                /*
                 * Revisit the last visited node memory region, in case it
                 * spans multiple zones.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to