Tony,

This patch shrinks the quicklist based upon free memory on the node
instead of the high/low water marks.  I have written it to enable
preemption periodically and recalculate the amount to shrink every time
we have freed enough pages that the quicklist size should have grown.
I rescan the node's zones each pass because other processes may be
draining node memory at the same time as we are adding.

Signed-off-by: Robin Holt <[EMAIL PROTECTED]>


No noticeable performance change on lmbench.  For completeness' sake, here
are the before and after numbers:

Before:
Process fork+exit: 182.2333 microseconds
Process fork+execve: 692.7500 microseconds
Process fork+/bin/sh -c: 2905.5000 microseconds

After:
Process fork+exit: 181.1935 microseconds
Process fork+execve: 690.0000 microseconds
Process fork+/bin/sh -c: 2945.0000 microseconds

 init.c |   62 +++++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 39 insertions(+), 23 deletions(-)

Index: linux-2.6/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.orig/arch/ia64/mm/init.c  2005-03-15 11:56:20.954343163 -0600
+++ linux-2.6/arch/ia64/mm/init.c       2005-03-15 11:57:18.773089554 -0600
@@ -53,24 +53,53 @@
 EXPORT_SYMBOL(vmem_map);
 #endif
 
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr;             /* map entry for zero page */
+struct page *zero_page_memmap_ptr;     /* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
+#define MIN_PGT_PAGES                  25UL
+#define MAX_PGT_FREES_PER_PASS         16
+#define PGT_FRACTION_OF_NODE_MEM       16
+
+static inline long
+max_pgt_pages(void)
+{
+       u64 node_free_pages, max_pgt_pages;
+
+#ifndef        CONFIG_NUMA
+       node_free_pages = nr_free_pages();
+#else
+       node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+       max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
+       max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+       return max_pgt_pages;
+}
+
+static inline long
+min_pages_to_free(void)
+{
+       long pages_to_free;
+
+       pages_to_free = pgtable_quicklist_size - max_pgt_pages();
+       pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
+       return pages_to_free;
+}
+
 void
-check_pgt_cache (void)
+check_pgt_cache(void)
 {
-       int low, high;
+       long pages_to_free;
 
-       low = pgt_cache_water[0];
-       high = pgt_cache_water[1];
+       if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+               return;
 
        preempt_disable();
-       if (pgtable_quicklist_size > (u64) high) {
-               do {
+       while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+               while (pages_to_free--) {
                        free_page((unsigned long)pgtable_quicklist_alloc());
-               } while (pgtable_quicklist_size > (u64) low);
+               }
+               preempt_enable();
+               preempt_disable();
        }
        preempt_enable();
 }
@@ -524,7 +553,6 @@
 mem_init (void)
 {
        long reserved_pages, codesize, datasize, initsize;
-       unsigned long num_pgt_pages;
        pg_data_t *pgdat;
        int i;
        static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
@@ -569,18 +597,6 @@
               num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
               reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
-       /*
-        * Allow for enough (cached) page table pages so that we can map the entire memory
-        * at least once.  Each task also needs a couple of page tables pages, so add in a
-        * fudge factor for that (don't use "threads-max" here; that would be wrong!).
-        * Don't allow the cache to be more than 10% of total memory, though.
-        */
-#      define NUM_TASKS        500     /* typical number of tasks */
-       num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
-       if (num_pgt_pages > nr_free_pages() / 10)
-               num_pgt_pages = nr_free_pages() / 10;
-       if (num_pgt_pages > (u64) pgt_cache_water[1])
-               pgt_cache_water[1] = num_pgt_pages;
 
        /*
         * For fsyscall entrpoints with no light-weight handler, use the ordinary
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to