Tony,

This patch shrinks the quicklist based upon free memory on the node
instead of the fixed high/low water marks.  I have written it to enable
preemption periodically and to recalculate the amount to shrink each
time we have freed enough pages that the allowed quicklist size may have
grown.  I retain the scan of free memory on the node because other
processes may be draining it at the same time as we are adding to it.
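For reference, the shrink loop boils down to the pattern below.  This is
a simplified, stand-alone sketch of the idea, not the kernel code in the
patch itself; cache_size, node_free_pages and free_one_page() are
made-up stand-ins for the per-cpu quicklist size, the node's free page
count and the real page free:

#include <stdio.h>

#define MIN_PGT_PAGES		25L
#define NODE_FREE_PAGES_SHIFT	4

static long cache_size = 200;		/* stand-in for the per-cpu quicklist size */
static long node_free_pages = 1000;	/* stand-in for free pages on the node */

/* Allow the cache to hold 1/16th of the node's free pages, never less than the floor. */
static long max_pgt_pages(void)
{
	long max = node_free_pages >> NODE_FREE_PAGES_SHIFT;

	return max > MIN_PGT_PAGES ? max : MIN_PGT_PAGES;
}

/* Freeing a cached page returns it to the node's free pool. */
static void free_one_page(void)
{
	cache_size--;
	node_free_pages++;
}

int main(void)
{
	long pages_to_free = cache_size - max_pgt_pages();

	while (pages_to_free > 0) {
		/* Free at most one small chunk before giving preemption a chance. */
		long chunk = pages_to_free;

		if (chunk > (1L << NODE_FREE_PAGES_SHIFT))
			chunk = 1L << NODE_FREE_PAGES_SHIFT;
		while (chunk--)
			free_one_page();
		/*
		 * In the kernel this is the preempt_enable()/preempt_disable()
		 * window; the target is then recomputed because the freed pages
		 * (and any concurrent allocations) have changed it.
		 */
		pages_to_free = cache_size - max_pgt_pages();
	}
	printf("cache settled at %ld pages\n", cache_size);
	return 0;
}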


Signed-off-by: Robin Holt <[EMAIL PROTECTED]>


No noticeable performance change on lmbench.  For completeness' sake, here
are the before and after numbers:

Before:
Process fork+exit: 180.8065 microseconds
Process fork+exit: 182.4286 microseconds
Process fork+exit: 184.0333 microseconds
Process fork+exit: 183.3226 microseconds
Process fork+exit: 182.6333 microseconds
Process fork+exit: 183.4000 microseconds
Process fork+exit: 183.4667 microseconds
Process fork+exit: 182.1935 microseconds
Process fork+exit: 182.0667 microseconds
Process fork+exit: 183.7742 microseconds
Process fork+execve: 188.1667 microseconds
Process fork+execve: 188.6071 microseconds
Process fork+execve: 187.5333 microseconds
Process fork+execve: 188.9286 microseconds
Process fork+execve: 188.4333 microseconds
Process fork+execve: 187.6000 microseconds
Process fork+execve: 187.6333 microseconds
Process fork+execve: 188.5333 microseconds
Process fork+execve: 187.9655 microseconds
Process fork+execve: 186.3667 microseconds
After:
Process fork+exit: 182.3793 microseconds
Process fork+exit: 183.0667 microseconds
Process fork+exit: 182.9333 microseconds
Process fork+exit: 183.7742 microseconds
Process fork+exit: 182.9333 microseconds
Process fork+exit: 183.6774 microseconds
Process fork+exit: 182.2903 microseconds
Process fork+exit: 183.2667 microseconds
Process fork+exit: 181.0333 microseconds
Process fork+exit: 183.0000 microseconds
Process fork+execve: 187.9333 microseconds
Process fork+execve: 188.2000 microseconds
Process fork+execve: 188.5333 microseconds
Process fork+execve: 188.7333 microseconds
Process fork+execve: 189.0000 microseconds
Process fork+execve: 188.8667 microseconds
Process fork+execve: 187.3333 microseconds
Process fork+execve: 189.5172 microseconds
Process fork+execve: 188.4333 microseconds
Process fork+execve: 188.7667 microseconds

 init.c |   48 ++++++++++++++++++++++++------------------------
 1 files changed, 24 insertions(+), 24 deletions(-)


Index: linux-2.6/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.orig/arch/ia64/mm/init.c  2005-03-01 10:20:38.289030938 -0600
+++ linux-2.6/arch/ia64/mm/init.c       2005-03-01 10:20:38.374967413 -0600
@@ -50,24 +50,48 @@
 EXPORT_SYMBOL(vmem_map);
 #endif
 
-static int pgt_cache_water[2] = { 25, 50 };
-
 struct page *zero_page_memmap_ptr;             /* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
+#define quicklist_size         local_cpu_data->pgtable_quicklist_size
+#define MIN_PGT_PAGES          25UL
+/* This value was chosen to prevent a large block of frees from holding off timer ticks */
+#define NODE_FREE_PAGES_SHIFT  4
+
+static inline long
+max_pgt_pages (void)
+{
+       u64 node_free_pages, max_pgt_pages;
+
+#ifndef        CONFIG_NUMA
+       node_free_pages = nr_free_pages();
+#else
+       node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+       max_pgt_pages = node_free_pages >> NODE_FREE_PAGES_SHIFT;
+       max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+       return max_pgt_pages;
+}
+
+
 void
 check_pgt_cache (void)
 {
-       int low, high;
+       long pages_to_free;
 
-       low = pgt_cache_water[0];
-       high = pgt_cache_water[1];
+       if (unlikely(quicklist_size <= MIN_PGT_PAGES))
+               return;
 
        preempt_disable();
-       if (local_cpu_data->pgtable_quicklist_size > (u64) high) {
-               do {
+       pages_to_free = quicklist_size - max_pgt_pages();
+       while (unlikely(pages_to_free > 0)) {
+               pages_to_free = min(pages_to_free, 1L << NODE_FREE_PAGES_SHIFT);
+               while (pages_to_free--) {
                        free_page((unsigned long)pgtable_quicklist_alloc());
-               } while (local_cpu_data->pgtable_quicklist_size > (u64) low);
+               }
+               preempt_enable();
+               preempt_disable();
+               pages_to_free = quicklist_size - max_pgt_pages();
        }
        preempt_enable();
 }
@@ -521,7 +545,6 @@
 mem_init (void)
 {
        long reserved_pages, codesize, datasize, initsize;
-       unsigned long num_pgt_pages;
        pg_data_t *pgdat;
        int i;
        static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
@@ -563,19 +586,6 @@
                reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
 
        /*
-        * Allow for enough (cached) page table pages so that we can map the entire memory
-        * at least once.  Each task also needs a couple of page tables pages, so add in a
-        * fudge factor for that (don't use "threads-max" here; that would be wrong!).
-        * Don't allow the cache to be more than 10% of total memory, though.
-        */
-#      define NUM_TASKS        500     /* typical number of tasks */
-       num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
-       if (num_pgt_pages > nr_free_pages() / 10)
-               num_pgt_pages = nr_free_pages() / 10;
-       if (num_pgt_pages > (u64) pgt_cache_water[1])
-               pgt_cache_water[1] = num_pgt_pages;
-
-       /*
         * For fsyscall entrpoints with no light-weight handler, use the ordinary
         * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
         * code can tell them apart.