Tony,
This patch shrinks the quicklist based upon free memory on the node
instead of the fixed high/low water marks. I have written it to enable
preemption periodically and to recalculate the amount to shrink whenever
we have freed enough pages that the target quicklist size may have grown.
I rescan the node's zones each pass because other processes may be
draining node memory at the same time as we are adding.
Signed-off-by: Robin Holt <[EMAIL PROTECTED]>
No noticeable performance change on lmbench. For completeness' sake, here
are the before and after numbers:
Before:
Process fork+exit: 182.2333 microseconds
Process fork+execve: 692.7500 microseconds
Process fork+/bin/sh -c: 2905.5000 microseconds
After:
Process fork+exit: 181.1935 microseconds
Process fork+execve: 690.0000 microseconds
Process fork+/bin/sh -c: 2945.0000 microseconds
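As a rough illustration of the new sizing policy (the numbers here are
made up for illustration, not measured): on a node with 4GB free and 16KB
pages there are 262144 free pages, so the quicklist is capped at
262144 / PGT_FRACTION_OF_NODE_MEM = 16384 pages, but never shrunk below
MIN_PGT_PAGES (25). At most MAX_PGT_FREES_PER_PASS (16) pages are freed
per pass before preemption is re-enabled and the target recalculated.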
init.c | 62 +++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 39 insertions(+), 23 deletions(-)
Index: linux-2.6/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.orig/arch/ia64/mm/init.c 2005-03-15 11:56:20.954343163 -0600
+++ linux-2.6/arch/ia64/mm/init.c 2005-03-15 11:57:18.773089554 -0600
@@ -53,24 +53,53 @@
EXPORT_SYMBOL(vmem_map);
#endif
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr; /* map entry for zero page */
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);
+#define MIN_PGT_PAGES 25UL
+#define MAX_PGT_FREES_PER_PASS 16L
+#define PGT_FRACTION_OF_NODE_MEM 16
+
+static inline long
+max_pgt_pages(void)
+{
+ u64 node_free_pages, max_pgt_pages;
+
+#ifndef CONFIG_NUMA
+ node_free_pages = nr_free_pages();
+#else
+ node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+ max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
+ max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+ return max_pgt_pages;
+}
+
+static inline long
+min_pages_to_free(void)
+{
+ long pages_to_free;
+
+ pages_to_free = pgtable_quicklist_size - max_pgt_pages();
+ pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
+ return pages_to_free;
+}
+
void
-check_pgt_cache (void)
+check_pgt_cache(void)
{
- int low, high;
+ long pages_to_free;
- low = pgt_cache_water[0];
- high = pgt_cache_water[1];
+ if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+ return;
preempt_disable();
- if (pgtable_quicklist_size > (u64) high) {
- do {
+ while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+ while (pages_to_free--) {
free_page((unsigned long)pgtable_quicklist_alloc());
- } while (pgtable_quicklist_size > (u64) low);
+ }
+ preempt_enable();
+ preempt_disable();
}
preempt_enable();
}
@@ -524,7 +553,6 @@
mem_init (void)
{
long reserved_pages, codesize, datasize, initsize;
- unsigned long num_pgt_pages;
pg_data_t *pgdat;
int i;
static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
@@ -569,18 +597,6 @@
num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
- /*
- * Allow for enough (cached) page table pages so that we can map the entire memory
- * at least once. Each task also needs a couple of page tables pages, so add in a
- * fudge factor for that (don't use "threads-max" here; that would be wrong!).
- * Don't allow the cache to be more than 10% of total memory, though.
- */
-# define NUM_TASKS 500 /* typical number of tasks */
- num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
- if (num_pgt_pages > nr_free_pages() / 10)
- num_pgt_pages = nr_free_pages() / 10;
- if (num_pgt_pages > (u64) pgt_cache_water[1])
- pgt_cache_water[1] = num_pgt_pages;
/*
* For fsyscall entrpoints with no light-weight handler, use the ordinary