Introduce a mechanism to wait on free memory.

Currently congestion_wait() is abused to do this.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 arch/i386/lib/usercopy.c |    2 +-
 fs/xfs/linux-2.6/kmem.c  |    4 ++--
 include/linux/mm.h       |    3 +++
 mm/page_alloc.c          |   25 +++++++++++++++++++++++--
 mm/shmem.c               |    2 +-
 mm/vmscan.c              |    1 +
 6 files changed, 31 insertions(+), 6 deletions(-)

Index: linux-2.6-mm/arch/i386/lib/usercopy.c
===================================================================
--- linux-2.6-mm.orig/arch/i386/lib/usercopy.c  2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/arch/i386/lib/usercopy.c       2007-04-05 16:29:49.000000000 +0200
@@ -751,7 +751,7 @@ survive:
 
                        if (retval == -ENOMEM && is_init(current)) {
                                up_read(&current->mm->mmap_sem);
-                               congestion_wait(WRITE, HZ/50);
+                               page_alloc_wait(HZ/50);
                                goto survive;
                        }
 
Index: linux-2.6-mm/fs/xfs/linux-2.6/kmem.c
===================================================================
--- linux-2.6-mm.orig/fs/xfs/linux-2.6/kmem.c   2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/fs/xfs/linux-2.6/kmem.c        2007-04-05 16:29:49.000000000 +0200
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __n
                        printk(KERN_ERR "XFS: possible memory allocation "
                                        "deadlock in %s (mode:0x%x)\n",
                                        __FUNCTION__, lflags);
-               congestion_wait(WRITE, HZ/50);
+               page_alloc_wait(HZ/50);
        } while (1);
 }
 
@@ -131,7 +131,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsig
                        printk(KERN_ERR "XFS: possible memory allocation "
                                        "deadlock in %s (mode:0x%x)\n",
                                        __FUNCTION__, lflags);
-               congestion_wait(WRITE, HZ/50);
+               page_alloc_wait(HZ/50);
        } while (1);
 }
 
Index: linux-2.6-mm/include/linux/mm.h
===================================================================
--- linux-2.6-mm.orig/include/linux/mm.h        2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/include/linux/mm.h     2007-04-05 16:29:49.000000000 +0200
@@ -1028,6 +1028,9 @@ extern void setup_per_cpu_pageset(void);
 static inline void setup_per_cpu_pageset(void) {}
 #endif
 
+void page_alloc_ok(void);
+long page_alloc_wait(long timeout);
+
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
Index: linux-2.6-mm/mm/page_alloc.c
===================================================================
--- linux-2.6-mm.orig/mm/page_alloc.c   2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/mm/page_alloc.c        2007-04-05 16:35:04.000000000 +0200
@@ -107,6 +107,9 @@ unsigned long __meminitdata nr_kernel_pa
 unsigned long __meminitdata nr_all_pages;
 static unsigned long __initdata dma_reserve;
 
+static wait_queue_head_t page_alloc_wqh =
+       __WAIT_QUEUE_HEAD_INITIALIZER(page_alloc_wqh);
+
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
    * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct
@@ -1698,7 +1701,7 @@ nofail_alloc:
                        if (page)
                                goto got_pg;
                        if (gfp_mask & __GFP_NOFAIL) {
-                               congestion_wait(WRITE, HZ/50);
+                               page_alloc_wait(HZ/50);
                                goto nofail_alloc;
                        }
                }
@@ -1763,7 +1766,7 @@ nofail_alloc:
                        do_retry = 1;
        }
        if (do_retry) {
-               congestion_wait(WRITE, HZ/50);
+               page_alloc_wait(HZ/50);
                goto rebalance;
        }
 
@@ -4217,3 +4220,21 @@ void set_pageblock_flags_group(struct pa
                else
                        __clear_bit(bitidx + start_bitidx, bitmap);
 }
+
+void page_alloc_ok(void)
+{
+       if (waitqueue_active(&page_alloc_wqh))
+               wake_up(&page_alloc_wqh);
+}
+
+long page_alloc_wait(long timeout)
+{
+       long ret;
+       DEFINE_WAIT(wait);
+
+       prepare_to_wait(&page_alloc_wqh, &wait, TASK_UNINTERRUPTIBLE);
+       ret = schedule_timeout(timeout);
+       finish_wait(&page_alloc_wqh, &wait);
+       return ret;
+}
+EXPORT_SYMBOL(page_alloc_wait);
Index: linux-2.6-mm/mm/shmem.c
===================================================================
--- linux-2.6-mm.orig/mm/shmem.c        2007-04-05 16:24:15.000000000 +0200
+++ linux-2.6-mm/mm/shmem.c     2007-04-05 16:30:31.000000000 +0200
@@ -1216,7 +1216,7 @@ repeat:
                        page_cache_release(swappage);
                        if (error == -ENOMEM) {
                                /* let kswapd refresh zone for GFP_ATOMICs */
-                               congestion_wait(WRITE, HZ/50);
+                               page_alloc_wait(HZ/50);
                        }
                        goto repeat;
                }
Index: linux-2.6-mm/mm/vmscan.c
===================================================================
--- linux-2.6-mm.orig/mm/vmscan.c       2007-04-05 16:29:46.000000000 +0200
+++ linux-2.6-mm/mm/vmscan.c    2007-04-05 16:29:49.000000000 +0200
@@ -1436,6 +1436,7 @@ static int kswapd(void *p)
                finish_wait(&pgdat->kswapd_wait, &wait);
 
                balance_pgdat(pgdat, order);
+               page_alloc_ok();
        }
        return 0;
 }

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to