Hi Maeda,
Here is a patch that should fix the hang problem you reported. Could you
try it?
Before applying this patch (fix_kernbench_pb.patch), you must first revert
the previous one I sent you (reclaim_mapped_pages.patch).
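A word on the fix itself: as far as I can tell, the hang came from the
throttle loop in __alloc_pages(), where a task of an over-limit class could
spin in blk_congestion_wait() forever once every ckrm_zone of the class had
become unreclaimable. ckrm_shrink_class() now flags such a class with
CLS_CONGESTION, and the allocator gives up instead of waiting. Condensed
from the first diff below (identifiers are those of the patch):

	while (!ckrm_class_limit_ok(cls)) {
		blk_congestion_wait(WRITE, HZ/50);
		if (test_and_clear_bit(CLS_CONGESTION, &cls->flags)) {
			int czindex = 0;
			for_each_zone(zone) {
				if (zone->present_pages == 0)
					continue;
				/* restart the scan statistics */
				cls->ckrm_zone[czindex].pages_scanned = 0;
				cls->ckrm_zone[czindex].all_unreclaimable = 0;
				czindex++;
			}
			return NULL;	/* fail the allocation, don't hang */
		}
	}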
Sometimes on my machine, under memory pressure in a class, kswapd is no
longer woken up. The patch fix_shrink_atlimit_pb.patch fixes this problem.
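In that patch (the last diff below), shrink_count becomes an atomic_t:
cls->shrink_count++ on a plain int is a non-atomic read-modify-write, so
concurrent calls to ckrm_shrink_atlimit() can lose increments, and I suspect
this was confusing the at-limit throttling. A minimal sketch of the change,
with the names from the patch:

	/* before: racy, two CPUs can both read the same value and
	 * write back count + 1, losing one of the increments */
	cls->shrink_count++;

	/* after: a single atomic read-modify-write */
	atomic_inc(&cls->shrink_count);
	if (atomic_read(&cls->shrink_count) > ckrm_mem_shrink_count)
		clear_bit(CLS_AT_LIMIT, &cls->flags);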
Thanks for your help,
Valérie
------------------------------------------------------------------------
diff -ruNp a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h
--- a/include/linux/ckrm_mem.h 2006-02-01 15:42:13.000000000 +0100
+++ b/include/linux/ckrm_mem.h 2006-02-10 14:53:55.000000000 +0100
@@ -34,6 +34,8 @@ struct ckrm_zone {
unsigned long nr_inactive;
unsigned long active_over;
unsigned long inactive_over;
+ unsigned long pages_scanned;
+ int all_unreclaimable;
struct list_head guar_list; /* list of all over guar classes */
struct zone *zone;
@@ -74,6 +76,7 @@ struct ckrm_mem_res {
};
#define CLS_AT_LIMIT (1)
+#define CLS_CONGESTION (2)
extern struct ckrm_res_ctlr mem_rcbs;
extern struct ckrm_mem_res *ckrm_mem_root_class;
diff -ruNp a/include/linux/ckrm_mem_inline.h b/include/linux/ckrm_mem_inline.h
--- a/include/linux/ckrm_mem_inline.h 2006-02-01 15:42:13.000000000 +0100
+++ b/include/linux/ckrm_mem_inline.h 2006-02-08 11:24:09.000000000 +0100
@@ -274,18 +274,20 @@ ckrm_zone_add_inactive(struct ckrm_zone
}
static inline void
-ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt)
+ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt, int pgscanned)
{
czone->nr_active -= cnt;
+ czone->pages_scanned += pgscanned;
sub_use_count(czone->memcls, 0, ckrm_czone_idx(czone), cnt);
while (cnt--)
kref_put(&czone->memcls->nr_users, memclass_release);
}
static inline void
-ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt)
+ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt, int pgscanned)
{
czone->nr_inactive -= cnt;
+ czone->pages_scanned += pgscanned;
sub_use_count(czone->memcls, 0, ckrm_czone_idx(czone), cnt);
while (cnt--)
kref_put(&czone->memcls->nr_users, memclass_release);
@@ -394,8 +396,8 @@ static inline void ckrm_mem_dec_inactive
#define ckrm_zone_add_active(a, b) do {} while (0)
#define ckrm_zone_add_inactive(a, b) do {} while (0)
-#define ckrm_zone_sub_active(a, b) do {} while (0)
-#define ckrm_zone_sub_inactive(a, b) do {} while (0)
+#define ckrm_zone_sub_active(a, b, c) do {} while (0)
+#define ckrm_zone_sub_inactive(a, b, c) do {} while (0)
#define set_page_ckrmzone(a, b) do {} while (0)
#define ckrm_class_limit_ok(a) (1)
diff -ruNp a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c 2006-02-01 15:44:37.000000000 +0100
+++ b/mm/page_alloc.c 2006-02-10 11:06:12.000000000 +0100
@@ -379,6 +379,9 @@ free_pages_bulk(struct zone *zone, int c
unsigned long flags;
struct page *page = NULL;
int ret = 0;
+#ifdef CONFIG_CKRM_RES_MEM
+ struct ckrm_zone *czone;
+#endif
spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
@@ -388,6 +391,14 @@ free_pages_bulk(struct zone *zone, int c
/* have to delete it as __free_pages_bulk list manipulates */
list_del(&page->lru);
__free_pages_bulk(page, zone, order);
+#ifdef CONFIG_CKRM_RES_MEM
+ if ((czone = page_ckrmzone(page))) {
+ czone->pages_scanned = 0;
+ czone->all_unreclaimable = 0;
+ if (czone->memcls)
+ clear_bit(CLS_CONGESTION, &czone->memcls->flags);
+ }
+#endif
ret++;
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -871,16 +882,36 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
int do_retry;
int alloc_flags;
int did_some_progress;
+#ifdef CONFIG_CKRM_RES_MEM
+ struct ckrm_mem_res *cls = ckrm_task_memclass(p);
+ struct zone *zone;
+#endif
might_sleep_if(wait);
- if (!in_interrupt() && !ckrm_class_limit_ok(ckrm_task_memclass(p))
+#ifdef CONFIG_CKRM_RES_MEM
+ if (!in_interrupt() && !ckrm_class_limit_ok(cls)
&& wait) {
/* take a nap, let kswapd refresh zone */
blk_congestion_wait(WRITE, HZ/50);
- while (!ckrm_class_limit_ok(ckrm_task_memclass(p)))
+ while (!ckrm_class_limit_ok(cls)) {
blk_congestion_wait(WRITE, HZ/50);
+ if (test_and_clear_bit(CLS_CONGESTION, &cls->flags)) {
+ int czindex = 0;
+ for_each_zone(zone) {
+ struct ckrm_zone *czone;
+ if (zone->present_pages == 0)
+ continue;
+ czone = &cls->ckrm_zone[czindex];
+ czone->pages_scanned = 0;
+ czone->all_unreclaimable = 0;
+ czindex++;
+ }
+ return NULL;
+ }
+ }
}
+#endif
restart:
z = zonelist->zones; /* the list of zones suitable for gfp_mask */
diff -ruNp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c 2006-02-01 15:54:01.000000000 +0100
+++ b/mm/vmscan.c 2006-02-08 15:20:11.000000000 +0100
@@ -621,7 +621,6 @@ static int isolate_lru_pages(int nr_to_s
continue;
} else {
list_add(&page->lru, dst);
- set_page_ckrmzone(page, NULL);
nr_taken++;
}
}
@@ -665,7 +664,7 @@ static void shrink_cache(struct zone *zo
inactive_list,
&page_list, &nr_scan);
zone->nr_inactive -= nr_taken;
- ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
+ ckrm_zone_sub_inactive(ckrm_zone, nr_taken, nr_scan);
zone->pages_scanned += nr_scan;
spin_unlock_irq(&zone->lru_lock);
@@ -701,7 +700,6 @@ static void shrink_cache(struct zone *zo
zone->nr_inactive++;
list_add(&page->lru, inactive_list);
}
- set_page_ckrmzone(page, ckrm_zone);
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
@@ -766,7 +764,7 @@ refill_inactive_zone(struct zone *zone,
&l_hold, &pgscanned);
zone->pages_scanned += pgscanned;
zone->nr_active -= pgmoved;
- ckrm_zone_sub_active(ckrm_zone, pgmoved);
+ ckrm_zone_sub_active(ckrm_zone, pgmoved, pgscanned);
spin_unlock_irq(&zone->lru_lock);
/*
@@ -800,6 +798,11 @@ refill_inactive_zone(struct zone *zone,
if (swap_tendency >= 100)
reclaim_mapped = 1;
+#ifdef CONFIG_CKRM_RES_MEM
+ if (ckrm_zone->pages_scanned > ckrm_zone->nr_active + ckrm_zone->nr_inactive)
+ reclaim_mapped = 1;
+#endif
+
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
@@ -826,7 +829,6 @@ refill_inactive_zone(struct zone *zone,
if (!TestClearPageActive(page))
BUG();
list_move(&page->lru, inactive_list);
- set_page_ckrmzone(page, ckrm_zone);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
zone->nr_inactive += pgmoved;
@@ -857,7 +859,6 @@ refill_inactive_zone(struct zone *zone,
BUG();
BUG_ON(!PageActive(page));
list_move(&page->lru, active_list);
- set_page_ckrmzone(page, ckrm_zone);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
zone->nr_active += pgmoved;
@@ -906,6 +907,7 @@ ckrm_shrink_class(struct ckrm_mem_res *c
struct scan_control sc;
struct zone *zone;
int czindex = 0, cnt, act_credit = 0, inact_credit = 0;
+ int all_zones_unreclaim = 1;
sc.nr_mapped = read_page_state(nr_mapped);
sc.nr_scanned = 0;
@@ -927,6 +929,10 @@ ckrm_shrink_class(struct ckrm_mem_res *c
czone = &cls->ckrm_zone[czindex];
BUG_ON(czone->zone != zone);
+ if (czone->all_unreclaimable) {
+ czindex++;
+ continue;
+ }
zone->temp_priority = zone->prev_priority;
zone->prev_priority = sc.priority;
@@ -960,10 +966,20 @@ ckrm_shrink_class(struct ckrm_mem_res *c
if (sc.ckrm_active || sc.ckrm_inactive) {
sc.nr_to_reclaim = sc.ckrm_inactive;
shrink_ckrmzone(czone, &sc);
+ if (czone->pages_scanned >
+ (czone->nr_active + czone->nr_inactive) * 4)
+ czone->all_unreclaimable = 1;
+ else
+ all_zones_unreclaim = 0;
}
+ else
+ czone->all_unreclaimable = 1;
zone->prev_priority = zone->temp_priority;
czindex++;
}
+ if (all_zones_unreclaim)
+ /* kswapd is getting into trouble */
+ set_bit(CLS_CONGESTION, &cls->flags);
}
static void
------------------------------------------------------------------------
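For reference, the diff below is presumably the earlier
reclaim_mapped_pages.patch to revert: it forced reclaim of mapped pages
whenever the class was over its limit. fix_kernbench_pb.patch replaces that
with a milder heuristic based on pages_scanned, roughly:

	/* old (reclaim_mapped_pages.patch, reverted): always reclaim
	 * mapped pages while the class is over its limit */
	if (!ckrm_class_limit_ok(ckrm_zone->memcls))
		reclaim_mapped = 1;

	/* new (fix_kernbench_pb.patch): only once the class zone has
	 * been scanned more than the size of its LRU lists */
	if (ckrm_zone->pages_scanned >
			ckrm_zone->nr_active + ckrm_zone->nr_inactive)
		reclaim_mapped = 1;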
diff -ruNp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c 2006-02-01 15:54:01.000000000 +0100
+++ b/mm/vmscan.c 2006-02-01 15:53:43.000000000 +0100
@@ -800,6 +800,9 @@ refill_inactive_zone(struct zone *zone,
if (swap_tendency >= 100)
reclaim_mapped = 1;
+ if (!ckrm_class_limit_ok(ckrm_zone->memcls))
+ reclaim_mapped = 1;
+
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
------------------------------------------------------------------------
diff -ruNp a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h
--- a/include/linux/ckrm_mem.h 2006-02-10 14:53:55.000000000 +0100
+++ b/include/linux/ckrm_mem.h 2006-02-10 14:58:16.000000000 +0100
@@ -70,7 +70,7 @@ struct ckrm_mem_res {
struct list_head shrink_list; /* list of classes that are near
* limit and need to be shrunk
*/
- int shrink_count;
+ atomic_t shrink_count;
unsigned long last_shrink;
struct ckrm_zone ckrm_zone[0]; /* must be the last element */
};
diff -ruNp a/kernel/ckrm/ckrm_memctlr.c b/kernel/ckrm/ckrm_memctlr.c
--- a/kernel/ckrm/ckrm_memctlr.c 2006-02-10 14:53:28.000000000 +0100
+++ b/kernel/ckrm/ckrm_memctlr.c 2006-02-10 14:58:33.000000000 +0100
@@ -448,10 +448,10 @@ ckrm_shrink_atlimit(struct ckrm_mem_res
if (time_after(jiffies, cls->last_shrink +
ckrm_mem_shrink_interval * HZ)) {
cls->last_shrink = jiffies;
- cls->shrink_count = 0;
+ atomic_set(&cls->shrink_count, 0);
}
- cls->shrink_count++;
- if (cls->shrink_count > ckrm_mem_shrink_count) {
+ atomic_inc(&cls->shrink_count);
+ if (atomic_read(&cls->shrink_count) > ckrm_mem_shrink_count) {
clear_bit(CLS_AT_LIMIT, &cls->flags);
return;
}