Re: [RFC PATCH 3/5] mm/page_alloc: stop instantly reusing freed page

2016-10-13 Thread Joonsoo Kim
On Thu, Oct 13, 2016 at 12:59:14PM +0200, Vlastimil Babka wrote:
> On 10/13/2016 10:08 AM, js1...@gmail.com wrote:
> >From: Joonsoo Kim 
> >
> >The allocation/free pattern is usually sequential. If freed pages go
> >to the buddy list, they can be coalesced. However, we first keep these
> >freed pages on the pcp list and reuse them until a threshold is
> >reached, so we don't get much chance to form a high-order freepage.
> >This reuse gives us some performance advantage since we don't need to
> >take the zone lock and we don't pay the cost of buddy merging. But
> >less fragmentation and more high-order freepages would compensate for
> >this overhead in other ways. First, we would trigger less direct
> >compaction, which has high overhead. And there are use cases that use
> >high-order pages to boost their performance.
> >
> >Instantly reusing a freed page seems to give us a computational
> >benefit, but the alternative affects more precious things such as I/O
> >performance and memory consumption, so I think it's a good idea to
> >weight the latter advantage more.
> 
> Again, there's also cache hotness to consider. And whether the
> sequential pattern is still real on a system with higher uptime.
> Should be possible to evaluate with tracepoints?

I answered this in a previous e-mail. Anyway, we should evaluate the
cache effect; tracepoints or perf's cache events would show some
evidence. I will do it soon and report again.

Thanks.
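
For reference, one way to look at the cache effect would be to run a
small workload that keeps allocating and freeing single pages while perf
counts cache events (e.g. "perf stat -e cache-references,cache-misses",
or recording the kmem:mm_page_alloc / kmem:mm_page_free tracepoints).
The sketch below only illustrates that idea; the mmap/munmap loop and
the iteration count are assumptions, not something specified in the
thread.

/*
 * Illustrative workload: each iteration faults in and unmaps one
 * anonymous page, so pages keep cycling through the per-cpu lists.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);

	for (int i = 0; i < 1000000; i++) {
		char *p = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		p[0] = 1;		/* fault in the page */
		munmap(p, page_size);	/* free it back through the pcp list */
	}
	return 0;
}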



Re: [RFC PATCH 3/5] mm/page_alloc: stop instantly reusing freed page

2016-10-13 Thread Vlastimil Babka

On 10/13/2016 10:08 AM, js1...@gmail.com wrote:

From: Joonsoo Kim 

The allocation/free pattern is usually sequential. If freed pages go to
the buddy list, they can be coalesced. However, we first keep these freed
pages on the pcp list and reuse them until a threshold is reached, so we
don't get much chance to form a high-order freepage. This reuse gives us
some performance advantage since we don't need to take the zone lock and
we don't pay the cost of buddy merging. But less fragmentation and more
high-order freepages would compensate for this overhead in other ways.
First, we would trigger less direct compaction, which has high overhead.
And there are use cases that use high-order pages to boost their
performance.

Instantly reusing a freed page seems to give us a computational benefit,
but the alternative affects more precious things such as I/O performance
and memory consumption, so I think it's a good idea to weight the latter
advantage more.


Again, there's also cache hotness to consider. And whether the 
sequential pattern is still real on a system with higher uptime. Should 
be possible to evaluate with tracepoints?
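
To make the scheme above concrete, here is a toy userspace model of the
two-list idea (an illustration only, not the kernel code): allocations
are served only from an alloc list, freed pages are parked on a separate
free list, and only a full batch is handed back to the buddy side, which
is where coalescing into high-order pages can happen. drain_to_buddy()
and the refill loop are stand-ins for free_pcppages_bulk() and the buddy
refill path in the real patch.

#include <stdio.h>

#define BATCH	8
#define POOL	64

static int alloc_list[POOL], alloc_count;	/* pages ready to hand out */
static int free_list[POOL], free_count;		/* freed pages, not reused */

static void drain_to_buddy(void)
{
	/* Stand-in for free_pcppages_bulk(): merging can happen here. */
	printf("drain %d pages to buddy\n", free_count);
	free_count = 0;
}

static void page_free(int page)
{
	free_list[free_count++] = page;		/* never reused instantly */
	if (free_count >= BATCH)
		drain_to_buddy();
}

static int page_alloc(void)
{
	if (alloc_count == 0) {
		/* Stand-in for refilling the pcp list from the buddy. */
		for (int i = 0; i < BATCH; i++)
			alloc_list[alloc_count++] = i;
	}
	return alloc_list[--alloc_count];
}

int main(void)
{
	/* Sequential alloc/free pattern from the changelog. */
	for (int i = 0; i < 32; i++)
		page_free(page_alloc());
	return 0;
}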





[RFC PATCH 3/5] mm/page_alloc: stop instantly reusing freed page

2016-10-13 Thread js1304
From: Joonsoo Kim 

The allocation/free pattern is usually sequential. If freed pages go to
the buddy list, they can be coalesced. However, we first keep these freed
pages on the pcp list and reuse them until a threshold is reached, so we
don't get much chance to form a high-order freepage. This reuse gives us
some performance advantage since we don't need to take the zone lock and
we don't pay the cost of buddy merging. But less fragmentation and more
high-order freepages would compensate for this overhead in other ways.
First, we would trigger less direct compaction, which has high overhead.
And there are use cases that use high-order pages to boost their
performance.

Instantly reusing a freed page seems to give us a computational benefit,
but the alternative affects more precious things such as I/O performance
and memory consumption, so I think it's a good idea to weight the latter
advantage more.

Signed-off-by: Joonsoo Kim 
---
 include/linux/mmzone.h |  6 +++--
 mm/page_alloc.c        | 71 --
 mm/vmstat.c            |  7 ++---
 3 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7f2ae99..75a92d1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -260,12 +260,14 @@ enum zone_watermarks {
 #define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
 
 struct per_cpu_pages {
-   int count;  /* number of pages in the list */
+   int alloc_count;/* number of pages in the alloc lists */
+   int free_count; /* number of pages in the free lists */
int high;   /* high watermark, emptying needed */
int batch;  /* chunk size for buddy add/remove */
 
/* Lists of pages, one per migrate type stored on the pcp-lists */
-   struct list_head lists[MIGRATE_PCPTYPES];
+   struct list_head alloc_lists[MIGRATE_PCPTYPES];
+   struct list_head free_lists[MIGRATE_PCPTYPES];
 };
 
 struct per_cpu_pageset {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 70427bf..a167754 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1091,7 +1091,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
-   list = &pcp->lists[migratetype];
+   list = &pcp->free_lists[migratetype];
} while (list_empty(list));
 
/* This is the only non-empty list. Free them all. */
@@ -2258,10 +2258,10 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 
local_irq_save(flags);
batch = READ_ONCE(pcp->batch);
-   to_drain = min(pcp->count, batch);
+   to_drain = min(pcp->free_count, batch);
if (to_drain > 0) {
free_pcppages_bulk(zone, to_drain, pcp);
-   pcp->count -= to_drain;
+   pcp->free_count -= to_drain;
}
local_irq_restore(flags);
 }
@@ -2279,14 +2279,24 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+   int mt;
 
local_irq_save(flags);
pset = per_cpu_ptr(zone->pageset, cpu);
 
pcp = &pset->pcp;
-   if (pcp->count) {
-   free_pcppages_bulk(zone, pcp->count, pcp);
-   pcp->count = 0;
+   if (pcp->alloc_count) {
+   for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
+   list_splice_init(&pcp->alloc_lists[mt],
+   &pcp->free_lists[mt]);
+   }
+   pcp->free_count += pcp->alloc_count;
+   pcp->alloc_count = 0;
+   }
+
+   if (pcp->free_count) {
+   free_pcppages_bulk(zone, pcp->free_count, pcp);
+   pcp->free_count = 0;
}
local_irq_restore(flags);
 }
@@ -2357,12 +2367,13 @@ void drain_all_pages(struct zone *zone)
 
if (zone) {
pcp = per_cpu_ptr(zone->pageset, cpu);
-   if (pcp->pcp.count)
+   if (pcp->pcp.alloc_count || pcp->pcp.free_count)
has_pcps = true;
} else {
for_each_populated_zone(z) {
pcp = per_cpu_ptr(z->pageset, cpu);
-   if (pcp->pcp.count) {
+   if (pcp->pcp.alloc_count ||
+   pcp->pcp.free_count) {
has_pcps = true;
break;
}
@@ -2454,15 +2465,12 @@ void free_hot_cold_page(struct page *page, bool cold)
}
 
pcp = &this_cpu_ptr(zone->pageset)->pcp;
-   if (!cold)
-