Instead of scrubbing pages while holding the heap lock, we can mark the buddy's head as being scrubbed and drop the lock temporarily. If someone (most likely alloc_heap_pages()) then tries to access this chunk, it signals the scrubber to abort by setting the head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each page and stops its work as soon as it sees the bit set.
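For illustration only (not part of the patch): below is a minimal userspace model of this handshake, with a pthread mutex standing in for the heap lock and a C11 atomic standing in for the scrub_state field. All names in it are made up; the hypervisor side instead uses PAGE_SCRUBBING/PAGE_SCRUB_ABORT in struct page_info, spin_lock_kick() on the waiter side, and spin_lock_cb() when the scrubber re-takes the heap lock.

/* Build with: gcc -pthread model.c */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define SCRUBBING   (1 << 1)   /* models PAGE_SCRUBBING */
#define SCRUB_ABORT (1 << 2)   /* models PAGE_SCRUB_ABORT */

static atomic_uint scrub_state;   /* models head->u.free.scrub_state */
static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;

static void *scrubber(void *arg)
{
    pthread_mutex_lock(&heap_lock);
    atomic_store(&scrub_state, SCRUBBING);  /* claim the chunk... */
    pthread_mutex_unlock(&heap_lock);       /* ...and scrub it unlocked */

    for ( int page = 0; page < 256; page++ )
    {
        /* scrub_one_page() would go here */
        if ( atomic_load(&scrub_state) & SCRUB_ABORT )
        {
            atomic_store(&scrub_state, 0);  /* ack: waiter may take the chunk */
            printf("scrubber: aborted at page %d\n", page);
            return NULL;
        }
    }

    /*
     * Re-take the lock the way spin_lock_cb() does: keep checking for an
     * abort request while waiting, or we would deadlock against a waiter
     * that spins on SCRUB_ABORT with the lock held.
     */
    while ( pthread_mutex_trylock(&heap_lock) )
    {
        if ( atomic_load(&scrub_state) & SCRUB_ABORT )
        {
            atomic_store(&scrub_state, 0);
            return NULL;
        }
        sched_yield();
    }
    atomic_store(&scrub_state, 0);          /* done: chunk is clean */
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

static void *allocator(void *arg)
{
    pthread_mutex_lock(&heap_lock);
    if ( atomic_load(&scrub_state) & SCRUBBING )
    {
        atomic_fetch_or(&scrub_state, SCRUB_ABORT); /* ask scrubber to stop */
        while ( atomic_load(&scrub_state) & SCRUB_ABORT )
            sched_yield();                          /* wait for the ack */
    }
    /* chunk now belongs to us */
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

int main(void)
{
    pthread_t s, a;

    pthread_create(&s, NULL, scrubber, NULL);
    pthread_create(&a, NULL, allocator, NULL);
    pthread_join(s, NULL);
    pthread_join(a, NULL);

    return 0;
}

The model keeps the property that matters here: the waiter only spins while the scrubber is guaranteed to observe the abort flag, either between pages or while re-acquiring the lock, so the handshake cannot deadlock.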
Signed-off-by: Boris Ostrovsky <boris.ostrov...@oracle.com>
---
 xen/common/page_alloc.c  | 78 +++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |  4 ++
 xen/include/asm-x86/mm.h |  4 ++
 3 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index e5e6e70..df28090 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -685,6 +685,18 @@ static void check_low_mem_virq(void)
     }
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        smp_mb();
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -771,10 +783,15 @@ static struct page_info *alloc_heap_pages(
         {
             if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
             {
-                if ( (order == 0) || use_unscrubbed ||
-                     !test_bit(_PGC_need_scrub, &pg->count_info) )
+                if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
                     goto found;
 
+                if ( (order == 0) || use_unscrubbed )
+                {
+                    check_and_stop_scrub(pg);
+                    goto found;
+                }
+
                 page_list_add_tail(pg, &heap(node, zone, j));
             }
         }
@@ -911,6 +928,8 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -993,6 +1012,9 @@ static bool_t can_merge(struct page_info *buddy, unsigned int node,
          !!test_bit(_PGC_need_scrub, &buddy->count_info) )
         return 0;
 
+    if ( buddy->u.free.scrub_state & PAGE_SCRUBBING )
+        return 0;
+
     return 1;
 }
 
@@ -1048,12 +1070,34 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
 }
 
 #define SCRUB_CHUNK_ORDER  8
+
+struct scrub_wait_state {
+    struct page_info *pg;
+    bool_t drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = (struct scrub_wait_state *)data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this chunk. Release it. */
+        st->drop = true;
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool_t scrub_free_pages()
 {
     struct page_info *pg;
     unsigned int i, zone;
     unsigned int num_scrubbed, scrub_order, start, end;
     bool_t preempt, is_frag;
+    struct scrub_wait_state st;
     int order, cpu = smp_processor_id();
     nodeid_t node = cpu_to_node(cpu), local_node;
     static nodemask_t node_scrubbing;
@@ -1092,7 +1136,10 @@ bool_t scrub_free_pages()
                 if ( !test_bit(_PGC_need_scrub, &pg->count_info) )
                     break;
 
-                page_list_del(pg, &heap(node, zone, order));
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
 
                 scrub_order = MIN(order, SCRUB_CHUNK_ORDER);
                 num_scrubbed = 0;
@@ -1100,7 +1147,15 @@ bool_t scrub_free_pages()
                 while ( num_scrubbed < (1 << order) )
                 {
                     for ( i = 0; i < (1 << scrub_order); i++ )
+                    {
                         scrub_one_page(&pg[num_scrubbed + i]);
+                        if ( ACCESS_ONCE(pg->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+                        {
+                            /* Someone wants this chunk. Drop everything. */
+                            pg->u.free.scrub_state = 0;
+                            goto out_nolock;
+                        }
+                    }
                     num_scrubbed += (1 << scrub_order);
 
                     if ( softirq_pending(cpu) )
@@ -1110,7 +1165,16 @@ bool_t scrub_free_pages()
                         break;
                     }
                 }
-
+
+                st.pg = pg;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                if ( st.drop )
+                    goto out;
+
+                page_list_del(pg, &heap(node, zone, order));
+
                 start = 0;
                 end = num_scrubbed;
 
@@ -1148,6 +1212,8 @@ bool_t scrub_free_pages()
                     end += (1 << chunk_order);
                 }
 
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt )
                     goto out;
             }
@@ -1156,6 +1222,8 @@ bool_t scrub_free_pages()
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return (node_need_scrub[node] != 0);
 }
@@ -1194,6 +1262,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 52a03a0..1752d44 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -41,6 +41,10 @@ struct page_info
     } inuse;
     /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
     struct {
+#define PAGE_SCRUBBING   (1<<1)
+#define PAGE_SCRUB_ABORT (1<<2)
+        unsigned char scrub_state;
+
         /* Do TLBs need flushing for safety before next page use? */
         bool_t need_tlbflush;
     } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index f3d4443..31e53e9 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@ struct page_info
 
     /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
     struct {
+#define PAGE_SCRUBBING   (1<<1)
+#define PAGE_SCRUB_ABORT (1<<2)
+        unsigned char scrub_state;
+
         /* Do TLBs need flushing for safety before next page use? */
         bool_t need_tlbflush;
     } free;
-- 
1.7.1