On Thu, Jun 13 2013, Tang Chen wrote:
> As Yinghai suggested, even if a node is movable node, which has only
> ZONE_MOVABLE, pagetables should be put in the local node.
>
> In memory hot-remove logic, it offlines all pages first, and then
> removes pagetables. But the local pagetable pages cannot be offlined
> because they are used by kernel.
>
> So we should skip this kind of pages in offline procedure. But first
> of all, we need to mark them.
>
> This patch marks local node data pages in the same way as we mark the
> SECTION_INFO and MIX_SECTION_INFO data pages. We introduce a new type
> of bootmem: LOCAL_NODE_DATA. And use page->lru.next to mark this type
> of memory.
>
> Signed-off-by: Tang Chen <[email protected]>
> ---
>  arch/x86/mm/init_64.c          |    2 +
>  include/linux/memblock.h       |   22 +++++++++++++++++
>  include/linux/memory_hotplug.h |   13 ++++++++-
>  mm/memblock.c                  |   52 ++++++++++++++++++++++++++++++++++++++++
>  mm/memory_hotplug.c            |   26 ++++++++++++++++++++
>  5 files changed, 113 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index bb00c46..25de304 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1053,6 +1053,8 @@ static void __init register_page_bootmem_info(void)
>  
>       for_each_online_node(i)
>               register_page_bootmem_info_node(NODE_DATA(i));
> +
> +     register_page_bootmem_local_node();
>  #endif
>  }
>  
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index a85ced9..8a38eef 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -131,6 +131,28 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
>            i != (u64)ULLONG_MAX;                                      \
>            __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
>  
> +void __next_local_node_mem_range(int *idx, int nid, phys_addr_t *out_start,
> +                              phys_addr_t *out_end, int *out_nid);

Why not make it return int?

> +
> +/**
> + * for_each_local_node_mem_range - iterate memblock areas storing local node
> + *                                 data
> + * @i: int used as loop variable
> + * @nid: node selector, %MAX_NUMNODES for all nodes
> + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
> + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
> + * @p_nid: ptr to int for nid of the range, can be %NULL
> + *
> + * Walks over memblock areas storing local node data. Since all the local node
> + * areas will be reserved by memblock, this iterator will only iterate
> + * memblock.reserve. Available as soon as memblock is initialized.
> + */
> +#define for_each_local_node_mem_range(i, nid, p_start, p_end, p_nid)     \
> +     for (i = -1,                                                        \
> +          __next_local_node_mem_range(&i, nid, p_start, p_end, p_nid);   \
> +          i != -1;                                                       \
> +          __next_local_node_mem_range(&i, nid, p_start, p_end, p_nid))
> +

If __next_local_node_mem_range() returned int, this would be easier:

+#define for_each_local_node_mem_range(i, nid, p_start, p_end, p_nid)         \
+       for (i = -1;                                                          \
+            (i = __next_local_node_mem_range(i, nid, p_start, p_end, p_nid)) != -1; )

>  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
>  int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
>  
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 0b21e54..c0c4107 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h

> +/**
> + * __next_local_node_mem_range - next function for
> + *                               for_each_local_node_mem_range()
> + * @idx: pointer to int loop variable
> + * @nid: node selector, %MAX_NUMNODES for all nodes
> + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
> + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
> + * @out_nid: ptr to int for nid of the range, can be %NULL
> + */
> +void __init_memblock __next_local_node_mem_range(int *idx, int nid,
> +                                     phys_addr_t *out_start,
> +                                     phys_addr_t *out_end, int *out_nid)
> +{
> +     __next_flag_mem_range(idx, nid, MEMBLK_LOCAL_NODE,
> +                           out_start, out_end, out_nid);
> +}

static inline in a header file perhaps?

-- 
Best regards,                                         _     _
.o. | Liege of Serenely Enlightened Majesty of      o' \,=./ `o
..o | Computer Science,  Michał “mina86” Nazarewicz    (o o)
ooo +----<email/xmpp: [email protected]>--------------ooO--(_)--Ooo--

Attachment: signature.asc
Description: PGP signature

Reply via email to