Re: [RFC PATCH 10/12] memory-hotplug : free memmap of sparse-vmemmap

2012-07-01 Thread Yasuaki Ishimatsu
Hi Jiang,

2012/07/01 0:58, Jiang Liu wrote:
 On 06/27/2012 01:56 PM, Yasuaki Ishimatsu wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.

 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
   arch/x86/mm/init_64.c |   89 
 ++
   include/linux/mm.h|2 +
   mm/memory_hotplug.c   |5 ++
   mm/sparse.c   |5 +-
   4 files changed, 99 insertions(+), 2 deletions(-)

 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h2012-06-27 09:11:13.790150442 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h 2012-06-27 09:11:16.433117400 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

   enum mf_flags {
 MF_COUNT_INCREASED = 1 << 0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c   2012-06-27 09:06:35.317631878 +0900
 +++ linux-3.5-rc4/mm/sparse.c2012-06-27 09:11:16.434117388 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
  /* This will make the necessary allocations eventually. */
  return sparse_mem_map_populate(pnum, nid);
   }
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long 
 nr_pages)
   {
 -return; /* XXX: Not implemented yet */
 +vmemmap_kfree(page, nr_pages);
   }
   static void free_map_bootmem(struct page *page, unsigned long nr_pages)
   {
 +vmemmap_free_bootmem(page, nr_pages);
   }
   #else
   static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-06-27 09:11:13.791150430 
 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c  2012-06-27 09:11:59.254581998 
 +0900
 @@ -978,6 +978,95 @@ vmemmap_populate(struct page *start_page
  return 0;
   }

 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
 +  struct page *page)
 I think the third parameter should be struct page **pp instead of struct 
 page *page.
 And page = pte_page(*pte) should be *pp = pte_page(*pte).
 Otherwise the found page pointer can't be returned to the caller and 
 vmemmap_kfree()
 just sees random value in variable page.

Oh, you are right. I'll update it.

Thanks,
Yasuaki Ishimatsu

 +{
 +pgd_t *pgd;
 +pud_t *pud;
 +pmd_t *pmd;
 +pte_t *pte;
 +unsigned long next;
 +
 +page = NULL;
 +
 +pgd = pgd_offset_k(addr);
 +if (pgd_none(*pgd))
 +return PAGE_SIZE;
 +
 +pud = pud_offset(pgd, addr);
 +if (pud_none(*pud))
 +return PAGE_SIZE;
 +
 +if (!cpu_has_pse) {
 +next = (addr + PAGE_SIZE) & PAGE_MASK;
 +pmd = pmd_offset(pud, addr);
 +if (pmd_none(*pmd))
 +return next;
 +
 +pte = pte_offset_kernel(pmd, addr);
 +if (pte_none(*pte))
 +return next;
 +
 +page = pte_page(*pte);
 +pte_clear(init_mm, addr, pte);
 +} else {
 +next = pmd_addr_end(addr, end);
 +
 +pmd = pmd_offset(pud, addr);
 +if (pmd_none(*pmd))
 +return next;
 +
 +page = pmd_page(*pmd);
 +pmd_clear(pmd);
 +}
 +
 +return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 +unsigned long addr = (unsigned long)memmap;
 +unsigned long end = (unsigned long)(memmap + nr_pages);
 +unsigned long next;
 +unsigned int order;
 +struct page *page;
 +
 +for (; addr < end; addr = next) {
 +next = find_and_clear_pte_page(addr, end, page);
 +if (!page)
 +continue;
 +
 +if (is_vmalloc_addr(page))
 +vfree(page);
 +else {
 +order = next - addr;
 +   

Re: [RFC PATCH 10/12] memory-hotplug : free memmap of sparse-vmemmap

2012-06-30 Thread Jiang Liu
On 06/27/2012 01:56 PM, Yasuaki Ishimatsu wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.
 
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com
 
 ---
  arch/x86/mm/init_64.c |   89 
 ++
  include/linux/mm.h|2 +
  mm/memory_hotplug.c   |5 ++
  mm/sparse.c   |5 +-
  4 files changed, 99 insertions(+), 2 deletions(-)
 
 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h 2012-06-27 09:11:13.790150442 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h  2012-06-27 09:11:16.433117400 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);
 
  enum mf_flags {
   MF_COUNT_INCREASED = 1 << 0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c2012-06-27 09:06:35.317631878 +0900
 +++ linux-3.5-rc4/mm/sparse.c 2012-06-27 09:11:16.434117388 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
   /* This will make the necessary allocations eventually. */
   return sparse_mem_map_populate(pnum, nid);
  }
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
  {
 - return; /* XXX: Not implemented yet */
 + vmemmap_kfree(page, nr_pages);
  }
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
 + vmemmap_free_bootmem(page, nr_pages);
  }
  #else
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c  2012-06-27 09:11:13.791150430 
 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c   2012-06-27 09:11:59.254581998 
 +0900
 @@ -978,6 +978,95 @@ vmemmap_populate(struct page *start_page
   return 0;
  }
 
 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
 +   struct page *page)
I think the third parameter should be struct page **pp instead of struct 
page *page.
And page = pte_page(*pte) should be *pp = pte_page(*pte).
Otherwise the found page pointer can't be returned to the caller and 
vmemmap_kfree()
just sees random value in variable page.

 +{
 + pgd_t *pgd;
 + pud_t *pud;
 + pmd_t *pmd;
 + pte_t *pte;
 + unsigned long next;
 +
 + page = NULL;
 +
 + pgd = pgd_offset_k(addr);
 + if (pgd_none(*pgd))
 + return PAGE_SIZE;
 +
 + pud = pud_offset(pgd, addr);
 + if (pud_none(*pud))
 + return PAGE_SIZE;
 +
 + if (!cpu_has_pse) {
 + next = (addr + PAGE_SIZE) & PAGE_MASK;
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + pte = pte_offset_kernel(pmd, addr);
 + if (pte_none(*pte))
 + return next;
 +
 + page = pte_page(*pte);
 + pte_clear(init_mm, addr, pte);
 + } else {
 + next = pmd_addr_end(addr, end);
 +
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + page = pmd_page(*pmd);
 + pmd_clear(pmd);
 + }
 +
 + return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 + unsigned long addr = (unsigned long)memmap;
 + unsigned long end = (unsigned long)(memmap + nr_pages);
 + unsigned long next;
 + unsigned int order;
 + struct page *page;
 +
 + for (; addr < end; addr = next) {
 + next = find_and_clear_pte_page(addr, end, page);
 + if (!page)
 + continue;
 +
 + if (is_vmalloc_addr(page))
 + vfree(page);
 + else {
 + order = next - addr;
 + free_pages((unsigned long)page,
 +

[RFC PATCH 10/12] memory-hotplug : free memmap of sparse-vmemmap

2012-06-27 Thread Yasuaki Ishimatsu
I don't think that all pages of virtual mapping in removed memory can be
freed, since page which type is MIX_SECTION_INFO is difficult to free.
So, the patch only frees page which type is SECTION_INFO at first.

CC: Len Brown len.br...@intel.com
CC: Benjamin Herrenschmidt b...@kernel.crashing.org
CC: Paul Mackerras pau...@samba.org
CC: Christoph Lameter c...@linux.com
Cc: Minchan Kim minchan@gmail.com
CC: Andrew Morton a...@linux-foundation.org
CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
CC: Wen Congyang we...@cn.fujitsu.com
Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

---
 arch/x86/mm/init_64.c |   89 ++
 include/linux/mm.h|2 +
 mm/memory_hotplug.c   |5 ++
 mm/sparse.c   |5 +-
 4 files changed, 99 insertions(+), 2 deletions(-)

Index: linux-3.5-rc4/include/linux/mm.h
===
--- linux-3.5-rc4.orig/include/linux/mm.h   2012-06-27 09:11:13.790150442 
+0900
+++ linux-3.5-rc4/include/linux/mm.h2012-06-27 09:11:16.433117400 +0900
@@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
+void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

 enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
Index: linux-3.5-rc4/mm/sparse.c
===
--- linux-3.5-rc4.orig/mm/sparse.c  2012-06-27 09:06:35.317631878 +0900
+++ linux-3.5-rc4/mm/sparse.c   2012-06-27 09:11:16.434117388 +0900
@@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
/* This will make the necessary allocations eventually. */
return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
 {
-   return; /* XXX: Not implemented yet */
+   vmemmap_kfree(page, nr_pages);
 }
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
+   vmemmap_free_bootmem(page, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
Index: linux-3.5-rc4/arch/x86/mm/init_64.c
===
--- linux-3.5-rc4.orig/arch/x86/mm/init_64.c2012-06-27 09:11:13.791150430 
+0900
+++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-06-27 09:11:59.254581998 +0900
@@ -978,6 +978,95 @@ vmemmap_populate(struct page *start_page
return 0;
 }

+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
+ struct page *page)
+{
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   unsigned long next;
+
+   page = NULL;
+
+   pgd = pgd_offset_k(addr);
+   if (pgd_none(*pgd))
+   return PAGE_SIZE;
+
+   pud = pud_offset(pgd, addr);
+   if (pud_none(*pud))
+   return PAGE_SIZE;
+
+   if (!cpu_has_pse) {
+   next = (addr + PAGE_SIZE) & PAGE_MASK;
+   pmd = pmd_offset(pud, addr);
+   if (pmd_none(*pmd))
+   return next;
+
+   pte = pte_offset_kernel(pmd, addr);
+   if (pte_none(*pte))
+   return next;
+
+   page = pte_page(*pte);
+   pte_clear(init_mm, addr, pte);
+   } else {
+   next = pmd_addr_end(addr, end);
+
+   pmd = pmd_offset(pud, addr);
+   if (pmd_none(*pmd))
+   return next;
+
+   page = pmd_page(*pmd);
+   pmd_clear(pmd);
+   }
+
+   return next;
+}
+
+void __meminit
+vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+   unsigned long addr = (unsigned long)memmap;
+   unsigned long end = (unsigned long)(memmap + nr_pages);
+   unsigned long next;
+   unsigned int order;
+   struct page *page;
+
+   for (; addr < end; addr = next) {
+   next = find_and_clear_pte_page(addr, end, page);
+   if (!page)
+   continue;
+
+   if (is_vmalloc_addr(page))
+   vfree(page);
+   else {
+   order = next - addr;
+   free_pages((unsigned long)page,
+  get_order(sizeof(struct page) *  order));
+   }
+   }
+}
+
+void __meminit
+vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+   unsigned long addr = (unsigned long)memmap;
+   unsigned long end = (unsigned long)(memmap + nr_pages);
+   unsigned long next;
+   struct page *page;
+   unsigned