Re: [PATCH 3/3] mm/vmalloc: correct a few logic error in __insert_vmap_area()

2016-09-20 Thread zijun_hu
Hi All,
Please ignore this patch. As advised by Nicholas Piggin, I have split it
into smaller patches and resent them in another mail thread.

On 09/20/2016 02:02 PM, zijun_hu wrote:
> From: zijun_hu 
> 
> correct a few logic errors in __insert_vmap_area() since the else-if
> condition there is always true and therefore meaningless
>
> avoid an endless loop when [un]mapping improper ranges whose boundaries
> are not page aligned
>
> correct the lazy_max_pages() return value when the number of online
> cpus is a power of 2
>
> improve performance of pcpu_get_vm_areas() by optimizing the vmap_area
> overlap checking algorithm and by finding neighbouring vmap_areas via
> list_head rather than rbtree
>
> simplify the /proc/vmallocinfo implementation via the seq_file helpers
> for list_head
> 
> Signed-off-by: zijun_hu 
> Signed-off-by: zijun_hu 
> ---
>  include/linux/list.h |  11 ++
>  mm/internal.h        |   6 +++
>  mm/memblock.c        |  10 +
>  mm/vmalloc.c         | 104 +++
>  4 files changed, 74 insertions(+), 57 deletions(-)
> 
> diff --git a/include/linux/list.h b/include/linux/list.h
> index 5183138..23c3081 100644
> --- a/include/linux/list.h
> +++ b/include/linux/list.h
> @@ -181,6 +181,17 @@ static inline int list_is_last(const struct list_head *list,
>  }
>  
>  /**
> + * list_is_first - tests whether @list is the first entry in list @head
> + * @list: the entry to test
> + * @head: the head of the list
> + */
> +static inline int list_is_first(const struct list_head *list,
> + const struct list_head *head)
> +{
> + return list->prev == head;
> +}
> +
> +/**
>   * list_empty - tests whether a list is empty
>   * @head: the list to test.
>   */
> diff --git a/mm/internal.h b/mm/internal.h
> index 1501304..abbff7c 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -71,6 +71,12 @@ static inline void set_page_refcounted(struct page *page)
>   set_page_count(page, 1);
>  }
>  
> +/**
> + * check whether range [@s0, @e0) has intersection with [@s1, @e1)
> + */
> +#define is_range_overlay(s0, e0, s1, e1) \
> + (((s1) >= (e0) || (s0) >= (e1)) ? false : true)
> +
>  extern unsigned long highest_memmap_pfn;
>  
>  /*
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 483197e..b4c7d7c 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -85,20 +85,14 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
>  /*
>   * Address comparison utilities
>   */
> -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
> -                              phys_addr_t base2, phys_addr_t size2)
> -{
> - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
> -}
> -
>  bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
>   phys_addr_t base, phys_addr_t size)
>  {
>   unsigned long i;
>  
>   for (i = 0; i < type->cnt; i++)
> - if (memblock_addrs_overlap(base, size, type->regions[i].base,
> -type->regions[i].size))
> + if (is_range_overlay(base, base + size, type->regions[i].base,
> + type->regions[i].base + type->regions[i].size))
>   break;
>   return i < type->cnt;
>  }
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 91f44e7..dc938f6 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -67,7 +67,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
>   do {
>   pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
>   WARN_ON(!pte_none(ptent) && !pte_present(ptent));
> - } while (pte++, addr += PAGE_SIZE, addr != end);
> + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE);
>  }
>  
>  static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
> @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
>   unsigned long next;
>  
>   BUG_ON(addr >= end);
> + WARN_ON(!PAGE_ALIGNED(addr | end));
> +
> + addr = round_down(addr, PAGE_SIZE);
>   pgd = pgd_offset_k(addr);
>   do {
>   next = pgd_addr_end(addr, end);
> @@ -139,7 +142,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
>   return -ENOMEM;
>   set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
>   (*nr)++;
> - } while (pte++, addr += PAGE_SIZE, addr != end);
> + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE);
>   return 0;
>  }
>  
> @@ -193,6 +196,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
>   int nr = 0;
>  
>   BUG_ON(addr >= end);
> + WARN_ON(!PAGE_ALIGNED(addr | end));
> +
> + addr = round_down(addr, PAGE_SIZE);
>   pgd = pgd_offset_k(addr);
>   do {
> next = pgd_addr_end(addr, end);

Re: [PATCH 3/3] mm/vmalloc: correct a few logic error in __insert_vmap_area()

2016-09-20 Thread zijun_hu
On 09/20/2016 02:54 PM, Nicholas Piggin wrote:
> On Tue, 20 Sep 2016 14:02:26 +0800
> zijun_hu  wrote:
> 
>> From: zijun_hu 
>>
>> correct a few logic errors in __insert_vmap_area() since the else-if
>> condition there is always true and therefore meaningless
>>
>> avoid an endless loop when [un]mapping improper ranges whose boundaries
>> are not page aligned
>>
>> correct the lazy_max_pages() return value when the number of online
>> cpus is a power of 2
>>
>> improve performance of pcpu_get_vm_areas() by optimizing the vmap_area
>> overlap checking algorithm and by finding neighbouring vmap_areas via
>> list_head rather than rbtree
>>
>> simplify the /proc/vmallocinfo implementation via the seq_file helpers
>> for list_head
>>
>> Signed-off-by: zijun_hu 
>> Signed-off-by: zijun_hu 
> 
> Could you submit each of these changes as a separate patch? Would you
> consider using capitalisation and punctuation in the changelog?
>
Thanks for your advice. I will follow it and split this patch into
smaller patches.

> Did you measure any performance improvements, or do you have a workload
> where vmalloc shows up in profiles?
>
I don't have measurements in practice, but I am confident there are
theoretical performance improvements for pcpu_get_vm_areas(), for the
reasons below:
1) the iteration count of the vmap_area overlap checking loop is cut in
   half
2) because vmap_areas are kept sorted by address on the list_head, the
   previous and next entries of a vmap_area on the list are exactly its
   nearest neighbours, so no rbtree walk and comparison is needed (see
   the sketch below)
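To make point 2 concrete, here is a minimal sketch of the list-based
neighbour lookup (next_vmap_area() as in the patch below; prev_vmap_area()
sketched here as its mirror image):

static inline struct vmap_area *next_vmap_area(struct vmap_area *va)
{
	/* vmap_area_list is sorted by address, so the next list entry,
	 * if any, is the nearest higher-addressed vmap_area */
	if (list_is_last(&va->list, &vmap_area_list))
		return NULL;
	return list_next_entry(va, list);
}

static inline struct vmap_area *prev_vmap_area(struct vmap_area *va)
{
	/* likewise, the previous entry is the nearest lower-addressed one */
	if (list_is_first(&va->list, &vmap_area_list))
		return NULL;
	return list_prev_entry(va, list);
}
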
> 
>> @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
>>  unsigned long next;
>>  
>>  BUG_ON(addr >= end);
>> +WARN_ON(!PAGE_ALIGNED(addr | end));
> 
> I prefer to avoid mixing bitwise and arithmetic operations unless it's
> necessary. Gcc should be able to optimise
> 
> WARN_ON(!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(end))
> 
I agree with you; I will apply your suggestion.
>> +addr = round_down(addr, PAGE_SIZE);
> 
> I don't know if it's really necessary to relax the API like this for
> internal vmalloc.c functions. If garbage is detected here, it's likely
> due to a bug, and I'm not sure that rounding it would solve the problem.
> 
> For API functions perhaps it's reasonable -- in such cases you should
> consider using WARN_ON_ONCE() or similar.
> 
Actually, another patch, for the API function in lib/ioremap.c, already
uses the approach you point out, as shown below. I am not sure which is
better; perhaps I will swap the two approaches.

Subject: [PATCH 2/3] lib/ioremap.c: avoid endless loop under ioremapping
improper ranges

for ioremap_page_range(), an endless loop may happen if either the addr
or the end parameter is not page aligned; to fix this issue and to make
the range parameter requirements explicit, a BUG_ON() check is performed
first

for ioremap_pte_range(), the loop end condition is adjusted because there
is no pte_addr_end() macro to rely on

Signed-off-by: zijun_hu 
---
 lib/ioremap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/ioremap.c b/lib/ioremap.c
index 86c8911..0058cc8 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -64,7 +64,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
BUG_ON(!pte_none(*pte));
set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
-   } while (pte++, addr += PAGE_SIZE, addr != end);
+   } while (pte++, addr += PAGE_SIZE, addr < end);
return 0;
 }
 
@@ -129,6 +129,7 @@ int ioremap_page_range(unsigned long addr,
int err;
 
BUG_ON(addr >= end);
+   BUG_ON(!PAGE_ALIGNED(addr | end));
 
start = addr;
phys_addr -= addr;
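
For illustration only (not part of either patch): a tiny user-space demo
of the hazard, showing that with an unaligned end the "addr != end" test
never fires, because addr only ever visits page-aligned values.

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long addr = 0x1000;	/* page aligned */
	unsigned long end  = 0x2800;	/* NOT page aligned */
	int iterations = 0;

	do {
		addr += PAGE_SIZE;
		iterations++;
	} while (addr != end && iterations < 8);	/* cap added only for the demo */

	/* prints 8 iterations: the loop stopped only because of the cap */
	printf("%d iterations, addr=0x%lx\n", iterations, addr);
	return 0;
}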

> Thanks,
> Nick
> 




Re: [PATCH 3/3] mm/vmalloc: correct a few logic error in __insert_vmap_area()

2016-09-19 Thread Nicholas Piggin
On Tue, 20 Sep 2016 14:02:26 +0800
zijun_hu  wrote:

> From: zijun_hu 
> 
> correct a few logic errors in __insert_vmap_area() since the else-if
> condition there is always true and therefore meaningless
>
> avoid an endless loop when [un]mapping improper ranges whose boundaries
> are not page aligned
>
> correct the lazy_max_pages() return value when the number of online
> cpus is a power of 2
>
> improve performance of pcpu_get_vm_areas() by optimizing the vmap_area
> overlap checking algorithm and by finding neighbouring vmap_areas via
> list_head rather than rbtree
>
> simplify the /proc/vmallocinfo implementation via the seq_file helpers
> for list_head
> 
> Signed-off-by: zijun_hu 
> Signed-off-by: zijun_hu 

Could you submit each of these changes as a separate patch? Would you
consider using capitalisation and punctuation in the changelog?

Did you measure any performance improvements, or do you have a workload
where vmalloc shows up in profiles?


> @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
>   unsigned long next;
>  
>   BUG_ON(addr >= end);
> + WARN_ON(!PAGE_ALIGNED(addr | end));

I prefer to avoid mixing bitwise and arithmetic operations unless it's
necessary. Gcc should be able to optimise

WARN_ON(!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(end))

> + addr = round_down(addr, PAGE_SIZE);

I don't know if it's really necessary to relax the API like this for
internal vmalloc.c functions. If garbage is detected here, it's likely
due to a bug, and I'm not sure that rounding it would solve the problem.

For API functions perhaps it's reasonable -- in such cases you should
consider using WARN_ON_ONCE() or similar.
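
For example, a minimal sketch of such an entry check (the helper name
check_map_range() is only illustrative, not a concrete proposal):

static bool check_map_range(unsigned long addr, unsigned long end)
{
	/* warn once on misuse rather than silently rounding the input */
	if (WARN_ON_ONCE(!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(end)))
		return false;
	if (WARN_ON_ONCE(addr >= end))
		return false;
	return true;
}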

Thanks,
Nick


[PATCH 3/3] mm/vmalloc: correct a few logic error in __insert_vmap_area()

2016-09-19 Thread zijun_hu
From: zijun_hu 

correct a few logic errors in __insert_vmap_area() since the else-if
condition there is always true and therefore meaningless

avoid an endless loop when [un]mapping improper ranges whose boundaries
are not page aligned

correct the lazy_max_pages() return value when the number of online
cpus is a power of 2

improve performance of pcpu_get_vm_areas() by optimizing the vmap_area
overlap checking algorithm and by finding neighbouring vmap_areas via
list_head rather than rbtree

simplify the /proc/vmallocinfo implementation via the seq_file helpers
for list_head

Signed-off-by: zijun_hu 
Signed-off-by: zijun_hu 
---
 include/linux/list.h |  11 ++
 mm/internal.h        |   6 +++
 mm/memblock.c        |  10 +
 mm/vmalloc.c         | 104 +++
 4 files changed, 74 insertions(+), 57 deletions(-)

diff --git a/include/linux/list.h b/include/linux/list.h
index 5183138..23c3081 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -181,6 +181,17 @@ static inline int list_is_last(const struct list_head *list,
 }
 
 /**
+ * list_is_first - tests whether @list is the first entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_first(const struct list_head *list,
+   const struct list_head *head)
+{
+   return list->prev == head;
+}
+
+/**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
  */
diff --git a/mm/internal.h b/mm/internal.h
index 1501304..abbff7c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -71,6 +71,12 @@ static inline void set_page_refcounted(struct page *page)
set_page_count(page, 1);
 }
 
+/**
+ * check whether range [@s0, @e0) has intersection with [@s1, @e1)
+ */
+#define is_range_overlay(s0, e0, s1, e1) \
+   (((s1) >= (e0) || (s0) >= (e1)) ? false : true)
+
 extern unsigned long highest_memmap_pfn;
 
 /*
diff --git a/mm/memblock.c b/mm/memblock.c
index 483197e..b4c7d7c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -85,20 +85,14 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
 /*
  * Address comparison utilities
  */
-static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
-                              phys_addr_t base2, phys_addr_t size2)
-{
-   return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-
 bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
phys_addr_t base, phys_addr_t size)
 {
unsigned long i;
 
for (i = 0; i < type->cnt; i++)
-   if (memblock_addrs_overlap(base, size, type->regions[i].base,
-  type->regions[i].size))
+   if (is_range_overlay(base, base + size, type->regions[i].base,
+   type->regions[i].base + type->regions[i].size))
break;
return i < type->cnt;
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 91f44e7..dc938f6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -67,7 +67,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
do {
pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-   } while (pte++, addr += PAGE_SIZE, addr != end);
+   } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE);
 }
 
 static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
@@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
unsigned long next;
 
BUG_ON(addr >= end);
+   WARN_ON(!PAGE_ALIGNED(addr | end));
+
+   addr = round_down(addr, PAGE_SIZE);
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
@@ -139,7 +142,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
return -ENOMEM;
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*nr)++;
-   } while (pte++, addr += PAGE_SIZE, addr != end);
+   } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE);
return 0;
 }
 
@@ -193,6 +196,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
int nr = 0;
 
BUG_ON(addr >= end);
+   WARN_ON(!PAGE_ALIGNED(addr | end));
+
+   addr = round_down(addr, PAGE_SIZE);
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
@@ -291,6 +297,22 @@ static unsigned long cached_align;
 
 static unsigned long vmap_area_pcpu_hole;
 
+static inline struct vmap_area *next_vmap_area(struct vmap_area *va)
+{
+   if (list_is_last(&va->list, &vmap_area_list))
+   return  NULL;
+   else
+   return list_next_entry(va, list);
+}
+
+static inline struct vmap_area *prev_vmap_are