Re: [PATCH 3/3] mm/vmalloc: correct a few logic errors in __insert_vmap_area()
Hi All, please ignore this patch as advised by Nicholas Piggin, i split this patch to smaller patches and resend them in another mail thread On 09/20/2016 02:02 PM, zijun_hu wrote: > From: zijun_hu > > correct a few logic error in __insert_vmap_area() since the else if > condition is always true and meaningless > > avoid endless loop under [un]mapping improper ranges whose boundary > are not aligned to page > > correct lazy_max_pages() return value if the number of online cpus > is power of 2 > > improve performance for pcpu_get_vm_areas() via optimizing vmap_areas > overlay checking algorithm and finding near vmap_areas by list_head > other than rbtree > > simplify /proc/vmallocinfo implementation via seq_file helpers > for list_head > > Signed-off-by: zijun_hu > Signed-off-by: zijun_hu > --- > include/linux/list.h | 11 ++ > mm/internal.h| 6 +++ > mm/memblock.c| 10 + > mm/vmalloc.c | 104 > +++ > 4 files changed, 74 insertions(+), 57 deletions(-) > > diff --git a/include/linux/list.h b/include/linux/list.h > index 5183138..23c3081 100644 > --- a/include/linux/list.h > +++ b/include/linux/list.h > @@ -181,6 +181,17 @@ static inline int list_is_last(const struct list_head > *list, > } > > /** > + * list_is_first - tests whether @list is the first entry in list @head > + * @list: the entry to test > + * @head: the head of the list > + */ > +static inline int list_is_first(const struct list_head *list, > + const struct list_head *head) > +{ > + return list->prev == head; > +} > + > +/** > * list_empty - tests whether a list is empty > * @head: the list to test. > */ > diff --git a/mm/internal.h b/mm/internal.h > index 1501304..abbff7c 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -71,6 +71,12 @@ static inline void set_page_refcounted(struct page *page) > set_page_count(page, 1); > } > > +/** > + * check whether range [@s0, @e0) has intersection with [@s1, @e1) > + */ > +#define is_range_overlay(s0, e0, s1, e1) \ > + (((s1) >= (e0) || (s0) >= (e1)) ? 
false : true) > + > extern unsigned long highest_memmap_pfn; > > /* > diff --git a/mm/memblock.c b/mm/memblock.c > index 483197e..b4c7d7c 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -85,20 +85,14 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t > base, phys_addr_t *size) > /* > * Address comparison utilities > */ > -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t > base1, phys_addr_t size1, > -phys_addr_t base2, phys_addr_t size2) > -{ > - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); > -} > - > bool __init_memblock memblock_overlaps_region(struct memblock_type *type, > phys_addr_t base, phys_addr_t size) > { > unsigned long i; > > for (i = 0; i < type->cnt; i++) > - if (memblock_addrs_overlap(base, size, type->regions[i].base, > -type->regions[i].size)) > + if (is_range_overlay(base, base + size, type->regions[i].base, > + type->regions[i].base + type->regions[i].size)) > break; > return i < type->cnt; > } > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index 91f44e7..dc938f6 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -67,7 +67,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long > addr, unsigned long end) > do { > pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); > WARN_ON(!pte_none(ptent) && !pte_present(ptent)); > - } while (pte++, addr += PAGE_SIZE, addr != end); > + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE); > } > > static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long > end) > @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, > unsigned long end) > unsigned long next; > > BUG_ON(addr >= end); > + WARN_ON(!PAGE_ALIGNED(addr | end)); > + > + addr = round_down(addr, PAGE_SIZE); > pgd = pgd_offset_k(addr); > do { > next = pgd_addr_end(addr, end); > @@ -139,7 +142,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, > return -ENOMEM; > set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); > (*nr)++; > - } 
while (pte++, addr += PAGE_SIZE, addr != end); > + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE); > return 0; > } > > @@ -193,6 +196,9 @@ static int vmap_page_range_noflush(unsigned long start, > unsigned long end, > int nr = 0; > > BUG_ON(addr >= end); > + WARN_ON(!PAGE_ALIGNED(addr | end)); > + > + addr = round_down(addr, PAGE_SIZE); > pgd = pgd_offset_k(addr); > do { > next = pgd_addr_
Re: [PATCH 3/3] mm/vmalloc: correct a few logic errors in __insert_vmap_area()
On 09/20/2016 02:54 PM, Nicholas Piggin wrote: > On Tue, 20 Sep 2016 14:02:26 +0800 > zijun_hu wrote: > >> From: zijun_hu >> >> correct a few logic error in __insert_vmap_area() since the else if >> condition is always true and meaningless >> >> avoid endless loop under [un]mapping improper ranges whose boundary >> are not aligned to page >> >> correct lazy_max_pages() return value if the number of online cpus >> is power of 2 >> >> improve performance for pcpu_get_vm_areas() via optimizing vmap_areas >> overlay checking algorithm and finding near vmap_areas by list_head >> other than rbtree >> >> simplify /proc/vmallocinfo implementation via seq_file helpers >> for list_head >> >> Signed-off-by: zijun_hu >> Signed-off-by: zijun_hu > > Could you submit each of these changes as a separate patch? Would you > consider using capitalisation and punctuation in the changelog? > thanks for your advisement i will follow it and split this patch to smaller patches finally > Did you measure any performance improvements, or do you have a workload > where vmalloc shows up in profiles? > don't have measurement in practice, but i am sure there are performance improvements for pcpu_get_vm_areas() theoretically due to below reasons: 1) the counter of vmap_area overlay checkup loop is reduced to half 2) the previous and next vmap_area of one on list_head are just the nearest ones due to address sorted vmap_areas on list_head, so no walk and compare is needed > >> @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, >> unsigned long end) >> unsigned long next; >> >> BUG_ON(addr >= end); >> +WARN_ON(!PAGE_ALIGNED(addr | end)); > > I prefer to avoid mixing bitwise and arithmetic operations unless it's > necessary. 
Gcc should be able to optimise > > WARN_ON(!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(end)) > i agree with you, i will apply your suggestion finally >> +addr = round_down(addr, PAGE_SIZE); > > I don't know if it's really necessary to relax the API like this for > internal vmalloc.c functions. If garbage is detected here, it's likely > due to a bug, and I'm not sure that rounding it would solve the problem. > > For API functions perhaps it's reasonable -- in such cases you should > consider using WARN_ON_ONCE() or similar. > actually, another patch for API function within /lib/ioremap.c used the way as pointed by you as below, i am not sure which is better, perhaps i will exchange each other Subject: [PATCH 2/3] lib/ioremap.c: avoid endless loop under ioremapping improper ranges for ioremap_page_range(), endless loop maybe happen if either of parameter addr and end is not page aligned, in order to fix this issue and hint range parameter requirements BUG_ON() checkup are performed firstly for ioremap_pte_range(), loop end condition is optimized due to lack of relevant macro pte_addr_end() Signed-off-by: zijun_hu --- lib/ioremap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ioremap.c b/lib/ioremap.c index 86c8911..0058cc8 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -64,7 +64,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, BUG_ON(!pte_none(*pte)); set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr < end); return 0; } @@ -129,6 +129,7 @@ int ioremap_page_range(unsigned long addr, int err; BUG_ON(addr >= end); + BUG_ON(!PAGE_ALIGNED(addr | end)); start = addr; phys_addr -= addr; > Thanks, > Nick >
Re: [PATCH 3/3] mm/vmalloc: correct a few logic errors in __insert_vmap_area()
On Tue, 20 Sep 2016 14:02:26 +0800 zijun_hu wrote: > From: zijun_hu > > correct a few logic error in __insert_vmap_area() since the else if > condition is always true and meaningless > > avoid endless loop under [un]mapping improper ranges whose boundary > are not aligned to page > > correct lazy_max_pages() return value if the number of online cpus > is power of 2 > > improve performance for pcpu_get_vm_areas() via optimizing vmap_areas > overlay checking algorithm and finding near vmap_areas by list_head > other than rbtree > > simplify /proc/vmallocinfo implementation via seq_file helpers > for list_head > > Signed-off-by: zijun_hu > Signed-off-by: zijun_hu Could you submit each of these changes as a separate patch? Would you consider using capitalisation and punctuation in the changelog? Did you measure any performance improvements, or do you have a workload where vmalloc shows up in profiles? > @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, > unsigned long end) > unsigned long next; > > BUG_ON(addr >= end); > + WARN_ON(!PAGE_ALIGNED(addr | end)); I prefer to avoid mixing bitwise and arithmetic operations unless it's necessary. Gcc should be able to optimise WARN_ON(!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(end)) > + addr = round_down(addr, PAGE_SIZE); I don't know if it's really necessary to relax the API like this for internal vmalloc.c functions. If garbage is detected here, it's likely due to a bug, and I'm not sure that rounding it would solve the problem. For API functions perhaps it's reasonable -- in such cases you should consider using WARN_ON_ONCE() or similar. Thanks, Nick
[PATCH 3/3] mm/vmalloc: correct a few logic errors in __insert_vmap_area()
From: zijun_hu correct a few logic error in __insert_vmap_area() since the else if condition is always true and meaningless avoid endless loop under [un]mapping improper ranges whose boundary are not aligned to page correct lazy_max_pages() return value if the number of online cpus is power of 2 improve performance for pcpu_get_vm_areas() via optimizing vmap_areas overlay checking algorithm and finding near vmap_areas by list_head other than rbtree simplify /proc/vmallocinfo implementation via seq_file helpers for list_head Signed-off-by: zijun_hu Signed-off-by: zijun_hu --- include/linux/list.h | 11 ++ mm/internal.h| 6 +++ mm/memblock.c| 10 + mm/vmalloc.c | 104 +++ 4 files changed, 74 insertions(+), 57 deletions(-) diff --git a/include/linux/list.h b/include/linux/list.h index 5183138..23c3081 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -181,6 +181,17 @@ static inline int list_is_last(const struct list_head *list, } /** + * list_is_first - tests whether @list is the first entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return list->prev == head; +} + +/** * list_empty - tests whether a list is empty * @head: the list to test. */ diff --git a/mm/internal.h b/mm/internal.h index 1501304..abbff7c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -71,6 +71,12 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } +/** + * check whether range [@s0, @e0) has intersection with [@s1, @e1) + */ +#define is_range_overlay(s0, e0, s1, e1) \ + (((s1) >= (e0) || (s0) >= (e1)) ? 
false : true) + extern unsigned long highest_memmap_pfn; /* diff --git a/mm/memblock.c b/mm/memblock.c index 483197e..b4c7d7c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -85,20 +85,14 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - bool __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; for (i = 0; i < type->cnt; i++) - if (memblock_addrs_overlap(base, size, type->regions[i].base, - type->regions[i].size)) + if (is_range_overlay(base, base + size, type->regions[i].base, + type->regions[i].base + type->regions[i].size)) break; return i < type->cnt; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 91f44e7..dc938f6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -67,7 +67,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) do { pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); WARN_ON(!pte_none(ptent) && !pte_present(ptent)); - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE); } static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) @@ -108,6 +108,9 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) unsigned long next; BUG_ON(addr >= end); + WARN_ON(!PAGE_ALIGNED(addr | end)); + + addr = round_down(addr, PAGE_SIZE); pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); @@ -139,7 +142,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, return -ENOMEM; set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); (*nr)++; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr < end && addr >= PAGE_SIZE); return 
0; } @@ -193,6 +196,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, int nr = 0; BUG_ON(addr >= end); + WARN_ON(!PAGE_ALIGNED(addr | end)); + + addr = round_down(addr, PAGE_SIZE); pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); @@ -291,6 +297,22 @@ static unsigned long cached_align; static unsigned long vmap_area_pcpu_hole; +static inline struct vmap_area *next_vmap_area(struct vmap_area *va) +{ + if (list_is_last(&va->list, &vmap_area_list)) + return NULL; + else + return list_next_entry(va, list); +} + +static inline struct vmap_area *prev_vmap_are