[PATCH v9 1/4] vmalloc: Add __vmalloc_node_try_addr function

2018-11-09 Thread Rick Edgecombe
Create __vmalloc_node_try_addr function that tries to allocate at a specific
address without triggering any lazy purging and retry. For the randomized
allocator that uses this function, failing to allocate at a specific address is
a lot more common. This function will not try to do any lazy purge and retry,
to try to fail faster when an allocation won't fit at a specific address. This
function is used for a case where lazy free areas are unlikely and so the purge
and retry is just extra work done every time. For the randomized module
loader, the performance for an average allocation in ns for different numbers
of modules was:

Modules  Vmalloc optimization  No Vmalloc Optimization
1000     1433                  1993
2000     2295                  3681
3000     4424                  7450
4000     7746                  13824
5000     12721                 21852
6000     19724                 33926
7000     27638                 47427
8000     37745                 64443

In order to support this behavior a try_addr argument was plugged into several
of the static helpers.

This also changes logic in __get_vm_area_node to be faster in cases where
allocations fail due to no space, which is a lot more common when trying
specific addresses.

Signed-off-by: Rick Edgecombe 
---
 include/linux/vmalloc.h |   3 +
 mm/vmalloc.c| 128 +---
 2 files changed, 95 insertions(+), 36 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 398e9c95cd61..6eaa89612372 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -82,6 +82,9 @@ extern void *__vmalloc_node_range(unsigned long size, 
unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
+extern void *__vmalloc_node_try_addr(unsigned long addr, unsigned long size,
+   gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags,
+   int node, const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
 static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 97d4b25d0373..b8b34d319c85 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -326,6 +326,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_LAZY_FREE   0x02
 #define VM_VM_AREA 0x04
 
+#define VMAP_MAY_PURGE 0x2
+#define VMAP_NO_PURGE  0x1
+
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
@@ -402,12 +405,12 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static struct vmap_area *alloc_vmap_area(unsigned long size,
unsigned long align,
unsigned long vstart, unsigned long vend,
-   int node, gfp_t gfp_mask)
+   int node, gfp_t gfp_mask, int try_purge)
 {
struct vmap_area *va;
struct rb_node *n;
unsigned long addr;
-   int purged = 0;
+   int purged = try_purge & VMAP_NO_PURGE;
struct vmap_area *first;
 
BUG_ON(!size);
@@ -860,7 +863,7 @@ static void *new_vmap_block(unsigned int order, gfp_t 
gfp_mask)
 
va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
VMALLOC_START, VMALLOC_END,
-   node, gfp_mask);
+   node, gfp_mask, VMAP_MAY_PURGE);
if (IS_ERR(va)) {
kfree(vb);
return ERR_CAST(va);
@@ -1170,8 +1173,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, 
int node, pgprot_t pro
addr = (unsigned long)mem;
} else {
struct vmap_area *va;
-   va = alloc_vmap_area(size, PAGE_SIZE,
-   VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+   va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START,
+   VMALLOC_END, node, GFP_KERNEL,
+   VMAP_MAY_PURGE);
if (IS_ERR(va))
return NULL;
 
@@ -1372,7 +1376,8 @@ static void clear_vm_uninitialized_flag(struct vm_struct 
*vm)
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long align, unsigned long flags, unsigned long start,
-   unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+   unsigned long end, int node, gfp_t gfp_mask, int try_purge,
+   const void *caller)
 {
struct vmap_area *va;
struct vm_struct *area;
@@ -1386,16 +1391,17 @@ static struct vm_struct *__get_vm_area_node(unsigned 
long size,
align = 1ul << clamp_t(int, get_count_order_long(size),
   

[PATCH v9 1/4] vmalloc: Add __vmalloc_node_try_addr function

2018-11-09 Thread Rick Edgecombe
Create __vmalloc_node_try_addr function that tries to allocate at a specific
address without triggering any lazy purging and retry. For the randomized
allocator that uses this function, failing to allocate at a specific address is
a lot more common. This function will not try to do any lazy purge and retry,
to try to fail faster when an allocation won't fit at a specific address. This
function is used for a case where lazy free areas are unlikely and so the purge
and retry is just extra work done every time. For the randomized module
loader, the performance for an average allocation in ns for different numbers
of modules was:

Modules  Vmalloc optimization  No Vmalloc Optimization
1000     1433                  1993
2000     2295                  3681
3000     4424                  7450
4000     7746                  13824
5000     12721                 21852
6000     19724                 33926
7000     27638                 47427
8000     37745                 64443

In order to support this behavior a try_addr argument was plugged into several
of the static helpers.

This also changes logic in __get_vm_area_node to be faster in cases where
allocations fail due to no space, which is a lot more common when trying
specific addresses.

Signed-off-by: Rick Edgecombe 
---
 include/linux/vmalloc.h |   3 +
 mm/vmalloc.c| 128 +---
 2 files changed, 95 insertions(+), 36 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 398e9c95cd61..6eaa89612372 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -82,6 +82,9 @@ extern void *__vmalloc_node_range(unsigned long size, 
unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
+extern void *__vmalloc_node_try_addr(unsigned long addr, unsigned long size,
+   gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags,
+   int node, const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
 static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 97d4b25d0373..b8b34d319c85 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -326,6 +326,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_LAZY_FREE   0x02
 #define VM_VM_AREA 0x04
 
+#define VMAP_MAY_PURGE 0x2
+#define VMAP_NO_PURGE  0x1
+
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
 LIST_HEAD(vmap_area_list);
@@ -402,12 +405,12 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static struct vmap_area *alloc_vmap_area(unsigned long size,
unsigned long align,
unsigned long vstart, unsigned long vend,
-   int node, gfp_t gfp_mask)
+   int node, gfp_t gfp_mask, int try_purge)
 {
struct vmap_area *va;
struct rb_node *n;
unsigned long addr;
-   int purged = 0;
+   int purged = try_purge & VMAP_NO_PURGE;
struct vmap_area *first;
 
BUG_ON(!size);
@@ -860,7 +863,7 @@ static void *new_vmap_block(unsigned int order, gfp_t 
gfp_mask)
 
va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
VMALLOC_START, VMALLOC_END,
-   node, gfp_mask);
+   node, gfp_mask, VMAP_MAY_PURGE);
if (IS_ERR(va)) {
kfree(vb);
return ERR_CAST(va);
@@ -1170,8 +1173,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, 
int node, pgprot_t pro
addr = (unsigned long)mem;
} else {
struct vmap_area *va;
-   va = alloc_vmap_area(size, PAGE_SIZE,
-   VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+   va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START,
+   VMALLOC_END, node, GFP_KERNEL,
+   VMAP_MAY_PURGE);
if (IS_ERR(va))
return NULL;
 
@@ -1372,7 +1376,8 @@ static void clear_vm_uninitialized_flag(struct vm_struct 
*vm)
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long align, unsigned long flags, unsigned long start,
-   unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+   unsigned long end, int node, gfp_t gfp_mask, int try_purge,
+   const void *caller)
 {
struct vmap_area *va;
struct vm_struct *area;
@@ -1386,16 +1391,17 @@ static struct vm_struct *__get_vm_area_node(unsigned 
long size,
align = 1ul << clamp_t(int, get_count_order_long(size),