Re: [PATCH 3/3] percpu: allow select gfp to be passed to underlying allocators
Hi,

On Thu, Feb 15, 2018 at 01:41:48PM -0800, Tejun Heo wrote:
> On Thu, Feb 15, 2018 at 10:08:16AM -0600, Dennis Zhou wrote:
> > +/* the whitelisted flags that can be passed to the backing allocators */
> > +#define gfp_percpu_mask(gfp)	(((gfp) & (__GFP_NORETRY | __GFP_NOWARN)) | \
> > +				 GFP_KERNEL)
>
> Isn't there just one place where gfp comes in from outside? If so,
> this looks like a bit of overkill. Can't we just filter there?
>

I agree, but it's also nice having a single place where flags can be
added or removed. The primary motivator was the "| GFP_KERNEL" base, but
as suggested in the other patch this is getting removed.

I guess I still lean towards keeping it, as it's explicit and helps gate
both the balance path and the user allocation path.

Thanks,
Dennis
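To make the "single gating point" argument concrete, here is a minimal
user-space sketch of the same idea. It is only an illustration: the bit
values below are placeholders rather than the real kernel GFP encodings,
and the two variables stand in for the user allocation path and the
balance path. The point is simply that one macro both strips anything
outside the whitelist and ORs in the GFP_KERNEL base, so every caller
that goes through it is gated the same way.

#include <stdio.h>

/* placeholder bit values, not the kernel's GFP encodings */
#define __GFP_NORETRY	0x1u
#define __GFP_NOWARN	0x2u
#define __GFP_HIGHMEM	0x4u	/* deliberately not whitelisted */
#define GFP_KERNEL	0x8u	/* stands in for the reclaim-capable base */

/* same shape as the macro in the patch below */
#define gfp_percpu_mask(gfp)	(((gfp) & (__GFP_NORETRY | __GFP_NOWARN)) | \
				 GFP_KERNEL)

int main(void)
{
	/* user allocation path: arbitrary caller-supplied flags */
	unsigned int user_gfp = __GFP_NOWARN | __GFP_HIGHMEM;
	/* balance path: only the known-safe flags are requested */
	unsigned int balance_gfp = __GFP_NORETRY | __GFP_NOWARN;

	/* __GFP_HIGHMEM is dropped, GFP_KERNEL is always added */
	printf("user path:    %#x -> %#x\n", user_gfp, gfp_percpu_mask(user_gfp));
	printf("balance path: %#x -> %#x\n", balance_gfp, gfp_percpu_mask(balance_gfp));
	return 0;
}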
Re: [PATCH 3/3] percpu: allow select gfp to be passed to underlying allocators
Hello,

On Thu, Feb 15, 2018 at 10:08:16AM -0600, Dennis Zhou wrote:
> +/* the whitelisted flags that can be passed to the backing allocators */
> +#define gfp_percpu_mask(gfp)	(((gfp) & (__GFP_NORETRY | __GFP_NOWARN)) | \
> +				 GFP_KERNEL)

Isn't there just one place where gfp comes in from outside? If so,
this looks like a bit of overkill. Can't we just filter there?

Thanks.

-- 
tejun
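For reference, the alternative being suggested here -- filtering once at
the external entry point instead of defining a helper -- might look
roughly like the untested sketch below, assuming pcpu_alloc() really is
the only place caller-supplied gfp enters:

static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
				 gfp_t gfp)
{
	/* whitelist caller flags once, at the single external entry point */
	gfp_t pcpu_gfp = (gfp & (__GFP_NORETRY | __GFP_NOWARN)) | GFP_KERNEL;

	/* ... rest of pcpu_alloc() unchanged, using pcpu_gfp internally ... */
}

Internal paths such as pcpu_balance_workfn() would then hard-code the
full flag set (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN) themselves.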
[PATCH 3/3] percpu: allow select gfp to be passed to underlying allocators
The prior patch added the ability to pass gfp flags through to the
underlying allocators. This patch lets users pass along gfp flags
(currently only __GFP_NORETRY and __GFP_NOWARN) to those allocators, so
callers can decide whether a failed allocation is acceptable and recover
more gracefully.

Additionally, gfp was previously used as a set of additional flags that
were then combined with the base flags, namely GFP_KERNEL.
gfp_percpu_mask is introduced to supply the GFP_KERNEL base and
whitelist the allowed gfp flags. Using it in the appropriate places
changes gfp from a set of additional flags to the complete set, removing
the need to always OR in GFP_KERNEL.

Signed-off-by: Dennis Zhou
Suggested-by: Daniel Borkmann
---
 mm/percpu-km.c |  2 +-
 mm/percpu-vm.c |  4 ++--
 mm/percpu.c    | 16 ++++++++++++------
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index 0d88d7b..38de70a 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -56,7 +56,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 	if (!chunk)
 		return NULL;
 
-	pages = alloc_pages(gfp | GFP_KERNEL, order_base_2(nr_pages));
+	pages = alloc_pages(gfp, order_base_2(nr_pages));
 	if (!pages) {
 		pcpu_free_chunk(chunk);
 		return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea9906a..c771d86 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
 	lockdep_assert_held(&pcpu_alloc_mutex);
 
 	if (!pages)
-		pages = pcpu_mem_zalloc(pages_size, 0);
+		pages = pcpu_mem_zalloc(pages_size, gfp_percpu_mask(0));
 	return pages;
 }
 
@@ -86,7 +86,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
 	unsigned int cpu, tcpu;
 	int i;
 
-	gfp |= GFP_KERNEL | __GFP_HIGHMEM;
+	gfp |= __GFP_HIGHMEM;
 
 	for_each_possible_cpu(cpu) {
 		for (i = page_start; i < page_end; i++) {
diff --git a/mm/percpu.c b/mm/percpu.c
index 2489b8b..e35a120 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -91,6 +91,10 @@
 
 #include "percpu-internal.h"
 
+/* the whitelisted flags that can be passed to the backing allocators */
+#define gfp_percpu_mask(gfp)	(((gfp) & (__GFP_NORETRY | __GFP_NOWARN)) | \
+				 GFP_KERNEL)
+
 /* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
 #define PCPU_SLOT_BASE_SHIFT		5
 
@@ -466,10 +470,9 @@ static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 		return NULL;
 
 	if (size <= PAGE_SIZE)
-		return kzalloc(size, gfp | GFP_KERNEL);
+		return kzalloc(size, gfp);
 	else
-		return __vmalloc(size, gfp | GFP_KERNEL | __GFP_ZERO,
-				 PAGE_KERNEL);
+		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1344,6 +1347,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 				 gfp_t gfp)
 {
+	gfp_t pcpu_gfp = gfp_percpu_mask(gfp);
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
@@ -1426,7 +1430,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-		chunk = pcpu_create_chunk(0);
+		chunk = pcpu_create_chunk(pcpu_gfp);
 		if (!chunk) {
 			err = "failed to allocate new chunk";
 			goto fail;
@@ -1455,7 +1459,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 					   page_start, page_end) {
 			WARN_ON(chunk->immutable);
 
-			ret = pcpu_populate_chunk(chunk, rs, re, 0);
+			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
 			spin_lock_irqsave(&pcpu_lock, flags);
 			if (ret) {
@@ -1576,7 +1580,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 static void pcpu_balance_workfn(struct work_struct *work)
 {
 	/* gfp flags passed to underlying allocators */
-	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+	gfp_t gfp = gfp_percpu_mask(__GFP_NORETRY | __GFP_NOWARN);
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
-- 
1.8.3.1
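As a usage note (not part of this patch): with the whitelist in place, a
caller that can tolerate failure would request per-CPU memory along the
lines of the sketch below. The structure and variable names are
hypothetical; alloc_percpu_gfp() is the existing per-CPU allocation
entry point, and only the two whitelisted modifiers are honoured.

/* Hypothetical caller, for illustration only: an optional per-CPU cache.
 * __GFP_NORETRY and __GFP_NOWARN let the allocation fail quickly and
 * quietly, so the caller can fall back instead of hitting the OOM path.
 */
struct foo_cache __percpu *cache;

cache = alloc_percpu_gfp(struct foo_cache,
			 GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (!cache)
	pr_debug("foo: per-cpu cache unavailable, using shared fallback\n");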