If we use vmalloc to allocate memory touched by the tracing fast-path, we need to issue wrapper_vmalloc_sync_all() between memory allocation and publishing the new pointer.
----- On Sep 21, 2017, at 10:55 AM, Michael Jeanson [email protected] wrote: > This patch is based on the kvmalloc helpers introduced in kernel 4.12. > > It will gracefully failover memory allocations of more than one page to > vmalloc for systems under high memory pressure or fragmentation. > > I only used it in lttng-events.c as a POC, most allocations fit into a > single page so I'm not sure how useful this actually is. > > Thoughts? > > Signed-off-by: Michael Jeanson <[email protected]> > > See upstream commit: > commit a7c3e901a46ff54c016d040847eda598a9e3e653 > Author: Michal Hocko <[email protected]> > Date: Mon May 8 15:57:09 2017 -0700 > > mm: introduce kv[mz]alloc helpers > > Patch series "kvmalloc", v5. > > There are many open coded kmalloc with vmalloc fallback instances in the > tree. Most of them are not careful enough or simply do not care about > the underlying semantic of the kmalloc/page allocator which means that > a) some vmalloc fallbacks are basically unreachable because the kmalloc > part will keep retrying until it succeeds b) the page allocator can > invoke a really disruptive steps like the OOM killer to move forward > which doesn't sound appropriate when we consider that the vmalloc > fallback is available. > > As it can be seen implementing kvmalloc requires quite an intimate > knowledge if the page allocator and the memory reclaim internals which > strongly suggests that a helper should be implemented in the memory > subsystem proper. > > Most callers, I could find, have been converted to use the helper > instead. This is patch 6. There are some more relying on __GFP_REPEAT > in the networking stack which I have converted as well and Eric Dumazet > was not opposed [2] to convert them as well. 
> > [1] http://lkml.kernel.org/r/[email protected] > [2] > > http://lkml.kernel.org/r/1485273626.16328.301.ca...@edumazet-glaptop3.roam.corp.google.com > > This patch (of 9): > > Using kmalloc with the vmalloc fallback for larger allocations is a > common pattern in the kernel code. Yet we do not have any common helper > for that and so users have invented their own helpers. Some of them are > really creative when doing so. Let's just add kv[mz]alloc and make sure > it is implemented properly. This implementation makes sure to not make > a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also > to not warn about allocation failures. This also rules out the OOM > killer as the vmalloc is a more approapriate fallback than a disruptive > user visible action. > > Signed-off-by: Michael Jeanson <[email protected]> > --- > lttng-events.c | 30 ++++++++++----------- > wrapper/vmalloc.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 92 insertions(+), 17 deletions(-) > > diff --git a/lttng-events.c b/lttng-events.c > index 6aa994c..1e4cc10 100644 > --- a/lttng-events.c > +++ b/lttng-events.c > @@ -132,14 +132,14 @@ struct lttng_session *lttng_session_create(void) > int i; > > mutex_lock(&sessions_mutex); > - session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL); > + session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL); > if (!session) > goto err; > INIT_LIST_HEAD(&session->chan); > INIT_LIST_HEAD(&session->events); > uuid_le_gen(&session->uuid); > > - metadata_cache = kzalloc(sizeof(struct lttng_metadata_cache), > + metadata_cache = lttng_kvzalloc(sizeof(struct lttng_metadata_cache), > GFP_KERNEL); > if (!metadata_cache) > goto err_free_session; > @@ -161,9 +161,9 @@ struct lttng_session *lttng_session_create(void) > return session; > > err_free_cache: > - kfree(metadata_cache); > + lttng_kvfree(metadata_cache); > err_free_session: > - kfree(session); > + lttng_kvfree(session); > err: > 
mutex_unlock(&sessions_mutex); > return NULL; > @@ -174,7 +174,7 @@ void metadata_cache_destroy(struct kref *kref) > struct lttng_metadata_cache *cache = > container_of(kref, struct lttng_metadata_cache, refcount); > vfree(cache->data); > - kfree(cache); > + lttng_kvfree(cache); > } > > void lttng_session_destroy(struct lttng_session *session) > @@ -212,7 +212,7 @@ void lttng_session_destroy(struct lttng_session *session) > kref_put(&session->metadata_cache->refcount, metadata_cache_destroy); > list_del(&session->list); > mutex_unlock(&sessions_mutex); > - kfree(session); > + lttng_kvfree(session); > } > > int lttng_session_statedump(struct lttng_session *session) > @@ -495,7 +495,7 @@ struct lttng_channel *lttng_channel_create(struct > lttng_session *session, > printk(KERN_WARNING "LTT : Can't lock transport module.\n"); > goto notransport; > } > - chan = kzalloc(sizeof(struct lttng_channel), GFP_KERNEL); > + chan = lttng_kvzalloc(sizeof(struct lttng_channel), GFP_KERNEL); > if (!chan) > goto nomem; > chan->session = session; > @@ -520,7 +520,7 @@ struct lttng_channel *lttng_channel_create(struct > lttng_session *session, > return chan; > > create_error: > - kfree(chan); > + lttng_kvfree(chan); > nomem: > if (transport) > module_put(transport->owner); > @@ -542,7 +542,7 @@ void _lttng_channel_destroy(struct lttng_channel *chan) > module_put(chan->transport->owner); > list_del(&chan->list); > lttng_destroy_context(chan->ctx); > - kfree(chan); > + lttng_kvfree(chan); > } > > void lttng_metadata_channel_destroy(struct lttng_channel *chan) > @@ -1343,7 +1343,7 @@ int lttng_enabler_ref_events(struct lttng_enabler > *enabler) > * If no backward ref, create it. > * Add backward ref from event to enabler. 
> */ > - enabler_ref = kzalloc(sizeof(*enabler_ref), GFP_KERNEL); > + enabler_ref = lttng_kvzalloc(sizeof(*enabler_ref), > GFP_KERNEL); > if (!enabler_ref) > return -ENOMEM; > enabler_ref->ref = enabler; > @@ -1381,7 +1381,7 @@ struct lttng_enabler *lttng_enabler_create(enum > lttng_enabler_type type, > { > struct lttng_enabler *enabler; > > - enabler = kzalloc(sizeof(*enabler), GFP_KERNEL); > + enabler = lttng_kvzalloc(sizeof(*enabler), GFP_KERNEL); > if (!enabler) > return NULL; > enabler->type = type; > @@ -1427,7 +1427,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler > *enabler, > ret = get_user(bytecode_len, &bytecode->len); > if (ret) > return ret; > - bytecode_node = kzalloc(sizeof(*bytecode_node) + bytecode_len, > + bytecode_node = lttng_kvzalloc(sizeof(*bytecode_node) + bytecode_len, > GFP_KERNEL); > if (!bytecode_node) > return -ENOMEM; > @@ -1443,7 +1443,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler > *enabler, > return 0; > > error_free: > - kfree(bytecode_node); > + lttng_kvfree(bytecode_node); > return ret; > } > > @@ -1461,14 +1461,14 @@ void lttng_enabler_destroy(struct lttng_enabler > *enabler) > /* Destroy filter bytecode */ > list_for_each_entry_safe(filter_node, tmp_filter_node, > &enabler->filter_bytecode_head, node) { > - kfree(filter_node); > + lttng_kvfree(filter_node); > } > > /* Destroy contexts */ > lttng_destroy_context(enabler->ctx); > > list_del(&enabler->node); > - kfree(enabler); > + lttng_kvfree(enabler); > } > > /* > diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h > index 2332439..ad2d6cf 100644 > --- a/wrapper/vmalloc.h > +++ b/wrapper/vmalloc.h > @@ -25,6 +25,9 @@ > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > */ > > +#include <linux/version.h> > +#include <linux/vmalloc.h> > + > #ifdef CONFIG_KALLSYMS > > #include <linux/kallsyms.h> > @@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void) > } > #else > > -#include <linux/vmalloc.h> > - > static inline > void 
wrapper_vmalloc_sync_all(void) > { > @@ -60,4 +61,78 @@ void wrapper_vmalloc_sync_all(void) > } > #endif > > +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)) > +static inline > +void *lttng_kvmalloc(unsigned long size, int flags) > +{ > + return kvmalloc(size, flags); > +} > + > +static inline > +void *lttng_kvzalloc(unsigned long size, int flags) > +{ > + return kvzalloc(size, flags); > +} > +#else > + > +#include <linux/slab.h> > +#include <linux/mm.h> > + > +/** > + * lttng_kvmalloc_node - attempt to allocate physically contiguous memory, > but > upon > + * failure, fall back to non-contiguous (vmalloc) allocation. > + * @size: size of the request. > + * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL. > + * > + * Uses kmalloc to get the memory but if the allocation fails then falls back > + * to the vmalloc allocator. Use lttng_kvfree to free the memory. > + * > + * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not > supported > + */ > +static inline > +void *lttng_kvmalloc(unsigned long size, int flags) > +{ > + void *ret; > + > + /* > + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page > tables) > + * so the given set of flags has to be compatible. > + */ > + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); > + > + /* > + * If the allocation fits in a single page, do not fallback. 
> + */ > + if (size <= PAGE_SIZE) { > + return kmalloc(size, flags); > + } > + > + /* > + * Make sure that larger requests are not too disruptive - no OOM > + * killer and no allocation failure warnings as we have a fallback > + */ > + ret = kmalloc(size, flags | __GFP_NOWARN | __GFP_NORETRY); > + if (!ret) { > + ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); > + } > + return ret; > +} > + > +static inline > +void *lttng_kvzalloc(unsigned long size, int flags) > +{ > + return lttng_kvmalloc(size, flags | __GFP_ZERO); > +} > + > +static inline > +void lttng_kvfree(const void *addr) > +{ > + if (is_vmalloc_addr(addr)) { > + vfree(addr); > + } else { > + kfree(addr); > + } > +} > +#endif > + > #endif /* _LTTNG_WRAPPER_VMALLOC_H */ > -- > 2.7.4 -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com _______________________________________________ lttng-dev mailing list [email protected] https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
