If we use vmalloc to allocate memory touched by the tracing fast-path, we need to issue wrapper_vmalloc_sync_all() between memory allocation and publishing the new pointer.
----- On Sep 21, 2017, at 10:55 AM, Michael Jeanson [email protected] wrote: > This patch is based on the kvmalloc helpers introduced in kernel 4.12. > > It will gracefully failover memory allocations of more than one page to > vmalloc for systems under high memory pressure or fragmentation. > > I only used it in lttng-events.c as a POC, most allocations fit into a > single page so I'm not sure how useful this actually is. > > Thoughts? > > Signed-off-by: Michael Jeanson <[email protected]> > > See upstream commit: > commit a7c3e901a46ff54c016d040847eda598a9e3e653 > Author: Michal Hocko <[email protected]> > Date: Mon May 8 15:57:09 2017 -0700 > > mm: introduce kv[mz]alloc helpers > > Patch series "kvmalloc", v5. > > There are many open coded kmalloc with vmalloc fallback instances in the > tree. Most of them are not careful enough or simply do not care about > the underlying semantic of the kmalloc/page allocator which means that > a) some vmalloc fallbacks are basically unreachable because the kmalloc > part will keep retrying until it succeeds b) the page allocator can > invoke a really disruptive steps like the OOM killer to move forward > which doesn't sound appropriate when we consider that the vmalloc > fallback is available. > > As it can be seen implementing kvmalloc requires quite an intimate > knowledge if the page allocator and the memory reclaim internals which > strongly suggests that a helper should be implemented in the memory > subsystem proper. > > Most callers, I could find, have been converted to use the helper > instead. This is patch 6. There are some more relying on __GFP_REPEAT > in the networking stack which I have converted as well and Eric Dumazet > was not opposed [2] to convert them as well. 
> > [1] http://lkml.kernel.org/r/[email protected] > [2] > > http://lkml.kernel.org/r/1485273626.16328.301.ca...@edumazet-glaptop3.roam.corp.google.com > > This patch (of 9): > > Using kmalloc with the vmalloc fallback for larger allocations is a > common pattern in the kernel code. Yet we do not have any common helper > for that and so users have invented their own helpers. Some of them are > really creative when doing so. Let's just add kv[mz]alloc and make sure > it is implemented properly. This implementation makes sure to not make > a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also > to not warn about allocation failures. This also rules out the OOM > killer as the vmalloc is a more approapriate fallback than a disruptive > user visible action. > > Signed-off-by: Michael Jeanson <[email protected]> > --- > lttng-events.c | 30 ++++++++++----------- > wrapper/vmalloc.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 92 insertions(+), 17 deletions(-) > > diff --git a/lttng-events.c b/lttng-events.c > index 6aa994c..1e4cc10 100644 > --- a/lttng-events.c > +++ b/lttng-events.c > @@ -132,14 +132,14 @@ struct lttng_session *lttng_session_create(void) > int i; > > mutex_lock(&sessions_mutex); > - session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL); > + session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL); > if (!session) > goto err; > INIT_LIST_HEAD(&session->chan); > INIT_LIST_HEAD(&session->events); > uuid_le_gen(&session->uuid); > > - metadata_cache = kzalloc(sizeof(struct lttng_metadata_cache), > + metadata_cache = lttng_kvzalloc(sizeof(struct lttng_metadata_cache), > GFP_KERNEL); > if (!metadata_cache) > goto err_free_session; > @@ -161,9 +161,9 @@ struct lttng_session *lttng_session_create(void) > return session; > > err_free_cache: > - kfree(metadata_cache); > + lttng_kvfree(metadata_cache); > err_free_session: > - kfree(session); > + lttng_kvfree(session); > err: > 
mutex_unlock(&sessions_mutex); > return NULL; > @@ -174,7 +174,7 @@ void metadata_cache_destroy(struct kref *kref) > struct lttng_metadata_cache *cache = > container_of(kref, struct lttng_metadata_cache, refcount); > vfree(cache->data); > - kfree(cache); > + lttng_kvfree(cache); > } > > void lttng_session_destroy(struct lttng_session *session) > @@ -212,7 +212,7 @@ void lttng_session_destroy(struct lttng_session *session) > kref_put(&session->metadata_cache->refcount, metadata_cache_destroy); > list_del(&session->list); > mutex_unlock(&sessions_mutex); > - kfree(session); > + lttng_kvfree(session); > } > > int lttng_session_statedump(struct lttng_session *session) > @@ -495,7 +495,7 @@ struct lttng_channel *lttng_channel_create(struct > lttng_session *session, > printk(KERN_WARNING "LTT : Can't lock transport module.\n"); > goto notransport; > } > - chan = kzalloc(sizeof(struct lttng_channel), GFP_KERNEL); > + chan = lttng_kvzalloc(sizeof(struct lttng_channel), GFP_KERNEL); > if (!chan) > goto nomem; > chan->session = session; > @@ -520,7 +520,7 @@ struct lttng_channel *lttng_channel_create(struct > lttng_session *session, > return chan; > > create_error: > - kfree(chan); > + lttng_kvfree(chan); > nomem: > if (transport) > module_put(transport->owner); > @@ -542,7 +542,7 @@ void _lttng_channel_destroy(struct lttng_channel *chan) > module_put(chan->transport->owner); > list_del(&chan->list); > lttng_destroy_context(chan->ctx); > - kfree(chan); > + lttng_kvfree(chan); > } > > void lttng_metadata_channel_destroy(struct lttng_channel *chan) > @@ -1343,7 +1343,7 @@ int lttng_enabler_ref_events(struct lttng_enabler > *enabler) > * If no backward ref, create it. > * Add backward ref from event to enabler. 
> */ > - enabler_ref = kzalloc(sizeof(*enabler_ref), GFP_KERNEL); > + enabler_ref = lttng_kvzalloc(sizeof(*enabler_ref), > GFP_KERNEL); > if (!enabler_ref) > return -ENOMEM; > enabler_ref->ref = enabler; > @@ -1381,7 +1381,7 @@ struct lttng_enabler *lttng_enabler_create(enum > lttng_enabler_type type, > { > struct lttng_enabler *enabler; > > - enabler = kzalloc(sizeof(*enabler), GFP_KERNEL); > + enabler = lttng_kvzalloc(sizeof(*enabler), GFP_KERNEL); > if (!enabler) > return NULL; > enabler->type = type; > @@ -1427,7 +1427,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler > *enabler, > ret = get_user(bytecode_len, &bytecode->len); > if (ret) > return ret; > - bytecode_node = kzalloc(sizeof(*bytecode_node) + bytecode_len, > + bytecode_node = lttng_kvzalloc(sizeof(*bytecode_node) + bytecode_len, > GFP_KERNEL); > if (!bytecode_node) > return -ENOMEM; > @@ -1443,7 +1443,7 @@ int lttng_enabler_attach_bytecode(struct lttng_enabler > *enabler, > return 0; > > error_free: > - kfree(bytecode_node); > + lttng_kvfree(bytecode_node); > return ret; > } > > @@ -1461,14 +1461,14 @@ void lttng_enabler_destroy(struct lttng_enabler > *enabler) > /* Destroy filter bytecode */ > list_for_each_entry_safe(filter_node, tmp_filter_node, > &enabler->filter_bytecode_head, node) { > - kfree(filter_node); > + lttng_kvfree(filter_node); > } > > /* Destroy contexts */ > lttng_destroy_context(enabler->ctx); > > list_del(&enabler->node); > - kfree(enabler); > + lttng_kvfree(enabler); > } > > /* > diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h > index 2332439..ad2d6cf 100644 > --- a/wrapper/vmalloc.h > +++ b/wrapper/vmalloc.h > @@ -25,6 +25,9 @@ > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > */ > > +#include <linux/version.h> > +#include <linux/vmalloc.h> > + > #ifdef CONFIG_KALLSYMS > > #include <linux/kallsyms.h> > @@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void) > } > #else > > -#include <linux/vmalloc.h> > - > static inline > void 
wrapper_vmalloc_sync_all(void) > { > @@ -60,4 +61,78 @@ void wrapper_vmalloc_sync_all(void) > } > #endif > > +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0)) > +static inline > +void *lttng_kvmalloc(unsigned long size, int flags) > +{ > + return kvmalloc(size, flags); > +} > + > +static inline > +void *lttng_kvzalloc(unsigned long size, int flags) > +{ > + return kvzalloc(size, flags); > +} > +#else > + > +#include <linux/slab.h> > +#include <linux/mm.h> > + > +/** > + * lttng_kvmalloc_node - attempt to allocate physically contiguous memory, > but > upon > + * failure, fall back to non-contiguous (vmalloc) allocation. > + * @size: size of the request. > + * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL. > + * > + * Uses kmalloc to get the memory but if the allocation fails then falls back > + * to the vmalloc allocator. Use lttng_kvfree to free the memory. > + * > + * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not > supported > + */ > +static inline > +void *lttng_kvmalloc(unsigned long size, int flags) > +{ > + void *ret; > + > + /* > + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page > tables) > + * so the given set of flags has to be compatible. > + */ > + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); > + > + /* > + * If the allocation fits in a single page, do not fallback. 
> + */ > + if (size <= PAGE_SIZE) { > + return kmalloc(size, flags); > + } > + > + /* > + * Make sure that larger requests are not too disruptive - no OOM > + * killer and no allocation failure warnings as we have a fallback > + */ > + ret = kmalloc(size, flags | __GFP_NOWARN | __GFP_NORETRY); > + if (!ret) { > + ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); > + } > + return ret; > +} > + > +static inline > +void *lttng_kvzalloc(unsigned long size, int flags) > +{ > + return lttng_kvmalloc(size, flags | __GFP_ZERO); > +} > + > +static inline > +void lttng_kvfree(const void *addr) > +{ > + if (is_vmalloc_addr(addr)) { > + vfree(addr); > + } else { > + kfree(addr); > + } > +} > +#endif > + > #endif /* _LTTNG_WRAPPER_VMALLOC_H */ > -- > 2.7.4 -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com _______________________________________________ lttng-dev mailing list [email protected] https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
