----- On Feb 22, 2018, at 4:15 PM, Julien Desfossez jdesfos...@efficios.com 
wrote:

> This memory pool is created when the lttng-tracer module is loaded. It
> allocates 4 buffers of 4k on each CPU. These buffers are designed to
> allow tracepoint probes to temporarily store data that does not fit on
> the stack (during the code_pre and code_post phases). The memory is
> freed when the lttng-tracer module is unloaded.
> 
> This removes the need for dynamic allocation during the execution of
> tracepoint probes, which does not behave well on PREEMPT_RT kernel, even
> if the GFP_ATOMIC and GFP_NOWAIT flags are set.

"... even when invoked with the GFP_ATOMIC | GFP_NOWAIT flags."

> 
> Signed-off-by: Julien Desfossez <jdesfos...@efficios.com>
> ---
> Makefile                             |   3 +-
> lttng-abi.c                          |   9 ++
> probes/lttng-tracepoint-event-impl.h |   1 +
> tp-mempool.c                         | 175 +++++++++++++++++++++++++++++++++++
> tp-mempool.h                         |  48 ++++++++++
> 5 files changed, 235 insertions(+), 1 deletion(-)
> create mode 100644 tp-mempool.c
> create mode 100644 tp-mempool.h
> 
> diff --git a/Makefile b/Makefile
> index 2cd2df0..78f6661 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -59,7 +59,8 @@ ifneq ($(KERNELRELEASE),)
>                        lttng-filter.o lttng-filter-interpreter.o \
>                        lttng-filter-specialize.o \
>                        lttng-filter-validator.o \
> -                       probes/lttng-probe-user.o
> +                       probes/lttng-probe-user.o \
> +                       tp-mempool.o
> 
>   ifneq ($(CONFIG_HAVE_SYSCALL_TRACEPOINTS),)
>     lttng-tracer-objs += lttng-syscalls.o
> diff --git a/lttng-abi.c b/lttng-abi.c
> index d202b72..9c29612 100644
> --- a/lttng-abi.c
> +++ b/lttng-abi.c
> @@ -56,6 +56,7 @@
> #include <lttng-abi-old.h>
> #include <lttng-events.h>
> #include <lttng-tracer.h>
> +#include <tp-mempool.h>
> #include <lib/ringbuffer/frontend_types.h>
> 
> /*
> @@ -1771,6 +1772,12 @@ int __init lttng_abi_init(void)
> 
>       wrapper_vmalloc_sync_all();
>       lttng_clock_ref();
> +
> +     ret = tp_mempool_init();
> +     if (ret) {
> +             goto error;
> +     }
> +
>       lttng_proc_dentry = proc_create_data("lttng", S_IRUSR | S_IWUSR, NULL,
>                                       &lttng_fops, NULL);
> 
> @@ -1784,6 +1791,7 @@ int __init lttng_abi_init(void)
> 
> error:
>       lttng_clock_unref();
> +     tp_mempool_destroy();

We may want to unref the clock after the mempool destroy, so teardown order is symmetric with the init order.

>       return ret;
> }
> 
> @@ -1793,4 +1801,5 @@ void lttng_abi_exit(void)
>       lttng_clock_unref();
>       if (lttng_proc_dentry)
>               remove_proc_entry("lttng", NULL);
> +     tp_mempool_destroy();

Same comment as above: call tp_mempool_destroy() before lttng_clock_unref() for symmetry with the init order.

> }
> diff --git a/probes/lttng-tracepoint-event-impl.h
> b/probes/lttng-tracepoint-event-impl.h
> index 61f1c2d..2a8fe58 100644
> --- a/probes/lttng-tracepoint-event-impl.h
> +++ b/probes/lttng-tracepoint-event-impl.h
> @@ -34,6 +34,7 @@
> #include <wrapper/rcu.h>
> #include <lttng-events.h>
> #include <lttng-tracer-core.h>
> +#include <tp-mempool.h>
> 
> #define __LTTNG_NULL_STRING   "(null)"
> 
> diff --git a/tp-mempool.c b/tp-mempool.c
> new file mode 100644
> index 0000000..38df26f
> --- /dev/null
> +++ b/tp-mempool.c
> @@ -0,0 +1,175 @@
> +/*
> + * tp-mempool.c
> + *
> + * Copyright (C) 2018 Julien Desfossez <jdesfos...@efficios.com>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; only
> + * version 2.1 of the License.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#include <linux/slab.h>
> +#include <linux/percpu.h>
> +
> +#include <tp-mempool.h>
> +
> +#define BUF_SIZE 4 * 1024

The expansion is missing parentheses (it should be (4 * 1024) to be safe in surrounding expressions), but you can simply replace it with 4096.

> +#define NR_BUF_PER_CPU 4
> +
> +struct tp_buf_entry {
> +     int cpu; /* To make sure we return the entry to the right pool. */
> +     char buf[BUF_SIZE];
> +     struct list_head list;
> +};
> +
> +struct per_cpu_buf {
> +     struct list_head free_list; /* Free struct tp_buf_entry. */
> +     struct list_head allocated_list; /* Allocated struct tp_buf_entry. */
> +};

What is the strategy to prevent concurrent updates here?

In the current patchset, it's only called from syscall entry within
a tracepoint probe (with preemption disabled), so there is only a single
context using it per cpu. But if we end up using it in other execution
contexts, it will break.

If this is an intended limitation, it should be clearly documented.

> +
> +static struct per_cpu_buf __percpu *pool = NULL; /* Per-cpu buffer. */

The " = NULL" can be removed per kernel coding style: static storage is zero-initialized anyway.

> +
> +int tp_mempool_init(void)
> +{
> +     int ret, cpu;
> +
> +     /* The pool is only supposed to be allocated once. */
> +     if (pool) {
> +             WARN_ON_ONCE(1);
> +             ret = -1;
> +             goto end;
> +     }
> +
> +     pool = alloc_percpu(struct per_cpu_buf);
> +     if (!pool) {
> +             ret = -ENOMEM;
> +             goto end;
> +     }
> +
> +     for_each_possible_cpu(cpu) {
> +             int i;
> +             struct per_cpu_buf *cpu_buf = per_cpu_ptr(pool, cpu);
> +
> +             INIT_LIST_HEAD(&cpu_buf->free_list);
> +             INIT_LIST_HEAD(&cpu_buf->allocated_list);

We can remove the "allocated list", it's not really useful.

Also, please initialize the free_list for each possible cpu
in a first pass, and then in a second pass allocate the items.
Otherwise, if we have a kzalloc error, we end up calling
tp_mempool_destroy which iterates on non-initialized free lists.

> +
> +             for (i = 0; i < NR_BUF_PER_CPU; i++) {
> +                     struct tp_buf_entry *entry;
> +
> +                     entry = kzalloc(sizeof(struct tp_buf_entry),
> +                                     GFP_KERNEL);
> +                     if (!entry) {
> +                             ret = -ENOMEM;
> +                             goto error_free_pool;
> +                     }
> +                     entry->cpu = cpu;
> +                     list_add_tail(&entry->list, &cpu_buf->free_list);
> +             }
> +     }
> +
> +     ret = 0;
> +     goto end;
> +
> +error_free_pool:
> +     tp_mempool_destroy();
> +end:
> +     return ret;
> +}
> +
> +void tp_mempool_destroy(void)
> +{
> +     int cpu;
> +
> +     if (!pool) {
> +             return;
> +     }
> +
> +     for_each_possible_cpu(cpu) {
> +             struct per_cpu_buf *cpu_buf = per_cpu_ptr(pool, cpu);
> +             struct tp_buf_entry *entry, *tmp;
> +
> +             list_for_each_entry_safe(entry, tmp, &cpu_buf->free_list, list) 
> {
> +                     list_del(&entry->list);
> +                     kfree(entry);
> +             }

If you really want to check that there is nothing allocated anymore, you
can just count the number of items in the freelist here. Just reporting
that something is fishy should be enough: freeing stuff from the allocated
list is bogus anyway, because we don't own the allocated memory area, and
another thread could still be using it.

> +             if (!list_empty(&cpu_buf->allocated_list)) {
> +                     printk(KERN_WARNING "TP allocated memory pool not 
> empty");
> +                     list_for_each_entry_safe(entry, tmp,
> +                                     &cpu_buf->allocated_list, list) {
> +                             list_del(&entry->list);
> +                             kfree(entry);
> +                     }
> +             }
> +     }
> +     free_percpu(pool);
> +     pool = NULL;
> +}
> +
> +void *tp_mempool_alloc(size_t size)
> +{
> +     void *ret;
> +     struct tp_buf_entry *entry;
> +     struct per_cpu_buf *cpu_buf;
> +     int cpu = smp_processor_id();
> +
> +     if (size > BUF_SIZE) {
> +             ret = NULL;
> +             goto end;
> +     }
> +
> +     cpu_buf = per_cpu_ptr(pool, cpu);
> +     if (list_empty(&cpu_buf->free_list)) {
> +             ret = NULL;
> +             goto end;
> +     }
> +
> +     entry = list_first_entry(&cpu_buf->free_list, struct tp_buf_entry, 
> list);
> +     /* Remove the entry from the free list. */
> +     list_del(&entry->list);
> +     /* Add it to the allocated list for tracking. */
> +     list_add_tail(&entry->list, &cpu_buf->allocated_list);

allocated list not needed.

Please memset to 0 on allocation.

> +
> +     ret = (void *) entry->buf;
> +
> +end:
> +     return ret;
> +}
> +
> +void tp_mempool_free(void *ptr)
> +{
> +     struct tp_buf_entry *entry;
> +     struct per_cpu_buf *cpu_buf;
> +
> +     if (!ptr) {
> +             goto end;
> +     }
> +
> +     entry = container_of(ptr, struct tp_buf_entry, buf);
> +     if (!entry) {
> +             goto end;
> +     }
> +     memset(entry->buf, 0, BUF_SIZE);

Cache-wise, it's better to memset to 0 on allocation rather than on
free, because the memory ends up being written to right after
allocation, whereas "free" often doesn't need to touch the memory.

> +
> +     cpu_buf = per_cpu_ptr(pool, entry->cpu);
> +     if (!cpu_buf) {
> +             goto end;
> +     }
> +
> +     /* Remove the entry from the allocated list. */
> +     list_del(&entry->list);

not needed.

> +     /* Add it to the free list. */
> +     list_add_tail(&entry->list, &cpu_buf->free_list);
> +
> +end:
> +     return;
> +}
> diff --git a/tp-mempool.h b/tp-mempool.h
> new file mode 100644
> index 0000000..93c2239
> --- /dev/null
> +++ b/tp-mempool.h
> @@ -0,0 +1,48 @@
> +#ifndef LTTNG_TP_MEMPOOL_H
> +#define LTTNG_TP_MEMPOOL_H
> +
> +/*
> + * tp-mempool.h
> + *
> + * Copyright (C) 2018 Julien Desfossez <jdesfos...@efficios.com>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; only
> + * version 2.1 of the License.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#include <linux/percpu.h>
> +
> +/*
> + * Initialize the pool, only performed once.
> + * Returns 0 on success, a negative value on error.
> + */
> +int tp_mempool_init(void);
> +
> +/*
> + * Destroy the pool and free all the memory allocated.
> + */
> +void tp_mempool_destroy(void);
> +
> +/*
> + * Ask for a buffer on the current cpu.
> + * Return a pointer to a buffer on success, NULL on error.

Concurrency requirements/guarantees should be documented right
here. Allocation size limits too.

Thanks!

Mathieu

> + */
> +void *tp_mempool_alloc(size_t size);
> +
> +/*
> + * Release the memory reserved.
> + */
> +void tp_mempool_free(void *ptr);
> +
> +#endif /* LTTNG_TP_MEMPOOL_H */
> --
> 2.7.4

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

Reply via email to