Re: [lttng-dev] [RFC PATCH urcu] Fix: dynamic fallback to compat futex on sys_futex ENOSYS

2015-09-13 Thread Paul E. McKenney
On Fri, Sep 11, 2015 at 10:48:38AM -0400, Mathieu Desnoyers wrote:
> Some MIPS processors (e.g. Cavium Octeon II) dynamically check if the
> CPU supports ll/sc within sys_futex, and return a ENOSYS errno if they
> don't, even though the architecture implements sys_futex.
> 
> Handle this situation by always building the sys_futex compatibility
> layer, and fall-back on it if sys_futex return a ENOSYS errno. This is
> a tiny compat layer which adds very little space overhead.
> 
> This adds an unlikely branch on return from sys_futex, which should
> not be an issue performance-wise (we've already taken a system call).
> 
> Since this is a fall-back mode, don't try to be clever, and don't cache
> the result, so that the common cases (architectures with a properly
> working sys_futex) don't get two conditional branches, just one.

Looks like a reasonable approach to me.

Acked-by: Paul E. McKenney 

> Signed-off-by: Mathieu Desnoyers 
> CC: Paul E. McKenney 
> CC: Michael Jeanson 
> CC: Jon Bernard 
> ---
>  Makefile.am  |  2 --
>  urcu/futex.h | 70 
> +---
>  2 files changed, 57 insertions(+), 15 deletions(-)
> 
> diff --git a/Makefile.am b/Makefile.am
> index 752510d..f9a 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -41,9 +41,7 @@ else
>  COMPAT=
>  endif
> 
> -if COMPAT_FUTEX
>  COMPAT+=compat_futex.c
> -endif
> 
>  RCULFHASH = rculfhash.c rculfhash-mm-order.c rculfhash-mm-chunk.c \
>   rculfhash-mm-mmap.c
> diff --git a/urcu/futex.h b/urcu/futex.h
> index 2be3bb6..13d2b1a 100644
> --- a/urcu/futex.h
> +++ b/urcu/futex.h
> @@ -47,22 +47,66 @@ extern "C" {
>   * (returns EINTR).
>   */
> 
> +extern int compat_futex_noasync(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
> +extern int compat_futex_async(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
> +
>  #ifdef CONFIG_RCU_HAVE_FUTEX
> +
> +#include 
> +#include 
> +#include 
>  #include 
> -#define futex(...)   syscall(__NR_futex, __VA_ARGS__)
> -#define futex_noasync(uaddr, op, val, timeout, uaddr2, val3) \
> - futex(uaddr, op, val, timeout, uaddr2, val3)
> -#define futex_async(uaddr, op, val, timeout, uaddr2, val3)   \
> - futex(uaddr, op, val, timeout, uaddr2, val3)
> +
> +static inline int futex(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
> +{
> + return syscall(__NR_futex, uaddr, op, val, timeout,
> + uaddr2, val3);
> +}
> +
> +static inline int futex_noasync(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
> +{
> + int ret;
> +
> + ret = futex(uaddr, op, val, timeout, uaddr2, val3);
> + if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
> + return compat_futex_noasync(uaddr, op, val, timeout,
> + uaddr2, val3);
> + }
> + return ret;
> +
> +}
> +
> +static inline int futex_async(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
> +{
> + int ret;
> +
> + ret = futex(uaddr, op, val, timeout, uaddr2, val3);
> + if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
> + return compat_futex_async(uaddr, op, val, timeout,
> + uaddr2, val3);
> + }
> + return ret;
> +}
> +
>  #else
> -extern int compat_futex_noasync(int32_t *uaddr, int op, int32_t val,
> - const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
> -#define futex_noasync(uaddr, op, val, timeout, uaddr2, val3) \
> - compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3)
> -extern int compat_futex_async(int32_t *uaddr, int op, int32_t val,
> - const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
> -#define futex_async(uaddr, op, val, timeout, uaddr2, val3)   \
> - compat_futex_async(uaddr, op, val, timeout, uaddr2, val3)
> +
> +static inline int futex_noasync(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
> +{
> + return compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3);
> +}
> +
> +static inline int futex_async(int32_t *uaddr, int op, int32_t val,
> + const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
> +{
> + return compat_futex_async(uaddr, op, val, timeout, uaddr2, val3);
> +}
> +
>  #endif
> 
>  #ifdef __cplusplus 
> -- 
> 2.1.4
> 


___
lttng-dev mailing list
lttng-dev@lists.lttng.org
http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev


Re: [lttng-dev] [RFC lttng-tools] Do not list or allow unsupported perf events

2015-09-13 Thread Mathieu Desnoyers
CCing Jeremie.

- On Sep 10, 2015, at 8:15 PM, Tony Jones to...@suse.de wrote:

> On 09/10/2015 12:21 PM, Tony Jones wrote:
> 
>> +static int is_event_supported(const struct ctx_opts *ctx)
>> +{
>> +int fd;
>> +struct perf_event_attr attr = {
>> +.type = ctx->u.perf.type,
>> +.config = ctx->u.perf.config,
>> +.disabled = 1
>> +};
>> +
>> +if (ctx->ctx_type == CONTEXT_PERF_CPU_COUNTER ||
>> +ctx->ctx_type == CONTEXT_PERF_THREAD_COUNTER) {
> 
> obviously CONTEXT_PERF_THREAD_COUNTER is UST.  Regardless, there are  some
> obvious usability issues (below).  I need to look at the code some more.

OK, keep us posted. It would be nice if we can list more specifically
which counters are supported on the machine.

Thanks,

Mathieu

> 
> # lttng add-context -u -t perf:thread:alignment-faults
> UST context perf:thread:alignment-faults added to all channels
> 
> (invalid, better error needed, also sometimes -k -t perf:thread:* succeeds 
> which
> seems broken)
> # lttng add-context -k -t perf:thread:alignment-faults
> Error: perf:thread:alignment-faults: Add kernel context failed
> Warning: Some command(s) went wrong

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

___
lttng-dev mailing list
lttng-dev@lists.lttng.org
http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev


Re: [lttng-dev] [RFC lttng-tools] Do not list or allow unsupported perf events

2015-09-13 Thread Mathieu Desnoyers
CCing the new lttng-tools maintainer, Jérémie. More below,

- On Sep 10, 2015, at 3:21 PM, Tony Jones to...@suse.com wrote:

> Query the perf api to determine the list of events that are actually
> supported by the PMU.  Reject events that are not supported.   On an
> AMD Opteron 6128 the following currently listed events are not supported:
> 
>   perf:bus-cycles, perf:L1-dcache-store-misses,
>   perf:L1-icache-stores, perf:L1-icache-store-misses,
>   perf:L1-icache-prefetch-misses, perf:LLC-store-misses,
>   perf:LLC-prefetches, perf:LLC-prefetch-misses, perf:dTLB-stores,
>   perf:dTLB-store-misses, perf:dTLB-prefetches,
>   perf:dTLB-prefetch-misses
> 
> It's not clear to me (hence the RFC) why parts of the perf ABI definitions
> had been replicated into add_context.c rather than just including
> perf_event.h (other than to avoid an autotools dependancy/check on
> perf_event.h which this patch still needs). It is also possible that
> querying the available events would be better handled over the lttng
> user->kernel ABI.
> 
> Thoughts?

It might make sense to add a dependency on perf_event.h.

I'm wondering if your detection technique below really try to
allocate a PMU counter for a short period of time ? This could
be an issue since there is a limited amount of counters available.

Thoughts ?

Thanks,

Mathieu

> ---
> src/bin/lttng/commands/add_context.c | 92 ++--
> 1 file changed, 25 insertions(+), 67 deletions(-)
> 
> diff --git a/src/bin/lttng/commands/add_context.c
> b/src/bin/lttng/commands/add_context.c
> index cf3f3ef..6f52edb 100644
> --- a/src/bin/lttng/commands/add_context.c
> +++ b/src/bin/lttng/commands/add_context.c
> @@ -26,6 +26,8 @@
> #include 
> #include 
> 
> +#include 
> +
> #include 
> 
> #include 
> @@ -77,72 +79,6 @@ enum context_type {
>   CONTEXT_PERF_THREAD_COUNTER = 14,
> };
> 
> -/*
> - * Taken from the Perf ABI (all enum perf_*)
> - */
> -enum perf_type {
> - PERF_TYPE_HARDWARE = 0,
> - PERF_TYPE_SOFTWARE = 1,
> - PERF_TYPE_HW_CACHE = 3,
> -};
> -
> -enum perf_count_hard {
> - PERF_COUNT_HW_CPU_CYCLES= 0,
> - PERF_COUNT_HW_INSTRUCTIONS  = 1,
> - PERF_COUNT_HW_CACHE_REFERENCES  = 2,
> - PERF_COUNT_HW_CACHE_MISSES  = 3,
> - PERF_COUNT_HW_BRANCH_INSTRUCTIONS   = 4,
> - PERF_COUNT_HW_BRANCH_MISSES = 5,
> - PERF_COUNT_HW_BUS_CYCLES= 6,
> - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND   = 7,
> - PERF_COUNT_HW_STALLED_CYCLES_BACKEND= 8,
> -};
> -
> -enum perf_count_soft {
> - PERF_COUNT_SW_CPU_CLOCK= 0,
> - PERF_COUNT_SW_TASK_CLOCK   = 1,
> - PERF_COUNT_SW_PAGE_FAULTS  = 2,
> - PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
> - PERF_COUNT_SW_CPU_MIGRATIONS   = 4,
> - PERF_COUNT_SW_PAGE_FAULTS_MIN  = 5,
> - PERF_COUNT_SW_PAGE_FAULTS_MAJ  = 6,
> - PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
> - PERF_COUNT_SW_EMULATION_FAULTS = 8,
> -};
> -
> -/*
> - * Generalized hardware cache events:
> - *
> - *   { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
> - *   { read, write, prefetch } x
> - *   { accesses, misses }
> - */
> -enum perf_hw_cache_id {
> - PERF_COUNT_HW_CACHE_L1D = 0,
> - PERF_COUNT_HW_CACHE_L1I = 1,
> - PERF_COUNT_HW_CACHE_LL  = 2,
> - PERF_COUNT_HW_CACHE_DTLB= 3,
> - PERF_COUNT_HW_CACHE_ITLB= 4,
> - PERF_COUNT_HW_CACHE_BPU = 5,
> -
> - PERF_COUNT_HW_CACHE_MAX,/* non-ABI */
> -};
> -
> -enum perf_hw_cache_op_id {
> - PERF_COUNT_HW_CACHE_OP_READ = 0,
> - PERF_COUNT_HW_CACHE_OP_WRITE= 1,
> - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
> -
> - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
> -};
> -
> -enum perf_hw_cache_op_result_id {
> - PERF_COUNT_HW_CACHE_RESULT_ACCESS   = 0,
> - PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
> -
> - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
> -};
> -
> static struct poptOption long_options[] = {
>   /* longName, shortName, argInfo, argPtr, value, descrip, argDesc */
>   {"help",   'h', POPT_ARG_NONE, 0, OPT_HELP, 0, 0},
> @@ -461,6 +397,25 @@ struct ctx_type_list {
>   .head = CDS_LIST_HEAD_INIT(ctx_type_list.head),
> };
> 
> +static int is_event_supported(const struct ctx_opts *ctx)
> +{
> + int fd;
> + struct perf_event_attr attr = {
> + .type = ctx->u.perf.type,
> + .config = ctx->u.perf.config,
> + .disabled = 1
> + };
> +
> + if (ctx->ctx_type == CONTEXT_PERF_CPU_COUNTER ||
> + ctx->ctx_type == CONTEXT_PERF_THREAD_COUNTER) {
> + if ((fd = syscall(__NR_perf_event_open, ,  0, -1, -1, 0)) 
> < 0)
> + return 0;
> +
> + close(fd);
> + }
> + return 1;
> +}
> +
> /*
>  * Pretty 

Re: [lttng-dev] [RFC PATCH urcu] Fix: dynamic fallback to compat futex on sys_futex ENOSYS

2015-09-13 Thread Mathieu Desnoyers
- On Sep 13, 2015, at 12:29 PM, Paul E. McKenney paul...@linux.vnet.ibm.com 
wrote:

> On Fri, Sep 11, 2015 at 10:48:38AM -0400, Mathieu Desnoyers wrote:
>> Some MIPS processors (e.g. Cavium Octeon II) dynamically check if the
>> CPU supports ll/sc within sys_futex, and return a ENOSYS errno if they
>> don't, even though the architecture implements sys_futex.
>> 
>> Handle this situation by always building the sys_futex compatibility
>> layer, and fall-back on it if sys_futex return a ENOSYS errno. This is
>> a tiny compat layer which adds very little space overhead.
>> 
>> This adds an unlikely branch on return from sys_futex, which should
>> not be an issue performance-wise (we've already taken a system call).
>> 
>> Since this is a fall-back mode, don't try to be clever, and don't cache
>> the result, so that the common cases (architectures with a properly
>> working sys_futex) don't get two conditional branches, just one.
> 
> Looks like a reasonable approach to me.
> 
> Acked-by: Paul E. McKenney 

Merged into master, stable-0.8 and stable-0.7,

Thanks!

Mathieu

> 
>> Signed-off-by: Mathieu Desnoyers 
>> CC: Paul E. McKenney 
>> CC: Michael Jeanson 
>> CC: Jon Bernard 
>> ---
>>  Makefile.am  |  2 --
>>  urcu/futex.h | 70 
>> +---
>>  2 files changed, 57 insertions(+), 15 deletions(-)
>> 
>> diff --git a/Makefile.am b/Makefile.am
>> index 752510d..f9a 100644
>> --- a/Makefile.am
>> +++ b/Makefile.am
>> @@ -41,9 +41,7 @@ else
>>  COMPAT=
>>  endif
>> 
>> -if COMPAT_FUTEX
>>  COMPAT+=compat_futex.c
>> -endif
>> 
>>  RCULFHASH = rculfhash.c rculfhash-mm-order.c rculfhash-mm-chunk.c \
>>  rculfhash-mm-mmap.c
>> diff --git a/urcu/futex.h b/urcu/futex.h
>> index 2be3bb6..13d2b1a 100644
>> --- a/urcu/futex.h
>> +++ b/urcu/futex.h
>> @@ -47,22 +47,66 @@ extern "C" {
>>   * (returns EINTR).
>>   */
>> 
>> +extern int compat_futex_noasync(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
>> +extern int compat_futex_async(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
>> +
>>  #ifdef CONFIG_RCU_HAVE_FUTEX
>> +
>> +#include 
>> +#include 
>> +#include 
>>  #include 
>> -#define futex(...)  syscall(__NR_futex, __VA_ARGS__)
>> -#define futex_noasync(uaddr, op, val, timeout, uaddr2, val3)\
>> -futex(uaddr, op, val, timeout, uaddr2, val3)
>> -#define futex_async(uaddr, op, val, timeout, uaddr2, val3)  \
>> -futex(uaddr, op, val, timeout, uaddr2, val3)
>> +
>> +static inline int futex(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
>> +{
>> +return syscall(__NR_futex, uaddr, op, val, timeout,
>> +uaddr2, val3);
>> +}
>> +
>> +static inline int futex_noasync(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
>> +{
>> +int ret;
>> +
>> +ret = futex(uaddr, op, val, timeout, uaddr2, val3);
>> +if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
>> +return compat_futex_noasync(uaddr, op, val, timeout,
>> +uaddr2, val3);
>> +}
>> +return ret;
>> +
>> +}
>> +
>> +static inline int futex_async(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
>> +{
>> +int ret;
>> +
>> +ret = futex(uaddr, op, val, timeout, uaddr2, val3);
>> +if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
>> +return compat_futex_async(uaddr, op, val, timeout,
>> +uaddr2, val3);
>> +}
>> +return ret;
>> +}
>> +
>>  #else
>> -extern int compat_futex_noasync(int32_t *uaddr, int op, int32_t val,
>> -const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
>> -#define futex_noasync(uaddr, op, val, timeout, uaddr2, val3)\
>> -compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3)
>> -extern int compat_futex_async(int32_t *uaddr, int op, int32_t val,
>> -const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
>> -#define futex_async(uaddr, op, val, timeout, uaddr2, val3)  \
>> -compat_futex_async(uaddr, op, val, timeout, uaddr2, val3)
>> +
>> +static inline int futex_noasync(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
>> +{
>> +return compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3);
>> +}
>> +
>> +static inline int futex_async(int32_t *uaddr, int op, int32_t val,
>> +const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
>> +{
>> +return compat_futex_async(uaddr, op, val, timeout, uaddr2,