On Tue, Nov 21, 2017 at 09:18:53AM -0500, Mathieu Desnoyers wrote:
> diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
> new file mode 100644
> index 000000000000..63e81d6c61fa
> --- /dev/null
> +++ b/tools/testing/selftests/rseq/rseq-x86.h
> @@ -0,0 +1,898 @@
> +/*
> + * rseq-x86.h
> + *
> + * (C) Copyright 2016 - Mathieu Desnoyers <[email protected]>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <stdint.h>
> +
> +#define RSEQ_SIG     0x53053053
> +
> +#ifdef __x86_64__
> +
> +#define rseq_smp_mb()        __asm__ __volatile__ ("mfence" : : : "memory")

See commit:

  450cbdd0125c ("locking/x86: Use LOCK ADD for smp_mb() instead of MFENCE")

> +#define rseq_smp_rmb()       barrier()
> +#define rseq_smp_wmb()       barrier()
> +
> +#define rseq_smp_load_acquire(p)                                     \
> +__extension__ ({                                                     \
> +     __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);                       \
> +     barrier();                                                      \
> +     ____p1;                                                         \
> +})
> +
> +#define rseq_smp_acquire__after_ctrl_dep()   rseq_smp_rmb()
> +
> +#define rseq_smp_store_release(p, v)                                 \
> +do {                                                                 \
> +     barrier();                                                      \
> +     RSEQ_WRITE_ONCE(*p, v);                                         \
> +} while (0)
> +
> +#define RSEQ_ASM_DEFINE_TABLE(label, section, version, flags,                \
> +                     start_ip, post_commit_offset, abort_ip)         \
> +             ".pushsection " __rseq_str(section) ", \"aw\"\n\t"      \
> +             ".balign 32\n\t"                                        \
> +             __rseq_str(label) ":\n\t"                               \
> +             ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
> +             ".quad " __rseq_str(start_ip) ", " 
> __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
> +             ".popsection\n\t"

OK, so this creates a table entry, but why is @section an argument? AFAICT
it's _always_ the same thing, no?

> +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)             \
> +             RSEQ_INJECT_ASM(1)                                      \
> +             "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t"       \
> +             "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t"            \
> +             __rseq_str(label) ":\n\t"

And this sets the TLS variable to point to the table entry from the
previous macro, no? But again @rseq_cs seems to always be the very same,
why is that an argument?

> +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)           \
> +             RSEQ_INJECT_ASM(2)                                      \
> +             "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) 
> "]\n\t" \
> +             "jnz " __rseq_str(label) "\n\t"

more things that are always the same it seems..

> +#define RSEQ_ASM_DEFINE_ABORT(label, section, sig, teardown, abort_label) \
> +             ".pushsection " __rseq_str(section) ", \"ax\"\n\t"      \
> +             /* Disassembler-friendly signature: nopl <sig>(%rip). */\
> +             ".byte 0x0f, 0x1f, 0x05\n\t"                            \
> +             ".long " __rseq_str(sig) "\n\t"                 \
> +             __rseq_str(label) ":\n\t"                               \
> +             teardown                                                \
> +             "jmp %l[" __rseq_str(abort_label) "]\n\t"               \
> +             ".popsection\n\t"

@section and @sig seem to always be the same...

> +#define RSEQ_ASM_DEFINE_CMPFAIL(label, section, teardown, cmpfail_label) \
> +             ".pushsection " __rseq_str(section) ", \"ax\"\n\t"      \
> +             __rseq_str(label) ":\n\t"                               \
> +             teardown                                                \
> +             "jmp %l[" __rseq_str(cmpfail_label) "]\n\t"             \
> +             ".popsection\n\t"

Somewhat failing to see the point of this macro, it seems to just
obfuscate the normal failure path.

> +static inline __attribute__((always_inline))
> +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv,
> +             int cpu)

I find this a very confusing name for what is essentially
compare-and-exchange or compare-and-swap, no?

> +{
> +     __asm__ __volatile__ goto (
> +             RSEQ_ASM_DEFINE_TABLE(3, __rseq_table, 0x0, 0x0, 1f, 2f-1f, 4f)

So we set up the section, but unreadably so... reducing the number of
arguments would help a lot.

Rename the current one to __RSEQ_ASM_DEFINE_TABLE() and then use:

#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
        __RSEQ_ASM_DEFINE_TABLE(label, __rseq_table, 0x0, 0x0, start_ip, \
                                (post_commit_ip - start_ip), abort_ip)

or something, such that we can write:

                RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */

> +             RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)

And here we open start the rseq by storing the table entry pointer into
the TLS thingy.

> +             RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
> +             "cmpq %[v], %[expect]\n\t"
> +             "jnz 5f\n\t"

                "jnz %l[cmpfail]\n\t"

was too complicated?

> +             /* final store */
> +             "movq %[newv], %[v]\n\t"
> +             "2:\n\t"
> +             RSEQ_ASM_DEFINE_ABORT(4, __rseq_failure, RSEQ_SIG, "", abort)
> +             RSEQ_ASM_DEFINE_CMPFAIL(5, __rseq_failure, "", cmpfail)
> +             : /* gcc asm goto does not allow outputs */
> +             : [cpu_id]"r"(cpu),
> +               [current_cpu_id]"m"(__rseq_abi.cpu_id),
> +               [rseq_cs]"m"(__rseq_abi.rseq_cs),
> +               [v]"m"(*v),
> +               [expect]"r"(expect),
> +               [newv]"r"(newv)

                : [cpu_id]         "r" (cpu),
                  [current_cpu_id] "m" (__rseq_abi.cpu_id),
                  [rseq_cs]        "m" (__rseq_abi.rseq_cs),
                  [v]              "m" (*v),
                  [expect]         "r" (expect),
                  [newv]           "r" (newv)

or something does read much better

> +             : "memory", "cc", "rax"
> +             : abort, cmpfail
> +     );
> +     return 0;
> +abort:
> +     return -1;
> +cmpfail:
> +     return 1;
> +}
> +
> +static inline __attribute__((always_inline))
> +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
> +             off_t voffp, intptr_t *load, int cpu)

so this thing does what now? It compares @v to @expectnot, when _not_
matching it will store @voffp into @v and load something..?

> +{
> +     __asm__ __volatile__ goto (
> +             RSEQ_ASM_DEFINE_TABLE(3, __rseq_table, 0x0, 0x0, 1f, 2f-1f, 4f)
> +             RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
> +             RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
> +             "cmpq %[v], %[expectnot]\n\t"
> +             "jz 5f\n\t"

So I would prefer "je" in this context, or rather:

                je %l[cmpfail]

> +             "movq %[v], %%rax\n\t"

loads @v in A

But it could already have changed since the previous load from cmp, no?
Would it not make sense to put this load before the cmp and use A
instead?

> +             "movq %%rax, %[load]\n\t"

stores A in @load

> +             "addq %[voffp], %%rax\n\t"

adds @off to A

> +             "movq (%%rax), %%rax\n\t"

loads (A) in A

> +             /* final store */
> +             "movq %%rax, %[v]\n\t"

stores A in @v


So the whole thing loads @v into @load, adds an offset, dereferences
and stores that back in @v, provided @v doesn't match @expectnot.. whee.

> +             "2:\n\t"
> +             RSEQ_ASM_DEFINE_ABORT(4, __rseq_failure, RSEQ_SIG, "", abort)
> +             RSEQ_ASM_DEFINE_CMPFAIL(5, __rseq_failure, "", cmpfail)
> +             : /* gcc asm goto does not allow outputs */
> +             : [cpu_id]"r"(cpu),
> +               [current_cpu_id]"m"(__rseq_abi.cpu_id),
> +               [rseq_cs]"m"(__rseq_abi.rseq_cs),
> +               /* final store input */
> +               [v]"m"(*v),
> +               [expectnot]"r"(expectnot),
> +               [voffp]"er"(voffp),
> +               [load]"m"(*load)
> +             : "memory", "cc", "rax"
> +             : abort, cmpfail
> +     );
> +     return 0;
> +abort:
> +     return -1;
> +cmpfail:
> +     return 1;
> +}

> +#elif __i386__
> +
> +/*
> + * Support older 32-bit architectures that do not implement fence
> + * instructions.
> + */
> +#define rseq_smp_mb()        \
> +     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
> +#define rseq_smp_rmb()       \
> +     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")
> +#define rseq_smp_wmb()       \
> +     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory")

Oh shiny, you're supporting that OOSTORE and PPRO_FENCE nonsense?

Going by commit:

  09df7c4c8097 ("x86: Remove CONFIG_X86_OOSTORE")

That smp_wmb() one was an 'optimization' (forced store buffer flush) but
not a correctness thing. And we dropped that stuff from the kernel a
_long_ time ago.

Ideally we'd kill that PPRO_FENCE crap too.

Reply via email to