Re: [PATCHv5 28/37] x86/vdso: Enable static branches for the timens vdso

2019-07-31 Thread Thomas Gleixner
On Wed, 31 Jul 2019, Andy Lutomirski wrote:
> On Mon, Jul 29, 2019 at 2:58 PM Dmitry Safonov  wrote:
> > As it has been discussed on timens RFC, adding a new conditional branch
> > `if (inside_time_ns)` on VDSO for all processes is undesirable.
> >
> > Addressing those problems, there are two versions of VDSO's .so:
> > for host tasks (without any penalty) and for processes inside of time
> > namespace with clk_to_ns() that subtracts offsets from host's time.
> >
> > The timens code in vdso looks like this:
> >
> > if (timens_static_branch_unlikely()) {
> >clk_to_ns(clk, ts);
> > }
> 
> I'm confused.  Now we effectively have *three* versions: the vDSO
> without timens, and vDSO with timens but with it switched off, and the
> vDSO with timens on.  This seems like too much.

The problem is that if you have a single VDSO, then omce one process joins
a time namespace _ALL_ other processes get an extra conditional with at
least one extra  cache line as a bonus.

This has been discussed at Plumbers last year and the agreement was to
create VDSO plain (no namespace support) and VDSO extra (namespace
support).

The performance hit of the conditional + one etra cache line for tasks
which do not use a time name space is measurable.

That also avoids the whole mess of dealing with the static branch being
flipped. The time name space property is determined on fork/exec _before_
anything can touch the VDSO and depending on it the approriate version of
the VDSO is loaded.

Thanks,

tglx


Re: [PATCHv5 28/37] x86/vdso: Enable static branches for the timens vdso

2019-07-31 Thread Andy Lutomirski
On Mon, Jul 29, 2019 at 2:58 PM Dmitry Safonov  wrote:
>
> From: Andrei Vagin 
>
> As it has been discussed on timens RFC, adding a new conditional branch
> `if (inside_time_ns)` on VDSO for all processes is undesirable.
>
> Addressing those problems, there are two versions of VDSO's .so:
> for host tasks (without any penalty) and for processes inside of time
> namespace with clk_to_ns() that subtracts offsets from host's time.
>
> The timens code in vdso looks like this:
>
> if (timens_static_branch_unlikely()) {
>clk_to_ns(clk, ts);
> }

I'm confused.  Now we effectively have *three* versions: the vDSO
without timens, and vDSO with timens but with it switched off, and the
vDSO with timens on.  This seems like too much.

What you need is, IMO, a static-branch-ish thing that is per mm.  This
has a fundamental problem that the vDSO can be modified using
FOLL_FORCE.  Perhaps any CoW of the vDSO should implicitly switch the
static branch on, which at least gives some degree of correctness even
if it's a bit surprising.

--Andy


[PATCHv5 28/37] x86/vdso: Enable static branches for the timens vdso

2019-07-29 Thread Dmitry Safonov
From: Andrei Vagin 

As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.

Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside of time
namespace with clk_to_ns() that subtracts offsets from host's time.

The timens code in vdso looks like this:

if (timens_static_branch_unlikely()) {
   clk_to_ns(clk, ts);
}

This static branch is disabled by default. And the code generated consist
of a single atomic 'no-op' instruction, in the straight-line code path.

Enable static branches in the timens vdso: the 'no-op' instruction
gets replaced with a 'jump' instruction to the out-of-line true branch.

Signed-off-by: Andrei Vagin 
Co-developed-by: Dmitry Safonov 
Signed-off-by: Dmitry Safonov 
---
 arch/x86/entry/vdso/vma.c| 30 --
 arch/x86/kernel/jump_label.c | 14 ++
 include/linux/jump_label.h   |  8 
 init/Kconfig |  1 +
 4 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 91cf5a5c8c9e..1a3eb4656eb6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,20 +32,37 @@
 unsigned int __read_mostly vdso64_enabled = 1;
 #endif
 
-void __init init_vdso_image(struct vdso_image *image)
+#ifdef CONFIG_TIME_NS
+static __init void init_timens(struct vdso_image *image)
 {
-   BUG_ON(image->size % PAGE_SIZE != 0);
+   struct vdso_jump_entry *entries;
+   unsigned long entries_nr;
+
+   if (WARN_ON(image->jump_table == -1UL))
+   return;
 
-   apply_alternatives((struct alt_instr *)(image->text + image->alt),
-  (struct alt_instr *)(image->text + image->alt +
-   image->alt_len));
-#ifdef CONFIG_TIME_NS
image->text_timens = vmalloc_32(image->size);
if (WARN_ON(image->text_timens == NULL))
return;
 
memcpy(image->text_timens, image->text, image->size);
+
+   entries = image->text_timens + image->jump_table;
+   entries_nr = image->jump_table_len / sizeof(struct vdso_jump_entry);
+   apply_vdso_jump_labels(entries, entries_nr);
+}
+#else
+static inline void init_timens(struct vdso_image *image) {}
 #endif
+
+void __init init_vdso_image(struct vdso_image *image)
+{
+   BUG_ON(image->size % PAGE_SIZE != 0);
+
+   apply_alternatives((struct alt_instr *)(image->text + image->alt),
+  (struct alt_instr *)(image->text + image->alt +
+   image->alt_len));
+   init_timens(image);
 }
 
 struct linux_binprm;
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..7820ac61b688 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,6 +24,20 @@ union jump_code_union {
} __attribute__((packed));
 };
 
+__init void apply_vdso_jump_labels(struct vdso_jump_entry *ent, unsigned long 
nr)
+{
+   while (nr--) {
+   void *code_addr = (void *)ent + ent->code;
+   union jump_code_union jmp;
+
+   jmp.jump= 0xe9; /* JMP rel32 */
+   jmp.offset  = ent->target - ent->code - JUMP_LABEL_NOP_SIZE;
+   memcpy(code_addr, &jmp, JUMP_LABEL_NOP_SIZE);
+
+   ent++;
+   }
+}
+
 static void bug_at(unsigned char *ip, int line)
 {
/*
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 3526c0aee954..bb9d828ee49a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -125,6 +125,11 @@ struct jump_entry {
long key;   // key may be far away from the core kernel under KASLR
 };
 
+struct vdso_jump_entry {
+   u16 code;
+   u16 target;
+};
+
 static inline unsigned long jump_entry_code(const struct jump_entry *entry)
 {
return (unsigned long)&entry->code + entry->code;
@@ -229,6 +234,9 @@ extern void static_key_enable(struct static_key *key);
 extern void static_key_disable(struct static_key *key);
 extern void static_key_enable_cpuslocked(struct static_key *key);
 extern void static_key_disable_cpuslocked(struct static_key *key);
+extern void apply_vdso_jump_labels(struct vdso_jump_entry *ent,
+  unsigned long nr);
+
 
 /*
  * We should be using ATOMIC_INIT() for initializing .enabled, but
diff --git a/init/Kconfig b/init/Kconfig
index 9e40c07da4e1..be8bd41774f6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1072,6 +1072,7 @@ config UTS_NS
 config TIME_NS
bool "TIME namespace"
depends on ARCH_HAS_VDSO_TIME_NS
+   depends on JUMP_LABEL
default y
help
  In this namespace boottime and monotonic clocks can be set.
-- 
2.2