[tip:perf/core] tools lib traceevent: update KVM plugin

2015-10-22 Thread tip-bot for Paolo Bonzini
Commit-ID:  2f465deef7ce8c722121b782dd91c284f5ae80ca
Gitweb: http://git.kernel.org/tip/2f465deef7ce8c722121b782dd91c284f5ae80ca
Author: Paolo Bonzini 
AuthorDate: Thu, 1 Oct 2015 12:28:11 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Tue, 20 Oct 2015 15:54:14 -0300

tools lib traceevent: update KVM plugin

The format of the role word has changed through the years and the plugin
was never updated; some VMX exit reasons were missing too.
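
The plugin maps exit-reason numbers to names with an _ER() X-macro table (see
the diff below).  As a standalone illustration of that pattern -- simplified,
with only a handful of VMX exit reasons, not the plugin's exact code:

    #include <stdio.h>

    #define VMX_EXIT_REASONS_SAMPLE \
            _ER(APIC_WRITE, 56)     \
            _ER(INVPCID,    58)     \
            _ER(PML_FULL,   62)     \
            _ER(XSAVES,     63)     \
            _ER(XRSTORS,    64)

    /* expand each _ER(name, code) entry into a name/code pair */
    #define _ER(reason, val) { #reason, val },
    static const struct { const char *name; int code; } vmx_exit_reasons[] = {
            VMX_EXIT_REASONS_SAMPLE
    };
    #undef _ER

    int main(void)
    {
            unsigned int i;

            for (i = 0; i < sizeof(vmx_exit_reasons) / sizeof(vmx_exit_reasons[0]); i++)
                    printf("%2d -> %s\n", vmx_exit_reasons[i].code,
                           vmx_exit_reasons[i].name);
            return 0;
    }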

Signed-off-by: Paolo Bonzini 
Acked-by: Steven Rostedt 
Cc: David Ahern 
Cc: Namhyung Kim 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1443695293-31127-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/traceevent/plugin_kvm.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 88fe83d..18536f7 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -124,7 +124,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(WBINVD,  54)\
_ER(XSETBV,  55)\
_ER(APIC_WRITE,  56)\
-   _ER(INVPCID, 58)
+   _ER(INVPCID, 58)\
+   _ER(PML_FULL,62)\
+   _ER(XSAVES,  63)\
+   _ER(XRSTORS, 64)
 
 #define SVM_EXIT_REASONS \
_ER(EXIT_READ_CR0,  0x000)  \
@@ -352,15 +355,18 @@ static int kvm_nested_vmexit_handler(struct trace_seq *s, struct pevent_record *
 union kvm_mmu_page_role {
unsigned word;
struct {
-   unsigned glevels:4;
unsigned level:4;
+   unsigned cr4_pae:1;
unsigned quadrant:2;
-   unsigned pad_for_nice_hex_output:6;
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
-   unsigned cr4_pge:1;
unsigned nxe:1;
+   unsigned cr0_wp:1;
+   unsigned smep_and_not_wp:1;
+   unsigned smap_and_not_wp:1;
+   unsigned pad_for_nice_hex_output:8;
+   unsigned smm:8;
};
 };
 
@@ -385,15 +391,18 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
if (pevent_is_file_bigendian(event->pevent) ==
pevent_is_host_bigendian(event->pevent)) {
 
-   trace_seq_printf(s, "%u/%u q%u%s %s%s %spge %snxe",
+   trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
 role.level,
-role.glevels,
 role.quadrant,
 role.direct ? " direct" : "",
 access_str[role.access],
 role.invalid ? " invalid" : "",
-role.cr4_pge ? "" : "!",
-role.nxe ? "" : "!");
+role.cr4_pae ? "" : "!",
+role.nxe ? "" : "!",
+role.cr0_wp ? "" : "!",
+role.smep_and_not_wp ? " smep" : "",
+role.smap_and_not_wp ? " smap" : "",
+role.smm ? " smm" : "");
} else
trace_seq_printf(s, "WORD: %08x", role.word);
 
--

[tip:x86/apic] x86/x2apic: Make stub functions available even if !CONFIG_X86_LOCAL_APIC

2015-09-30 Thread tip-bot for Paolo Bonzini
Commit-ID:  e02ae3871355194a61b03a07d96fd71e81d7eff9
Gitweb: http://git.kernel.org/tip/e02ae3871355194a61b03a07d96fd71e81d7eff9
Author: Paolo Bonzini 
AuthorDate: Mon, 28 Sep 2015 12:26:31 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 30 Sep 2015 21:17:36 +0200

x86/x2apic: Make stub functions available even if !CONFIG_X86_LOCAL_APIC

Some CONFIG_X86_X2APIC functions, especially x2apic_enabled(), are not
declared if !CONFIG_X86_LOCAL_APIC.  However, the same stubs that work
for !CONFIG_X86_X2APIC are okay even if there is no local APIC support
at all.

Avoid the introduction of #ifdefs by moving the x2apic declarations
completely outside the CONFIG_X86_LOCAL_APIC block.  (Unfortunately,
diff generation messes up the actual change that this patch makes).
There is no semantic change because CONFIG_X86_X2APIC depends on
CONFIG_X86_LOCAL_APIC.
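
The pattern this relies on, reduced to a standalone sketch (CONFIG_FOO and
foo_enabled() are made-up stand-ins for CONFIG_X86_X2APIC and
x2apic_enabled(); the real declarations are in the diff below):

    #include <stdio.h>

    #ifdef CONFIG_FOO
    extern int foo_enabled(void);                      /* real version elsewhere */
    #else
    static inline int foo_enabled(void) { return 0; }  /* stub, always visible   */
    #endif

    int main(void)
    {
            /* callers never need an #ifdef of their own */
            printf("foo enabled: %d\n", foo_enabled());
            return 0;
    }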

Reported-by: Fengguang Wu 
Signed-off-by: Paolo Bonzini 
Cc: Feng Wu 
Link: 
http://lkml.kernel.org/r/1443435991-35750-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/apic.h | 110 ++--
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ebf6d5e..a30316b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void)
return msr & X2APIC_ENABLE;
 }
 
+extern void enable_IR_x2apic(void);
+
+extern int get_physical_broadcast(void);
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+   return -1;
+}
+#else
+extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+   return 0;
+}
+#endif
+
+extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok 1
+static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
 #ifdef CONFIG_X86_X2APIC
 /*
  * Make previous memory operations globally visible before
@@ -186,67 +239,14 @@ static inline int x2apic_enabled(void)
 }
 
 #define x2apic_supported() (cpu_has_x2apic)
-#else
+#else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
 static inline int x2apic_enabled(void) { return 0; }
 
#define x2apic_mode (0)
#define x2apic_supported()  (0)
-#endif
-
-extern void enable_IR_x2apic(void);
-
-extern int get_physical_broadcast(void);
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-void register_lapic_address(unsigned long address);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-
-#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
-{
-   return -1;
-}
-#else
-extern int apic_force_enable(unsigned long addr);
-#endif
-
-extern int apic_bsp_setup(bool upmode);
-extern void apic_ap_setup(void);
-
-/*
- * On 32bit this is mach-xxx local
- */
-#ifdef CONFIG_X86_64
-extern int apic_is_clustered_box(void);
-#else
-static inline int apic_is_clustered_box(void)
-{
-   return 0;
-}
-#endif
-
-extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok 1
-static inline void init_apic_mappings(void) { }
-static inline void disable_local_APIC(void) { }
-# define setup_boot_APIC_clock x86_init_noop
-# define 

[tip:perf/core] perf script: Add stackcollapse.py script

2016-06-22 Thread tip-bot for Paolo Bonzini
Commit-ID:  6745d8ea825966b0956c691cf7fccc13debedc39
Gitweb: http://git.kernel.org/tip/6745d8ea825966b0956c691cf7fccc13debedc39
Author: Paolo Bonzini 
AuthorDate: Tue, 12 Apr 2016 15:26:13 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Tue, 21 Jun 2016 13:18:35 -0300

perf script: Add stackcollapse.py script

Add stackcollapse.py script as an example of parsing call chains, and
also of using optparse to access command line options.

The flame graph tools include a set of scripts that parse output from
various tools (including "perf script"), remove the offsets in the
function and collapse each stack to a single line.  The website also
says "perf report could have a report style [...] that output folded
stacks directly, obviating the need for stackcollapse-perf.pl", so here
it is.

This script is a Python rewrite of stackcollapse-perf.pl, using the perf
scripting interface to access the perf data directly from Python.
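
For readers who have not seen folded stacks before, this is the whole job of
the script: turn each sample's call chain into a single "comm;fn1;fn2;..."
key and count how often each key occurs.  A rough standalone sketch of that
aggregation in C (the script itself does this in Python with a dict and then
sorts the output; the names and sizes here are illustrative only):

    #include <stdio.h>
    #include <string.h>

    #define MAX_STACKS 1024

    /* one record per distinct folded stack */
    static struct { char key[256]; unsigned long count; } table[MAX_STACKS];
    static int nr_stacks;

    /* record one sample whose call chain is already folded into "a;b;c" */
    static void add_sample(const char *folded)
    {
            int i;

            for (i = 0; i < nr_stacks; i++)
                    if (strcmp(table[i].key, folded) == 0) {
                            table[i].count++;
                            return;
                    }
            if (nr_stacks < MAX_STACKS) {
                    snprintf(table[nr_stacks].key, sizeof(table[0].key), "%s", folded);
                    table[nr_stacks].count = 1;
                    nr_stacks++;
            }
    }

    int main(void)
    {
            int i;

            add_sample("swapper;start_kernel;rest_init;cpu_idle");
            add_sample("swapper;start_kernel;rest_init;cpu_idle");
            add_sample("cc1;main;compile_file");

            for (i = 0; i < nr_stacks; i++)
                    printf("%s %lu\n", table[i].key, table[i].count);
            return 0;
    }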

Signed-off-by: Paolo Bonzini 
Acked-by: Jiri Olsa 
Cc: Brendan Gregg 
Link: 
http://lkml.kernel.org/r/1460467573-22989-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/scripts/python/bin/stackcollapse-record |   8 ++
 tools/perf/scripts/python/bin/stackcollapse-report |   3 +
 tools/perf/scripts/python/stackcollapse.py | 127 +
 3 files changed, 138 insertions(+)

diff --git a/tools/perf/scripts/python/bin/stackcollapse-record b/tools/perf/scripts/python/bin/stackcollapse-record
new file mode 100755
index 000..9d8f9f0
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-record
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can cover all types of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record "$@"
diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report
new file mode 100755
index 000..356b965
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py
new file mode 100755
index 000..a2dfcda
--- /dev/null
+++ b/tools/perf/scripts/python/stackcollapse.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+#
+# stackcollapse.py - format perf samples with one line per distinct call stack
+#
+# This script's output has two space-separated fields.  The first is a semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack.  The second is a count:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 99 sleep 60
+#  perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini 
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+# formatting options for the bottom entry of the stack
+make_option("--include-tid", dest="include_tid",
+ action="store_true", default=False,
+ help="include thread id in stack"),
+make_option("--include-pid", dest="include_pid",
+ action="store_true", default=False,
+ help="include process id in stack"),
+make_option("--no-comm", dest="include_comm",
+ action="store_false", default=True,
+ help="do not separate stacks according to comm"),
+make_option("--tidy-java", dest="tidy_java",
+ action="store_true", default=False,
+ help="beautify Java signatures"),
+make_option("--kernel", dest="annotate_kernel",
+ action="store_true", default=False,
+ help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines 

[tip:locking/urgent] locking/static_key: Fix concurrent static_key_slow_inc()

2016-06-24 Thread tip-bot for Paolo Bonzini
Commit-ID:  4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff
Gitweb: http://git.kernel.org/tip/4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff
Author: Paolo Bonzini 
AuthorDate: Tue, 21 Jun 2016 18:52:17 +0200
Committer:  Ingo Molnar 
CommitDate: Fri, 24 Jun 2016 08:23:16 +0200

locking/static_key: Fix concurrent static_key_slow_inc()

The following scenario is possible:

CPU 1   CPU 2
static_key_slow_inc()
 atomic_inc_not_zero()
  -> key.enabled == 0, no increment
 jump_label_lock()
 atomic_inc_return()
  -> key.enabled == 1 now
static_key_slow_inc()
 atomic_inc_not_zero()
  -> key.enabled == 1, inc to 2
 return
** static key is wrong!
 jump_label_update()
 jump_label_unlock()

Testing the static key at the point marked by (**) will follow the
wrong path for jumps that have not been patched yet.  This can
actually happen when creating many KVM virtual machines with userspace
LAPIC emulation; just run several copies of the following program:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
for (;;) {
int kvmfd = open("/dev/kvm", O_RDONLY);
int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
close(ioctl(vmfd, KVM_CREATE_VCPU, 1));
close(vmfd);
close(kvmfd);
}
return 0;
}

Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call.
The static key's purpose is to skip NULL pointer checks and indeed one
of the processes eventually dereferences NULL.

As explained in the commit that introduced the bug:

  706249c222f6 ("locking/static_keys: Rework update logic")

jump_label_update() needs key.enabled to be true.  The solution adopted
here is to temporarily make key.enabled == -1, and go down the
slow path when key.enabled <= 0.
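
A userspace analogue of the resulting logic, using C11 atomics and a pthread
mutex in place of key->enabled and jump_label_lock() (illustrative only, not
the kernel code; patch_jump_sites() stands in for jump_label_update(); build
with -pthread):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int enabled;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void patch_jump_sites(void) { }   /* stand-in for jump_label_update() */

    static void slow_inc(void)
    {
            int v = atomic_load(&enabled);

            /* fast path: already fully enabled, just bump the count */
            while (v > 0)
                    if (atomic_compare_exchange_weak(&enabled, &v, v + 1))
                            return;

            pthread_mutex_lock(&lock);
            if (atomic_load(&enabled) == 0) {
                    atomic_store(&enabled, -1);   /* non-zero, but "not done yet" */
                    patch_jump_sites();
                    atomic_store(&enabled, 1);
            } else {
                    atomic_fetch_add(&enabled, 1);
            }
            pthread_mutex_unlock(&lock);
    }

    static void *worker(void *arg) { slow_inc(); return arg; }

    int main(void)
    {
            pthread_t t[4];
            int i;

            for (i = 0; i < 4; i++)
                    pthread_create(&t[i], NULL, worker, NULL);
            for (i = 0; i < 4; i++)
                    pthread_join(t[i], NULL);
            printf("enabled = %d\n", atomic_load(&enabled));   /* always 4 */
            return 0;
    }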

Reported-by: Dmitry Vyukov 
Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc:  # v4.3+
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Fixes: 706249c222f6 ("locking/static_keys: Rework update logic")
Link: 
http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonz...@redhat.com
[ Small stylistic edits to the changelog and the code. ]
Signed-off-by: Ingo Molnar 
---
 include/linux/jump_label.h | 16 +---
 kernel/jump_label.c| 36 +---
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0536524..6890446 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -117,13 +117,18 @@ struct module;
 
 #include 
 
+#ifdef HAVE_JUMP_LABEL
+
 static inline int static_key_count(struct static_key *key)
 {
-   return atomic_read(&key->enabled);
+   /*
+* -1 means the first static_key_slow_inc() is in progress.
+*  static_key_enabled() must return true, so return 1 here.
+*/
+   int n = atomic_read(&key->enabled);
+   return n >= 0 ? n : 1;
 }
 
-#ifdef HAVE_JUMP_LABEL
-
#define JUMP_TYPE_FALSE 0UL
 #define JUMP_TYPE_TRUE 1UL
 #define JUMP_TYPE_MASK 1UL
@@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
+static inline int static_key_count(struct static_key *key)
+{
+   return atomic_read(&key->enabled);
+}
+
 static __always_inline void jump_label_init(void)
 {
static_key_initialized = true;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254ee..4b353e0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
+   int v, v1;
+
STATIC_KEY_CHECK_USE();
-   if (atomic_inc_not_zero(&key->enabled))
-   return;
+
+   /*
+* Careful if we get concurrent static_key_slow_inc() calls;
+* later calls must wait for the first one to _finish_ the
+* jump_label_update() process.  At the same time, however,
+* the jump_label_update() call below wants to see
+* static_key_enabled() for jumps to be updated properly.
+*
+* So give a special meaning to negative key->enabled: it sends
+* static_key_slow_inc() down the slow path, and it is non-zero
+* so it counts as "enabled" in jump_label_update().  Note that
+* atomic_inc_unless_negative() checks >= 0, so roll our own.
+*/
+   for (v = atomic_read(&key->enabled); v > 0; v = v1) {
+   v1 = 

[tip:x86/asm] x86/entry: Avoid interrupt flag save and restore

2016-07-09 Thread tip-bot for Paolo Bonzini
Commit-ID:  0b95364f977c180e1f336e00273fda5d3eca54b4
Gitweb: http://git.kernel.org/tip/0b95364f977c180e1f336e00273fda5d3eca54b4
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:29 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 9 Jul 2016 10:44:01 +0200

x86/entry: Avoid interrupt flag save and restore

Thanks to all the work that was done by Andy Lutomirski and others,
enter_from_user_mode() and prepare_exit_to_usermode() are now called only with
interrupts disabled.  Let's provide them a version of user_enter()/user_exit()
that skips saving and restoring the interrupt flag.
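
The shape of the change, as a compilable sketch (the context-tracking and IRQ
primitives are stubbed out here just so it builds; the real user_enter() also
checks context_tracking_is_enabled() before doing any work):

    #include <stdio.h>

    #define CONTEXT_USER 1

    /* stand-ins for the kernel primitives */
    static void __context_tracking_enter(int state) { (void)state; }
    #define local_irq_save(flags)    ((flags) = 1)   /* think pushf + cli */
    #define local_irq_restore(flags) ((void)(flags)) /* think popf        */

    static void user_enter(void)            /* generic form: must be IRQ-safe */
    {
            unsigned long flags;

            local_irq_save(flags);
            __context_tracking_enter(CONTEXT_USER);
            local_irq_restore(flags);
    }

    static void user_enter_irqoff(void)     /* new form: caller has IRQs off */
    {
            __context_tracking_enter(CONTEXT_USER);
    }

    int main(void)
    {
            user_enter();
            user_enter_irqoff();
            printf("only the first variant touches the interrupt flag\n");
            return 0;
    }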

On an AMD-based machine I tested this patch on, with force-enabled
context tracking, the speed-up in system calls was 90 clock cycles or 6%,
measured with the following simple benchmark:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>

unsigned long rdtsc()
{
unsigned long result;
asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n"
 "or %%rdx, %%rax" : "=a" (result) : : "rdx");
return result;
}

int main()
{
unsigned long tsc1, tsc2;
int pid = getpid();
int i;

tsc1 = rdtsc();
for (i = 0; i < 1; i++)
kill(pid, SIGWINCH);
tsc2 = rdtsc();

printf("%ld\n", tsc2 - tsc1);
}

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c  |  4 ++--
 include/linux/context_tracking.h | 15 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e5..618bc61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 __visible void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
-   user_exit();
+   user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
ti->status &= ~TS_COMPAT;
 #endif
 
-   user_enter();
+   user_enter_irqoff();
 }
 
#define SYSCALL_EXIT_WORK_FLAGS \
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d259274..d9aef2a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -31,6 +31,19 @@ static inline void user_exit(void)
context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }


[tip:x86/asm] x86/entry: Inline enter_from_user_mode()

2016-07-09 Thread tip-bot for Paolo Bonzini
Commit-ID:  eec4b1227db153ca16f8f5f285d01fefdce05438
Gitweb: http://git.kernel.org/tip/eec4b1227db153ca16f8f5f285d01fefdce05438
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:30 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 9 Jul 2016 10:44:02 +0200

x86/entry: Inline enter_from_user_mode()

This matches what is already done for prepare_exit_to_usermode(),
and saves about 60 clock cycles (4% speedup) with the benchmark
in the previous commit message.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 618bc61..9e1e27d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();


[tip:x86/asm] x86/entry: Inline enter_from_user_mode()

2016-07-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  be8a18e2e98e04a5def5887d913b267865562448
Gitweb: http://git.kernel.org/tip/be8a18e2e98e04a5def5887d913b267865562448
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:30 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 10 Jul 2016 13:33:02 +0200

x86/entry: Inline enter_from_user_mode()

This matches what is already done for prepare_exit_to_usermode(),
and saves about 60 clock cycles (4% speedup) with the benchmark
in the previous commit message.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 618bc61..9e1e27d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();


[tip:x86/asm] x86/entry: Avoid interrupt flag save and restore

2016-07-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  2e9d1e150abf88cb63e5d34ca286edbb95b4c53d
Gitweb: http://git.kernel.org/tip/2e9d1e150abf88cb63e5d34ca286edbb95b4c53d
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:29 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 10 Jul 2016 13:33:02 +0200

x86/entry: Avoid interrupt flag save and restore

Thanks to all the work that was done by Andy Lutomirski and others,
enter_from_user_mode() and prepare_exit_to_usermode() are now called only with
interrupts disabled.  Let's provide them a version of user_enter()/user_exit()
that skips saving and restoring the interrupt flag.

On an AMD-based machine I tested this patch on, with force-enabled
context tracking, the speed-up in system calls was 90 clock cycles or 6%,
measured with the following simple benchmark:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>

unsigned long rdtsc()
{
unsigned long result;
asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n"
 "or %%rdx, %%rax" : "=a" (result) : : "rdx");
return result;
}

int main()
{
unsigned long tsc1, tsc2;
int pid = getpid();
int i;

tsc1 = rdtsc();
for (i = 0; i < 1; i++)
kill(pid, SIGWINCH);
tsc2 = rdtsc();

printf("%ld\n", tsc2 - tsc1);
}

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c  |  4 ++--
 include/linux/context_tracking.h | 15 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e5..618bc61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 __visible void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
-   user_exit();
+   user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
ti->status &= ~TS_COMPAT;
 #endif
 
-   user_enter();
+   user_enter_irqoff();
 }
 
#define SYSCALL_EXIT_WORK_FLAGS \
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d259274..d9aef2a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -31,6 +31,19 @@ static inline void user_exit(void)
context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }


[tip:locking/core] cpuset: Make nr_cpusets private

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  be040bea9085a9c2b1700c9e60888777baeb96d5
Gitweb: http://git.kernel.org/tip/be040bea9085a9c2b1700c9e60888777baeb96d5
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:06 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:57 +0200

cpuset: Make nr_cpusets private

Any use of key->enabled (that is static_key_enabled and static_key_count)
outside jump_label_lock should handle its own serialization.  In the case
of cpusets_enabled_key, the key is always incremented/decremented under
cpuset_mutex, and hence the same rule applies to nr_cpusets.  The rule
*is* respected currently, but the mutex is static so nr_cpusets should
be static too.
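
The rule being applied, as a small userspace analogue (a pthread mutex and a
plain counter instead of cpuset_mutex and the static key; illustrative only):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t cpuset_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int key_count;                 /* stands in for the static key count */

    static int nr_cpusets(void)           /* caller must hold cpuset_mutex */
    {
            return key_count + 1;         /* reference count + top-level cpuset */
    }

    static void cpuset_inc(void)
    {
            pthread_mutex_lock(&cpuset_mutex);
            key_count++;
            printf("now %d cpusets\n", nr_cpusets());
            pthread_mutex_unlock(&cpuset_mutex);
    }

    int main(void)
    {
            cpuset_inc();
            cpuset_inc();
            return 0;
    }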

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Zefan Li 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-4-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 include/linux/cpuset.h | 6 --
 kernel/cgroup/cpuset.c | 7 +++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 898cfe2..e74655d 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void)
return static_branch_unlikely(&cpusets_enabled_key);
 }
 
-static inline int nr_cpusets(void)
-{
-   /* jump label reference count + the top-level cpuset */
-   return static_key_count(&cpusets_enabled_key.key) + 1;
-}
-
 static inline void cpuset_inc(void)
 {
static_branch_inc(&cpusets_pre_enable_key);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d51516..9ed6a05 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_unlock();
 }
 
+/* Must be called with cpuset_mutex held.  */
+static inline int nr_cpusets(void)
+{
+   /* jump label reference count + the top-level cpuset */
+   return static_key_count(&cpusets_enabled_key.key) + 1;
+}
+
 /*
  * generate_sched_domains()
  *


[tip:locking/core] jump_label: Do not use unserialized static_key_enabled()

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  7a34bcb8b272b1300f0125c93a54f0c98812acdd
Gitweb: http://git.kernel.org/tip/7a34bcb8b272b1300f0125c93a54f0c98812acdd
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:05 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:56 +0200

jump_label: Do not use unserialized static_key_enabled()

Any use of key->enabled (that is static_key_enabled and static_key_count)
outside jump_label_lock should handle its own serialization.  The only
two that are not doing so are the UDP encapsulation static keys.  Change
them to use static_key_enable, which now correctly tests key->enabled under
the jump label lock.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Eric Dumazet 
Cc: Jason Baron 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 Documentation/static-keys.txt | 5 +
 net/ipv4/udp.c| 3 +--
 net/ipv6/udp.c| 3 +--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index b83dfa1..870b4be 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -149,6 +149,11 @@ static_branch_inc(), will change the branch back to true. Likewise, if the
 key is initialized false, a 'static_branch_inc()', will change the branch to
 true. And then a 'static_branch_dec()', will again make the branch false.
 
+The state and the reference count can be retrieved with 'static_key_enabled()'
+and 'static_key_count()'.  In general, if you use these functions, they
+should be protected with the same mutex used around the enable/disable
+or increment/decrement function.
+
 Where an array of keys is required, it can be defined as::
 
DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6276fa..3037339 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1809,8 +1809,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 static struct static_key udp_encap_needed __read_mostly;
 void udp_encap_enable(void)
 {
-   if (!static_key_enabled(&udp_encap_needed))
-   static_key_slow_inc(&udp_encap_needed);
+   static_key_enable(&udp_encap_needed);
 }
 EXPORT_SYMBOL(udp_encap_enable);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b..96d2407 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -574,8 +574,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 static struct static_key udpv6_encap_needed __read_mostly;
 void udpv6_encap_enable(void)
 {
-   if (!static_key_enabled(&udpv6_encap_needed))
-   static_key_slow_inc(&udpv6_encap_needed);
+   static_key_enable(&udpv6_encap_needed);
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 


[tip:locking/core] jump_label: Fix concurrent static_key_enable/disable()

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  1dbb6704de91b169a58d0c8221624afd6a95cfc7
Gitweb: http://git.kernel.org/tip/1dbb6704de91b169a58d0c8221624afd6a95cfc7
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:04 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:56 +0200

jump_label: Fix concurrent static_key_enable/disable()

static_key_enable/disable are trying to cap the static key count to
0/1.  However, their use of key->enabled is outside jump_label_lock
so they do not really ensure that.

Rewrite them to do a quick check for an already enabled (respectively,
already disabled), and then recheck under the jump label lock.  Unlike
static_key_slow_inc/dec, a failed check under the jump label lock does
not modify key->enabled.
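
The bug class and the fix pattern, reduced to a userspace sketch (C11 atomics
and a pthread mutex instead of key->enabled and jump_label_lock(); not the
kernel code):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int enabled;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* old pattern: test outside the lock, so two racing callers can both see 0
     * and both increment, pushing a 0/1-capped count to 2 */
    static void racy_enable(void)
    {
            if (atomic_load(&enabled) == 0)
                    atomic_fetch_add(&enabled, 1);
    }

    /* new pattern: quick unlocked check for the common case, then recheck under
     * the lock before modifying; a failed recheck modifies nothing */
    static void safe_enable(void)
    {
            if (atomic_load(&enabled) > 0)
                    return;

            pthread_mutex_lock(&lock);
            if (atomic_load(&enabled) == 0)
                    atomic_store(&enabled, 1);
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            racy_enable();
            safe_enable();
            printf("enabled = %d\n", atomic_load(&enabled));   /* capped at 1 */
            return 0;
    }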

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Eric Dumazet 
Cc: Jason Baron 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 include/linux/jump_label.h | 22 +
 kernel/jump_label.c| 59 +-
 2 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2afd74b..740a42e 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -234,22 +234,24 @@ static inline int jump_label_apply_nops(struct module *mod)
 
 static inline void static_key_enable(struct static_key *key)
 {
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
+   STATIC_KEY_CHECK_USE();
 
-   if (!count)
-   static_key_slow_inc(key);
+   if (atomic_read(&key->enabled) != 0) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+   return;
+   }
+   atomic_set(&key->enabled, 1);
 }
 
 static inline void static_key_disable(struct static_key *key)
 {
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
+   STATIC_KEY_CHECK_USE();
 
-   if (count)
-   static_key_slow_dec(key);
+   if (atomic_read(&key->enabled) != 1) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+   return;
+   }
+   atomic_set(&key->enabled, 0);
 }
 
 #define STATIC_KEY_INIT_TRUE   { .enabled = ATOMIC_INIT(1) }
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index d11c506..833eeca 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -79,28 +79,6 @@ int static_key_count(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_count);
 
-void static_key_enable(struct static_key *key)
-{
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
-
-   if (!count)
-   static_key_slow_inc(key);
-}
-EXPORT_SYMBOL_GPL(static_key_enable);
-
-void static_key_disable(struct static_key *key)
-{
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
-
-   if (count)
-   static_key_slow_dec(key);
-}
-EXPORT_SYMBOL_GPL(static_key_disable);
-
 void static_key_slow_inc(struct static_key *key)
 {
int v, v1;
@@ -139,6 +117,43 @@ void static_key_slow_inc(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
 
+void static_key_enable(struct static_key *key)
+{
+   STATIC_KEY_CHECK_USE();
+   if (atomic_read(&key->enabled) > 0) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+   return;
+   }
+
+   cpus_read_lock();
+   jump_label_lock();
+   if (atomic_read(&key->enabled) == 0) {
+   atomic_set(&key->enabled, -1);
+   jump_label_update(key);
+   atomic_set(&key->enabled, 1);
+   }
+   jump_label_unlock();
+   cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable);
+
+void static_key_disable(struct static_key *key)
+{
+   STATIC_KEY_CHECK_USE();
+   if (atomic_read(&key->enabled) != 1) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+   return;
+   }
+
+   cpus_read_lock();
+   jump_label_lock();
+   if (atomic_cmpxchg(&key->enabled, 1, 0))
+   jump_label_update(key);
+   jump_label_unlock();
+   cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable);
+
 static void __static_key_slow_dec(struct static_key *key,
unsigned long rate_limit, struct delayed_work *work)
 {


[tip:perf/core] perf unwind: Support for powerpc

2017-06-21 Thread tip-bot for Paolo Bonzini
Commit-ID:  a7f0fda085870312ab694b19a1304ece161a1217
Gitweb: http://git.kernel.org/tip/a7f0fda085870312ab694b19a1304ece161a1217
Author: Paolo Bonzini 
AuthorDate: Thu, 1 Jun 2017 12:24:41 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Wed, 21 Jun 2017 11:35:42 -0300

perf unwind: Support for powerpc

Porting PPC to libdw only needs an architecture-specific hook to move
the register state from perf to libdw.

The ARM and x86 architectures already use libdw, and it is useful to
have as much common code for the unwinder as possible.  Mark Wielaard
has contributed a frame-based unwinder to libdw, so that unwinding works
even for binaries that do not have CFI information.  In addition,
libunwind is always preferred to libdw by the build machinery so this
cannot introduce regressions on machines that have both libunwind and
libdw installed.

Signed-off-by: Paolo Bonzini 
Acked-by: Jiri Olsa 
Acked-by: Milian Wolff 
Acked-by: Ravi Bangoria 
Cc: Naveen N. Rao 
Cc: linuxppc-...@lists.ozlabs.org
Link: 
http://lkml.kernel.org/r/1496312681-20133-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config  |  2 +-
 tools/perf/arch/powerpc/util/Build  |  2 +
 tools/perf/arch/powerpc/util/unwind-libdw.c | 73 +
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 1f4fbc9..bdf0e87 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 90ad64b..2e659531 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -5,4 +5,6 @@ libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 000..3a24b3c
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,73 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
+static const int special_regs[3][2] = {
+   { 65, PERF_REG_POWERPC_LINK },
+   { 101, PERF_REG_POWERPC_XER },
+   { 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+   struct unwind_info *ui = arg;
+   struct regs_dump *user_regs = &ui->sample->user_regs;
+   Dwarf_Word dwarf_regs[32], dwarf_nip;
+   size_t i;
+
+#define REG(r) ({  \
+   Dwarf_Word val = 0; \
+   perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r);  \
+   val;\
+})
+
+   dwarf_regs[0]  = REG(R0);
+   dwarf_regs[1]  = REG(R1);
+   dwarf_regs[2]  = REG(R2);
+   dwarf_regs[3]  = REG(R3);
+   dwarf_regs[4]  = REG(R4);
+   dwarf_regs[5]  = REG(R5);
+   dwarf_regs[6]  = REG(R6);
+   dwarf_regs[7]  = REG(R7);
+   dwarf_regs[8]  = REG(R8);
+   dwarf_regs[9]  = REG(R9);
+   dwarf_regs[10] = REG(R10);
+   dwarf_regs[11] = REG(R11);
+   dwarf_regs[12] = REG(R12);
+   dwarf_regs[13] = REG(R13);
+   dwarf_regs[14] = REG(R14);
+   dwarf_regs[15] = REG(R15);
+   dwarf_regs[16] = REG(R16);
+   dwarf_regs[17] = REG(R17);
+   dwarf_regs[18] = REG(R18);
+   dwarf_regs[19] = REG(R19);
+   dwarf_regs[20] = REG(R20);
+   dwarf_regs[21] = REG(R21);
+   dwarf_regs[22] = REG(R22);
+   dwarf_regs[23] = REG(R23);
+   dwarf_regs[24] = REG(R24);
+   dwarf_regs[25] = REG(R25);
+   dwarf_regs[26] = REG(R26);
+   dwarf_regs[27] = REG(R27);
+   dwarf_regs[28] = REG(R28);
+   dwarf_regs[29] = REG(R29);
+   dwarf_regs[30] = REG(R30);
+   dwarf_regs[31] = REG(R31);
+   if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+   return false;
+
+   dwarf_nip = REG(NIP);
+   dwfl_thread_state_register_pc(thread, dwarf_nip);
+   for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+   Dwarf_Word val = 0;
+   perf_reg_value(&val, 

[tip:x86/urgent] x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

2017-10-12 Thread tip-bot for Paolo Bonzini
Commit-ID:  cc6afe2240298049585e86b1ade85efc8a7f225d
Gitweb: https://git.kernel.org/tip/cc6afe2240298049585e86b1ade85efc8a7f225d
Author: Paolo Bonzini 
AuthorDate: Tue, 10 Oct 2017 12:12:57 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 12 Oct 2017 17:10:10 +0200

x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

Commit 594a30fb1242 ("x86/apic: Silence "FW_BUG TSC_DEADLINE disabled
due to Errata" on CPUs without the feature", 2017-08-30) was also about
silencing the warning on VirtualBox; however, KVM does expose the TSC
deadline timer, and it's virtualized so that it is immune from CPU errata.

Therefore, booting 4.13 with "-cpu Haswell" shows this in the logs:

 [0.00] [Firmware Bug]: TSC_DEADLINE disabled due to Errata;
please update microcode to version: 0xb2 (or later)

Even if you had a hypervisor that does _not_ virtualize the TSC deadline
and rather exposes the hardware one, it should be the hypervisor's task
to update microcode and possibly hide the flag from CPUID.  So just
hide the message when running on _any_ hypervisor, not just those that
do not support the TSC deadline timer.

The older check still makes sense, so keep it.
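
For reference, the "running on a hypervisor" condition used here corresponds
to CPUID leaf 1, ECX bit 31.  A minimal userspace check (x86 only, assuming a
GCC/Clang toolchain that provides <cpuid.h>):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;

            /* bit 31 of ECX is what X86_FEATURE_HYPERVISOR reflects */
            printf("running under a hypervisor: %s\n",
                   (ecx & (1u << 31)) ? "yes" : "no");
            return 0;
    }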

Fixes: bd9240a18e ("x86/apic: Add TSC_DEADLINE quirk due to errata")
Signed-off-by: Paolo Bonzini 
Signed-off-by: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Hans de Goede 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Link: 
https://lkml.kernel.org/r/1507630377-54471-1-git-send-email-pbonz...@redhat.com

---
 arch/x86/kernel/apic/apic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c76..50109ea 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -600,7 +600,8 @@ static void apic_check_deadline_errata(void)
const struct x86_cpu_id *m;
u32 rev;
 
-   if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+   if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+   boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
 
m = x86_match_cpu(deadline_match);


[tip:locking/core] tools/memory-model: Fix cheat sheet typo

2018-05-15 Thread tip-bot for Paolo Bonzini
Commit-ID:  a839195186a2bca1b2b46e57619e9ad5b8d42426
Gitweb: https://git.kernel.org/tip/a839195186a2bca1b2b46e57619e9ad5b8d42426
Author: Paolo Bonzini 
AuthorDate: Mon, 14 May 2018 16:33:42 -0700
Committer:  Ingo Molnar 
CommitDate: Tue, 15 May 2018 08:11:16 +0200

tools/memory-model: Fix cheat sheet typo

"RWM" should be "RMW".

Signed-off-by: Paolo Bonzini 
Signed-off-by: Paul E. McKenney 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aki...@gmail.com
Cc: boqun.f...@gmail.com
Cc: dhowe...@redhat.com
Cc: j.algl...@ucl.ac.uk
Cc: linux-a...@vger.kernel.org
Cc: luc.maran...@inria.fr
Cc: npig...@gmail.com
Cc: parri.and...@gmail.com
Cc: st...@rowland.harvard.edu
Link: 
http://lkml.kernel.org/r/1526340837-1-4-git-send-email-paul...@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar 
---
 tools/memory-model/Documentation/cheatsheet.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4aafb..c0eafdaddfa4 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
   Prior Operation Subsequent Operation
   ---  ---
-   C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV
+   C  Self  R  W  RMW  Self  R  W  DR  DW  RMW  SV
   --    -  -  ---    -  -  --  --  ---  --
 
 Store, e.g., WRITE_ONCE()Y   Y


[tip:x86/pti] x86/cpufeature: Move processor tracing out of scattered features

2018-01-16 Thread tip-bot for Paolo Bonzini
Commit-ID:  4fdec2034b7540dda461c6ba33325dfcff345c64
Gitweb: https://git.kernel.org/tip/4fdec2034b7540dda461c6ba33325dfcff345c64
Author: Paolo Bonzini 
AuthorDate: Tue, 16 Jan 2018 16:42:25 +0100
Committer:  Ingo Molnar 
CommitDate: Wed, 17 Jan 2018 07:38:39 +0100

x86/cpufeature: Move processor tracing out of scattered features

Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX),
so do not duplicate it in the scattered features word.

Besides being more tidy, this will be useful for KVM when it presents
processor tracing to the guests.  KVM selects host features that are
supported by both the host kernel (depending on command line options,
CPU errata, or whatever) and KVM.  Whenever a full feature word exists,
KVM's code is written in the expectation that the CPUID bit number
matches the X86_FEATURE_* bit number, but this is not the case for
X86_FEATURE_INTEL_PT.
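
For reference, Intel PT is enumerated in CPUID.(EAX=7,ECX=0):EBX bit 25, which
is exactly bit 25 of feature word 9.  A minimal userspace check (x86 only,
assuming a GCC/Clang toolchain that provides __get_cpuid_count()):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                    return 1;

            printf("Intel PT enumerated: %s\n", (ebx & (1u << 25)) ? "yes" : "no");
            return 0;
    }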

Signed-off-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Luwei Kang 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/scattered.c| 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index aa09559..25b9375 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -206,7 +206,6 @@
#define X86_FEATURE_RETPOLINE  ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD  ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW  ( 7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
@@ -246,6 +245,7 @@
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB   ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT   ( 9*32+25) /* Intel Processor Trace */
#define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER   ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD   ( 9*32+28) /* AVX-512 Conflict Detection */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad..d0e6976 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF,   CPUID_ECX,  0, 0x0006, 0 },
{ X86_FEATURE_EPB,  CPUID_ECX,  3, 0x0006, 0 },
-   { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x0007, 0 },
{ X86_FEATURE_AVX512_4VNNIW,CPUID_EDX,  2, 0x0007, 0 },
{ X86_FEATURE_AVX512_4FMAPS,CPUID_EDX,  3, 0x0007, 0 },
{ X86_FEATURE_CAT_L3,   CPUID_EBX,  1, 0x0010, 0 },


[tip:x86/pti] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL

2018-02-23 Thread tip-bot for Paolo Bonzini
Commit-ID:  ecb586bd29c99fb4de599dec388658e74388daad
Gitweb: https://git.kernel.org/tip/ecb586bd29c99fb4de599dec388658e74388daad
Author: Paolo Bonzini 
AuthorDate: Thu, 22 Feb 2018 16:43:17 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 23 Feb 2018 08:24:35 +0100

KVM/x86: Remove indirect MSR op calls from SPEC_CTRL

Having a paravirt indirect call in the IBRS restore path is not a
good idea, since we are trying to protect from speculative execution
of bogus indirect branch targets.  It is also slower, so use
native_wrmsrl() on the vmentry path too.
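
A sketch of what the direct form boils down to (illustrative only: WRMSR takes
the MSR index in ECX and the value in EDX:EAX, and is privileged, so this is
for reading rather than running unprivileged; the paravirt wrmsrl() instead
dispatches through a paravirt function pointer):

    #include <stdio.h>

    static inline void wrmsr_direct(unsigned int msr, unsigned long long val)
    {
            asm volatile("wrmsr"
                         :
                         : "c" (msr), "a" ((unsigned int)val),
                           "d" ((unsigned int)(val >> 32))
                         : "memory");
    }

    int main(void)
    {
            printf("wrmsr_direct() compiles to a single WRMSR, no indirect call\n");
            /* wrmsr_direct(0x48, 0); */   /* 0x48 = IA32_SPEC_CTRL; would fault in user mode */
            return 0;
    }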

Signed-off-by: Paolo Bonzini 
Reviewed-by: Jim Mattson 
Cc: David Woodhouse 
Cc: KarimAllah Ahmed 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kvm/svm.c | 7 ---
 arch/x86/kvm/vmx.c | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..1598beeda11c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * being speculatively taken.
 */
if (svm->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * save it.
 */
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-   rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+   svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (svm->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..0927be315965 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "trace.h"
@@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 * being speculatively taken.
 */
if (vmx->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9588,10 +9589,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 * save it.
 */
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-   rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+   vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (vmx->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();


[tip:x86/pti] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely()

2018-02-23 Thread tip-bot for Paolo Bonzini
Commit-ID:  946fbbc13dce68902f64515b610eeb2a6c3d7a64
Gitweb: https://git.kernel.org/tip/946fbbc13dce68902f64515b610eeb2a6c3d7a64
Author: Paolo Bonzini 
AuthorDate: Thu, 22 Feb 2018 16:43:18 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 23 Feb 2018 08:24:36 +0100

KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely()

vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving
branch hints to the compiler can actually make a substantial cycle
difference by keeping the fast path contiguous in memory.

With this optimization, the retpoline-guest/retpoline-host case is
about 50 cycles faster.
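
unlikely() is __builtin_expect() underneath; a standalone illustration of the
effect it is being used for here (the compiler lays the cold branch out of
line so the hot path stays contiguous):

    #include <stdio.h>

    #define unlikely(x) __builtin_expect(!!(x), 0)

    static long process(long v)
    {
            if (unlikely(v < 0))      /* rare path, placed out of line */
                    return -1;
            return v * 2;             /* hot path falls straight through */
    }

    int main(void)
    {
            printf("%ld %ld\n", process(21), process(-3));
            return 0;
    }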

Signed-off-by: Paolo Bonzini 
Reviewed-by: Jim Mattson 
Cc: David Woodhouse 
Cc: KarimAllah Ahmed 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kvm/svm.c | 2 +-
 arch/x86/kvm/vmx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1598beeda11c..24c9521ebc24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5465,7 +5465,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 * save it.
 */
-   if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+   if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (svm->spec_ctrl)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0927be315965..7f8401d05939 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9588,7 +9588,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 * save it.
 */
-   if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+   if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (vmx->spec_ctrl)


[tip:locking/core] tools/memory-model: Fix cheat sheet typo

2018-05-15 Thread tip-bot for Paolo Bonzini
Commit-ID:  a839195186a2bca1b2b46e57619e9ad5b8d42426
Gitweb: https://git.kernel.org/tip/a839195186a2bca1b2b46e57619e9ad5b8d42426
Author: Paolo Bonzini 
AuthorDate: Mon, 14 May 2018 16:33:42 -0700
Committer:  Ingo Molnar 
CommitDate: Tue, 15 May 2018 08:11:16 +0200

tools/memory-model: Fix cheat sheet typo

"RWM" should be "RMW".

Signed-off-by: Paolo Bonzini 
Signed-off-by: Paul E. McKenney 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: aki...@gmail.com
Cc: boqun.f...@gmail.com
Cc: dhowe...@redhat.com
Cc: j.algl...@ucl.ac.uk
Cc: linux-a...@vger.kernel.org
Cc: luc.maran...@inria.fr
Cc: npig...@gmail.com
Cc: parri.and...@gmail.com
Cc: st...@rowland.harvard.edu
Link: 
http://lkml.kernel.org/r/1526340837-1-4-git-send-email-paul...@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar 
---
 tools/memory-model/Documentation/cheatsheet.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/memory-model/Documentation/cheatsheet.txt 
b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4aafb..c0eafdaddfa4 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
   Prior Operation Subsequent Operation
   ---  ---
-   C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV
+   C  Self  R  W  RMW  Self  R  W  DR  DW  RMW  SV
   --    -  -  ---    -  -  --  --  ---  --
 
 Store, e.g., WRITE_ONCE()Y   Y


[tip:x86/urgent] x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

2017-10-12 Thread tip-bot for Paolo Bonzini
Commit-ID:  cc6afe2240298049585e86b1ade85efc8a7f225d
Gitweb: https://git.kernel.org/tip/cc6afe2240298049585e86b1ade85efc8a7f225d
Author: Paolo Bonzini 
AuthorDate: Tue, 10 Oct 2017 12:12:57 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 12 Oct 2017 17:10:10 +0200

x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors

Commit 594a30fb1242 ("x86/apic: Silence "FW_BUG TSC_DEADLINE disabled
due to Errata" on CPUs without the feature", 2017-08-30) was also about
silencing the warning on VirtualBox; however, KVM does expose the TSC
deadline timer, and it's virtualized so that it is immune from CPU errata.

Therefore, booting 4.13 with "-cpu Haswell" shows this in the logs:

 [0.00] [Firmware Bug]: TSC_DEADLINE disabled due to Errata;
please update microcode to version: 0xb2 (or later)

Even if you had a hypervisor that does _not_ virtualize the TSC deadline
and rather exposes the hardware one, it should be the hypervisors task
to update microcode and possibly hide the flag from CPUID.  So just
hide the message when running on _any_ hypervisor, not just those that
do not support the TSC deadline timer.

The older check still makes sense, so keep it.

Fixes: bd9240a18e ("x86/apic: Add TSC_DEADLINE quirk due to errata")
Signed-off-by: Paolo Bonzini 
Signed-off-by: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Hans de Goede 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Link: 
https://lkml.kernel.org/r/1507630377-54471-1-git-send-email-pbonz...@redhat.com

---
 arch/x86/kernel/apic/apic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c76..50109ea 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -600,7 +600,8 @@ static void apic_check_deadline_errata(void)
const struct x86_cpu_id *m;
u32 rev;
 
-   if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+   if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+   boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
 
m = x86_match_cpu(deadline_match);
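
The hypervisor check added above relies on CPUID leaf 1, ECX bit 31,
which hypervisors set for their guests and which the kernel mirrors as
X86_FEATURE_HYPERVISOR.  As a rough userspace illustration (not the
kernel's code path), the same bit can be read with GCC/clang's cpuid.h:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 1;

    /* Leaf 1, ECX bit 31 is reserved for hypervisors. */
    printf("running under a hypervisor: %s\n",
           (ecx & (1u << 31)) ? "yes" : "no");
    return 0;
}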


[tip:locking/core] cpuset: Make nr_cpusets private

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  be040bea9085a9c2b1700c9e60888777baeb96d5
Gitweb: http://git.kernel.org/tip/be040bea9085a9c2b1700c9e60888777baeb96d5
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:06 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:57 +0200

cpuset: Make nr_cpusets private

Any use of key->enabled (that is static_key_enabled and static_key_count)
outside jump_label_lock should handle its own serialization.  In the case
of cpusets_enabled_key, the key is always incremented/decremented under
cpuset_mutex, and hence the same rule applies to nr_cpusets.  The rule
*is* respected currently, but the mutex is static so nr_cpusets should
be static too.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Zefan Li 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-4-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 include/linux/cpuset.h | 6 --
 kernel/cgroup/cpuset.c | 7 +++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 898cfe2..e74655d 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void)
return static_branch_unlikely(&cpusets_enabled_key);
 }
 
-static inline int nr_cpusets(void)
-{
-   /* jump label reference count + the top-level cpuset */
-   return static_key_count(&cpusets_enabled_key.key) + 1;
-}
-
 static inline void cpuset_inc(void)
 {
static_branch_inc(&cpusets_pre_enable_key);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d51516..9ed6a05 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct 
sched_domain_attr *dattr,
rcu_read_unlock();
 }
 
+/* Must be called with cpuset_mutex held.  */
+static inline int nr_cpusets(void)
+{
+   /* jump label reference count + the top-level cpuset */
+   return static_key_count(&cpusets_enabled_key.key) + 1;
+}
+
 /*
  * generate_sched_domains()
  *
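
The rule the patch encodes is that the count may only be read under the
same mutex that serializes its updates.  A minimal userspace analogue of
that discipline, with invented names (a pthread mutex stands in for
cpuset_mutex):

#include <pthread.h>

static pthread_mutex_t example_mutex = PTHREAD_MUTEX_INITIALIZER;
static int example_count;

static void example_inc(void)
{
    pthread_mutex_lock(&example_mutex);
    example_count++;
    pthread_mutex_unlock(&example_mutex);
}

/* Must be called with example_mutex held, like nr_cpusets(). */
static int example_read(void)
{
    return example_count;
}

int main(void)
{
    int n;

    example_inc();
    pthread_mutex_lock(&example_mutex);
    n = example_read();             /* legal: the caller holds the lock */
    pthread_mutex_unlock(&example_mutex);
    return n == 1 ? 0 : 1;
}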


[tip:locking/core] jump_label: Fix concurrent static_key_enable/disable()

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  1dbb6704de91b169a58d0c8221624afd6a95cfc7
Gitweb: http://git.kernel.org/tip/1dbb6704de91b169a58d0c8221624afd6a95cfc7
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:04 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:56 +0200

jump_label: Fix concurrent static_key_enable/disable()

static_key_enable/disable are trying to cap the static key count to
0/1.  However, their use of key->enabled is outside jump_label_lock
so they do not really ensure that.

Rewrite them to do a quick check for an already enabled (respectively,
already disabled) key, and then recheck under the jump label lock.  Unlike
static_key_slow_inc/dec, a failed check under the jump label lock does
not modify key->enabled.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Eric Dumazet 
Cc: Jason Baron 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 include/linux/jump_label.h | 22 +
 kernel/jump_label.c| 59 +-
 2 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2afd74b..740a42e 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -234,22 +234,24 @@ static inline int jump_label_apply_nops(struct module 
*mod)
 
 static inline void static_key_enable(struct static_key *key)
 {
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
+   STATIC_KEY_CHECK_USE();
 
-   if (!count)
-   static_key_slow_inc(key);
+   if (atomic_read(&key->enabled) != 0) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+   return;
+   }
+   atomic_set(&key->enabled, 1);
 }
 
 static inline void static_key_disable(struct static_key *key)
 {
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
+   STATIC_KEY_CHECK_USE();
 
-   if (count)
-   static_key_slow_dec(key);
+   if (atomic_read(&key->enabled) != 1) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+   return;
+   }
+   atomic_set(&key->enabled, 0);
 }
 
 #define STATIC_KEY_INIT_TRUE   { .enabled = ATOMIC_INIT(1) }
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index d11c506..833eeca 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -79,28 +79,6 @@ int static_key_count(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_count);
 
-void static_key_enable(struct static_key *key)
-{
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
-
-   if (!count)
-   static_key_slow_inc(key);
-}
-EXPORT_SYMBOL_GPL(static_key_enable);
-
-void static_key_disable(struct static_key *key)
-{
-   int count = static_key_count(key);
-
-   WARN_ON_ONCE(count < 0 || count > 1);
-
-   if (count)
-   static_key_slow_dec(key);
-}
-EXPORT_SYMBOL_GPL(static_key_disable);
-
 void static_key_slow_inc(struct static_key *key)
 {
int v, v1;
@@ -139,6 +117,43 @@ void static_key_slow_inc(struct static_key *key)
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
 
+void static_key_enable(struct static_key *key)
+{
+   STATIC_KEY_CHECK_USE();
+   if (atomic_read(&key->enabled) > 0) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+   return;
+   }
+
+   cpus_read_lock();
+   jump_label_lock();
+   if (atomic_read(&key->enabled) == 0) {
+   atomic_set(&key->enabled, -1);
+   jump_label_update(key);
+   atomic_set(&key->enabled, 1);
+   }
+   jump_label_unlock();
+   cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable);
+
+void static_key_disable(struct static_key *key)
+{
+   STATIC_KEY_CHECK_USE();
+   if (atomic_read(&key->enabled) != 1) {
+   WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+   return;
+   }
+
+   cpus_read_lock();
+   jump_label_lock();
+   if (atomic_cmpxchg(&key->enabled, 1, 0))
+   jump_label_update(key);
+   jump_label_unlock();
+   cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable);
+
 static void __static_key_slow_dec(struct static_key *key,
unsigned long rate_limit, struct delayed_work *work)
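
The net effect of the rewrite is that static_key_enable() and
static_key_disable() become idempotent and redo their check under the
lock, so the count is pinned to 0 or 1.  A small userspace analogue of
those semantics (invented names; a pthread mutex stands in for
jump_label_lock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int enabled;

static void example_enable(void)
{
    pthread_mutex_lock(&lock);
    if (enabled == 0)
        enabled = 1;                /* 1 stays 1: repeat calls are no-ops */
    pthread_mutex_unlock(&lock);
}

static void example_disable(void)
{
    pthread_mutex_lock(&lock);
    if (enabled == 1)
        enabled = 0;                /* 0 stays 0 */
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    example_enable();
    example_enable();               /* must not push the count to 2 */
    printf("enabled = %d\n", enabled);
    example_disable();
    return 0;
}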
 {


[tip:locking/core] jump_label: Do not use unserialized static_key_enabled()

2017-08-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  7a34bcb8b272b1300f0125c93a54f0c98812acdd
Gitweb: http://git.kernel.org/tip/7a34bcb8b272b1300f0125c93a54f0c98812acdd
Author: Paolo Bonzini 
AuthorDate: Tue, 1 Aug 2017 17:24:05 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 10 Aug 2017 12:28:56 +0200

jump_label: Do not use unserialized static_key_enabled()

Any use of key->enabled (that is static_key_enabled and static_key_count)
outside jump_label_lock should handle its own serialization.  The only
two that are not doing so are the UDP encapsulation static keys.  Change
them to use static_key_enable, which now correctly tests key->enabled under
the jump label lock.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc: Eric Dumazet 
Cc: Jason Baron 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1501601046-35683-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 Documentation/static-keys.txt | 5 +
 net/ipv4/udp.c| 3 +--
 net/ipv6/udp.c| 3 +--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index b83dfa1..870b4be 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -149,6 +149,11 @@ static_branch_inc(), will change the branch back to true. 
Likewise, if the
 key is initialized false, a 'static_branch_inc()', will change the branch to
 true. And then a 'static_branch_dec()', will again make the branch false.
 
+The state and the reference count can be retrieved with 'static_key_enabled()'
+and 'static_key_count()'.  In general, if you use these functions, they
+should be protected with the same mutex used around the enable/disable
+or increment/decrement function.
+
 Where an array of keys is required, it can be defined as::
 
DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6276fa..3037339 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1809,8 +1809,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 static struct static_key udp_encap_needed __read_mostly;
 void udp_encap_enable(void)
 {
-   if (!static_key_enabled(&udp_encap_needed))
-   static_key_slow_inc(&udp_encap_needed);
+   static_key_enable(&udp_encap_needed);
 }
 EXPORT_SYMBOL(udp_encap_enable);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b..96d2407 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -574,8 +574,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 static struct static_key udpv6_encap_needed __read_mostly;
 void udpv6_encap_enable(void)
 {
-   if (!static_key_enabled(&udpv6_encap_needed))
-   static_key_slow_inc(&udpv6_encap_needed);
+   static_key_enable(&udpv6_encap_needed);
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
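
The idiom being removed is an unserialized check followed by an
increment; two CPUs can both observe the key as disabled and both bump
it.  A sketch of the before/after pattern in kernel context (the key
name is illustrative, not taken from this patch):

/* Racy: static_key_enabled() is read outside jump_label_lock. */
static struct static_key example_needed __read_mostly;

static void example_enable_racy(void)
{
    if (!static_key_enabled(&example_needed))
        static_key_slow_inc(&example_needed);
}

/* Safe: static_key_enable() rechecks under jump_label_lock and caps at 1. */
static void example_enable_safe(void)
{
    static_key_enable(&example_needed);
}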
 


[tip:x86/pti] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL

2018-02-23 Thread tip-bot for Paolo Bonzini
Commit-ID:  ecb586bd29c99fb4de599dec388658e74388daad
Gitweb: https://git.kernel.org/tip/ecb586bd29c99fb4de599dec388658e74388daad
Author: Paolo Bonzini 
AuthorDate: Thu, 22 Feb 2018 16:43:17 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 23 Feb 2018 08:24:35 +0100

KVM/x86: Remove indirect MSR op calls from SPEC_CTRL

Having a paravirt indirect call in the IBRS restore path is not a
good idea, since we are trying to protect from speculative execution
of bogus indirect branch targets.  It is also slower, so use
native_wrmsrl() on the vmentry path too.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Jim Mattson 
Cc: David Woodhouse 
Cc: KarimAllah Ahmed 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d
Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kvm/svm.c | 7 ---
 arch/x86/kvm/vmx.c | 7 ---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a74828..1598beeda11c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,6 +49,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * being speculatively taken.
 */
if (svm->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * save it.
 */
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-   rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+   svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (svm->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126aa302..0927be315965 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "trace.h"
@@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 * being speculatively taken.
 */
if (vmx->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
@@ -9588,10 +9589,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu 
*vcpu)
 * save it.
 */
if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-   rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+   vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (vmx->spec_ctrl)
-   wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+   native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
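
For reference, native_wrmsrl() reduces to a direct WRMSR with no paravirt
indirection.  A rough sketch of such a raw write is below; it compiles
anywhere, but the instruction is privileged and only meaningful in kernel
(CPL0) context, so this is an illustration rather than the patch's code:

/* WRMSR takes the MSR index in ECX and the value in EDX:EAX. */
static inline void raw_wrmsrl(unsigned int msr, unsigned long long val)
{
    unsigned int lo = (unsigned int)val;
    unsigned int hi = (unsigned int)(val >> 32);

    asm volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi) : "memory");
}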


[tip:x86/pti] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely()

2018-02-23 Thread tip-bot for Paolo Bonzini
Commit-ID:  946fbbc13dce68902f64515b610eeb2a6c3d7a64
Gitweb: https://git.kernel.org/tip/946fbbc13dce68902f64515b610eeb2a6c3d7a64
Author: Paolo Bonzini 
AuthorDate: Thu, 22 Feb 2018 16:43:18 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 23 Feb 2018 08:24:36 +0100

KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path 
as unlikely()

vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving
branch hints to the compiler can actually make a substantial cycle
difference by keeping the fast path contiguous in memory.

With this optimization, the retpoline-guest/retpoline-host case is
about 50 cycles faster.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Jim Mattson 
Cc: David Woodhouse 
Cc: KarimAllah Ahmed 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Cc: sta...@vger.kernel.org
Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kvm/svm.c | 2 +-
 arch/x86/kvm/vmx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1598beeda11c..24c9521ebc24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5465,7 +5465,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 * save it.
 */
-   if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+   if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (svm->spec_ctrl)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0927be315965..7f8401d05939 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9588,7 +9588,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 * save it.
 */
-   if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+   if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
if (vmx->spec_ctrl)
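
The unlikely() annotation is a thin wrapper around __builtin_expect():
the compiler keeps the expected outcome as the fall-through and moves the
cold path out of the hot cache lines.  A small standalone illustration
(my_unlikely is an invented name for the same idea):

#include <stdio.h>

#define my_unlikely(x)  __builtin_expect(!!(x), 0)

int main(void)
{
    int error = 0;

    if (my_unlikely(error))
        fprintf(stderr, "cold, rarely executed path\n");
    printf("fast path stays contiguous\n");
    return 0;
}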


[tip:x86/pti] x86/cpufeature: Move processor tracing out of scattered features

2018-01-16 Thread tip-bot for Paolo Bonzini
Commit-ID:  4fdec2034b7540dda461c6ba33325dfcff345c64
Gitweb: https://git.kernel.org/tip/4fdec2034b7540dda461c6ba33325dfcff345c64
Author: Paolo Bonzini 
AuthorDate: Tue, 16 Jan 2018 16:42:25 +0100
Committer:  Ingo Molnar 
CommitDate: Wed, 17 Jan 2018 07:38:39 +0100

x86/cpufeature: Move processor tracing out of scattered features

Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX),
so do not duplicate it in the scattered features word.

Besides being more tidy, this will be useful for KVM when it presents
processor tracing to the guests.  KVM selects host features that are
supported by both the host kernel (depending on command line options,
CPU errata, or whatever) and KVM.  Whenever a full feature word exists,
KVM's code is written in the expectation that the CPUID bit number
matches the X86_FEATURE_* bit number, but this is not the case for
X86_FEATURE_INTEL_PT.

Signed-off-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Linus Torvalds 
Cc: Luwei Kang 
Cc: Peter Zijlstra 
Cc: Radim Krčmář 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/scattered.c| 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index aa09559..25b9375 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -206,7 +206,6 @@
 #define X86_FEATURE_RETPOLINE  ( 7*32+12) /* Generic Retpoline 
mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD  ( 7*32+13) /* AMD Retpoline mitigation 
for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory 
Number */
-#define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW  ( 7*32+16) /* AVX-512 Neural Network 
Instructions */
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
@@ -246,6 +245,7 @@
 #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused 
Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB   ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT   ( 9*32+25) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER   ( 9*32+27) /* AVX-512 Exponential and 
Reciprocal */
 #define X86_FEATURE_AVX512CD   ( 9*32+28) /* AVX-512 Conflict 
Detection */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad..d0e6976 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF,   CPUID_ECX,  0, 0x0006, 0 },
{ X86_FEATURE_EPB,  CPUID_ECX,  3, 0x0006, 0 },
-   { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x0007, 0 },
{ X86_FEATURE_AVX512_4VNNIW,CPUID_EDX,  2, 0x0007, 0 },
{ X86_FEATURE_AVX512_4FMAPS,CPUID_EDX,  3, 0x0007, 0 },
{ X86_FEATURE_CAT_L3,   CPUID_EBX,  1, 0x0010, 0 },
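
Because word 9 is CPUID(EAX=7, ECX=0).EBX, the bit number encoded in
X86_FEATURE_INTEL_PT (9*32+25) now lines up with EBX bit 25 directly.
A userspace illustration, assuming a cpuid.h that provides
__get_cpuid_count (recent GCC or clang):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
        return 1;

    /* EBX bit 25 of leaf 7 enumerates Intel Processor Trace. */
    printf("Intel PT supported: %s\n",
           (ebx & (1u << 25)) ? "yes" : "no");
    return 0;
}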


[tip:perf/core] perf unwind: Support for powerpc

2017-06-21 Thread tip-bot for Paolo Bonzini
Commit-ID:  a7f0fda085870312ab694b19a1304ece161a1217
Gitweb: http://git.kernel.org/tip/a7f0fda085870312ab694b19a1304ece161a1217
Author: Paolo Bonzini 
AuthorDate: Thu, 1 Jun 2017 12:24:41 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Wed, 21 Jun 2017 11:35:42 -0300

perf unwind: Support for powerpc

Porting PPC to libdw only needs an architecture-specific hook to move
the register state from perf to libdw.

The ARM and x86 architectures already use libdw, and it is useful to
have as much common code for the unwinder as possible.  Mark Wielaard
has contributed a frame-based unwinder to libdw, so that unwinding works
even for binaries that do not have CFI information.  In addition,
libunwind is always preferred to libdw by the build machinery so this
cannot introduce regressions on machines that have both libunwind and
libdw installed.

Signed-off-by: Paolo Bonzini 
Acked-by: Jiri Olsa 
Acked-by: Milian Wolff 
Acked-by: Ravi Bangoria 
Cc: Naveen N. Rao 
Cc: linuxppc-...@lists.ozlabs.org
Link: 
http://lkml.kernel.org/r/1496312681-20133-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/Makefile.config  |  2 +-
 tools/perf/arch/powerpc/util/Build  |  2 +
 tools/perf/arch/powerpc/util/unwind-libdw.c | 73 +
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 1f4fbc9..bdf0e87 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 90ad64b..2e659531 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -5,4 +5,6 @@ libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c 
b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 000..3a24b3c
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,73 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
+static const int special_regs[3][2] = {
+   { 65, PERF_REG_POWERPC_LINK },
+   { 101, PERF_REG_POWERPC_XER },
+   { 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+   struct unwind_info *ui = arg;
+   struct regs_dump *user_regs = &ui->sample->user_regs;
+   Dwarf_Word dwarf_regs[32], dwarf_nip;
+   size_t i;
+
+#define REG(r) ({  \
+   Dwarf_Word val = 0; \
+   perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r);  \
+   val;\
+})
+
+   dwarf_regs[0]  = REG(R0);
+   dwarf_regs[1]  = REG(R1);
+   dwarf_regs[2]  = REG(R2);
+   dwarf_regs[3]  = REG(R3);
+   dwarf_regs[4]  = REG(R4);
+   dwarf_regs[5]  = REG(R5);
+   dwarf_regs[6]  = REG(R6);
+   dwarf_regs[7]  = REG(R7);
+   dwarf_regs[8]  = REG(R8);
+   dwarf_regs[9]  = REG(R9);
+   dwarf_regs[10] = REG(R10);
+   dwarf_regs[11] = REG(R11);
+   dwarf_regs[12] = REG(R12);
+   dwarf_regs[13] = REG(R13);
+   dwarf_regs[14] = REG(R14);
+   dwarf_regs[15] = REG(R15);
+   dwarf_regs[16] = REG(R16);
+   dwarf_regs[17] = REG(R17);
+   dwarf_regs[18] = REG(R18);
+   dwarf_regs[19] = REG(R19);
+   dwarf_regs[20] = REG(R20);
+   dwarf_regs[21] = REG(R21);
+   dwarf_regs[22] = REG(R22);
+   dwarf_regs[23] = REG(R23);
+   dwarf_regs[24] = REG(R24);
+   dwarf_regs[25] = REG(R25);
+   dwarf_regs[26] = REG(R26);
+   dwarf_regs[27] = REG(R27);
+   dwarf_regs[28] = REG(R28);
+   dwarf_regs[29] = REG(R29);
+   dwarf_regs[30] = REG(R30);
+   dwarf_regs[31] = REG(R31);
+   if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+   return false;
+
+   dwarf_nip = REG(NIP);
+   dwfl_thread_state_register_pc(thread, dwarf_nip);
+   for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+   Dwarf_Word val = 0;
+   perf_reg_value(&val, user_regs, special_regs[i][1]);
+   if (!dwfl_thread_state_registers(thread,
+special_regs[i][0], 1,
+  


[tip:perf/core] perf script: Add stackcollapse.py script

2016-06-22 Thread tip-bot for Paolo Bonzini
Commit-ID:  6745d8ea825966b0956c691cf7fccc13debedc39
Gitweb: http://git.kernel.org/tip/6745d8ea825966b0956c691cf7fccc13debedc39
Author: Paolo Bonzini 
AuthorDate: Tue, 12 Apr 2016 15:26:13 +0200
Committer:  Arnaldo Carvalho de Melo 
CommitDate: Tue, 21 Jun 2016 13:18:35 -0300

perf script: Add stackcollapse.py script

Add stackcollapse.py script as an example of parsing call chains, and
also of using optparse to access command line options.

The flame graph tools include a set of scripts that parse output from
various tools (including "perf script"), remove the offsets in the
function and collapse each stack to a single line.  The website also
says "perf report could have a report style [...] that output folded
stacks directly, obviating the need for stackcollapse-perf.pl", so here
it is.

This script is a Python rewrite of stackcollapse-perf.pl, using the perf
scripting interface to access the perf data directly from Python.

Signed-off-by: Paolo Bonzini 
Acked-by: Jiri Olsa 
Cc: Brendan Gregg 
Link: 
http://lkml.kernel.org/r/1460467573-22989-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/scripts/python/bin/stackcollapse-record |   8 ++
 tools/perf/scripts/python/bin/stackcollapse-report |   3 +
 tools/perf/scripts/python/stackcollapse.py | 127 +
 3 files changed, 138 insertions(+)

diff --git a/tools/perf/scripts/python/bin/stackcollapse-record 
b/tools/perf/scripts/python/bin/stackcollapse-record
new file mode 100755
index 000..9d8f9f0
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-record
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can cover all type of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record "$@"
diff --git a/tools/perf/scripts/python/bin/stackcollapse-report 
b/tools/perf/scripts/python/bin/stackcollapse-report
new file mode 100755
index 000..356b965
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
diff --git a/tools/perf/scripts/python/stackcollapse.py 
b/tools/perf/scripts/python/stackcollapse.py
new file mode 100755
index 000..a2dfcda
--- /dev/null
+++ b/tools/perf/scripts/python/stackcollapse.py
@@ -0,0 +1,127 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse.py - format perf samples with one line per distinct call stack
+#
+# This script's output has two space-separated fields.  The first is a 
semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack.  The second is a count:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 99 sleep 60
+#  perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini 
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+# formatting options for the bottom entry of the stack
+make_option("--include-tid", dest="include_tid",
+ action="store_true", default=False,
+ help="include thread id in stack"),
+make_option("--include-pid", dest="include_pid",
+ action="store_true", default=False,
+ help="include process id in stack"),
+make_option("--no-comm", dest="include_comm",
+ action="store_false", default=True,
+ help="do not separate stacks according to comm"),
+make_option("--tidy-java", dest="tidy_java",
+ action="store_true", default=False,
+ help="beautify Java signatures"),
+make_option("--kernel", dest="annotate_kernel",
+ action="store_true", default=False,
+ help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines = defaultdict(lambda: 0)
+
+def process_event(param_dict):
+def tidy_function_name(sym, dso):
+if sym is None:
+

[tip:x86/asm] x86/entry: Avoid interrupt flag save and restore

2016-07-09 Thread tip-bot for Paolo Bonzini
Commit-ID:  0b95364f977c180e1f336e00273fda5d3eca54b4
Gitweb: http://git.kernel.org/tip/0b95364f977c180e1f336e00273fda5d3eca54b4
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:29 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 9 Jul 2016 10:44:01 +0200

x86/entry: Avoid interrupt flag save and restore

Thanks to all the work that was done by Andy Lutomirski and others,
enter_from_user_mode() and prepare_exit_to_usermode() are now called only with
interrupts disabled.  Let's provide them a version of user_enter()/user_exit()
that skips saving and restoring the interrupt flag.

On an AMD-based machine I tested this patch on, with force-enabled
context tracking, the speed-up in system calls was 90 clock cycles or 6%,
measured with the following simple benchmark:

#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <stdio.h>

unsigned long rdtsc()
{
unsigned long result;
asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n"
 "or %%rdx, %%rax" : "=a" (result) : : "rdx");
return result;
}

int main()
{
unsigned long tsc1, tsc2;
int pid = getpid();
int i;

tsc1 = rdtsc();
for (i = 0; i < 1; i++)
kill(pid, SIGWINCH);
tsc2 = rdtsc();

printf("%ld\n", tsc2 - tsc1);
}

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c  |  4 ++--
 include/linux/context_tracking.h | 15 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e5..618bc61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct 
pt_regs *regs)
 __visible void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
-   user_exit();
+   user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct 
pt_regs *regs)
ti->status &= ~TS_COMPAT;
 #endif
 
-   user_enter();
+   user_enter_irqoff();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS\
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d259274..d9aef2a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -31,6 +31,19 @@ static inline void user_exit(void)
context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
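
For contrast, the plain user_enter()/user_exit() helpers bracket the
context-tracking call with an interrupt-flag save and restore, which is
exactly the cost the *_irqoff() variants avoid when the caller already
runs with IRQs off.  A rough sketch of that wrapper, in kernel context
and not the exact implementation:

static inline void user_exit_with_flags(void)
{
    unsigned long flags;

    if (!context_tracking_is_enabled())
        return;

    local_irq_save(flags);          /* the pushf/cli pair being avoided */
    __context_tracking_exit(CONTEXT_USER);
    local_irq_restore(flags);
}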


[tip:x86/asm] x86/entry: Inline enter_from_user_mode()

2016-07-09 Thread tip-bot for Paolo Bonzini
Commit-ID:  eec4b1227db153ca16f8f5f285d01fefdce05438
Gitweb: http://git.kernel.org/tip/eec4b1227db153ca16f8f5f285d01fefdce05438
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:30 +0200
Committer:  Ingo Molnar 
CommitDate: Sat, 9 Jul 2016 10:44:02 +0200

x86/entry: Inline enter_from_user_mode()

This matches what is already done for prepare_exit_to_usermode(),
and saves about 60 clock cycles (4% speedup) with the benchmark
in the previous commit message.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 618bc61..9e1e27d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct 
pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();


[tip:x86/asm] x86/entry: Avoid interrupt flag save and restore

2016-07-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  2e9d1e150abf88cb63e5d34ca286edbb95b4c53d
Gitweb: http://git.kernel.org/tip/2e9d1e150abf88cb63e5d34ca286edbb95b4c53d
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:29 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 10 Jul 2016 13:33:02 +0200

x86/entry: Avoid interrupt flag save and restore

Thanks to all the work that was done by Andy Lutomirski and others,
enter_from_user_mode() and prepare_exit_to_usermode() are now called only with
interrupts disabled.  Let's provide them a version of user_enter()/user_exit()
that skips saving and restoring the interrupt flag.

On an AMD-based machine I tested this patch on, with force-enabled
context tracking, the speed-up in system calls was 90 clock cycles or 6%,
measured with the following simple benchmark:

#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <stdio.h>

unsigned long rdtsc()
{
unsigned long result;
asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n"
 "or %%rdx, %%rax" : "=a" (result) : : "rdx");
return result;
}

int main()
{
unsigned long tsc1, tsc2;
int pid = getpid();
int i;

tsc1 = rdtsc();
for (i = 0; i < 1; i++)
kill(pid, SIGWINCH);
tsc2 = rdtsc();

printf("%ld\n", tsc2 - tsc1);
}

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c  |  4 ++--
 include/linux/context_tracking.h | 15 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e5..618bc61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct 
pt_regs *regs)
 __visible void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
-   user_exit();
+   user_exit_irqoff();
 }
 #else
 static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct 
pt_regs *regs)
ti->status &= ~TS_COMPAT;
 #endif
 
-   user_enter();
+   user_enter_irqoff();
 }
 
 #define SYSCALL_EXIT_WORK_FLAGS\
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d259274..d9aef2a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -31,6 +31,19 @@ static inline void user_exit(void)
context_tracking_exit(CONTEXT_USER);
 }
 
+/* Called with interrupts disabled.  */
+static inline void user_enter_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_enter(CONTEXT_USER);
+
+}
+static inline void user_exit_irqoff(void)
+{
+   if (context_tracking_is_enabled())
+   __context_tracking_exit(CONTEXT_USER);
+}
+
 static inline enum ctx_state exception_enter(void)
 {
enum ctx_state prev_ctx;
@@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void)
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
+static inline void user_enter_irqoff(void) { }
+static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }


[tip:x86/asm] x86/entry: Inline enter_from_user_mode()

2016-07-10 Thread tip-bot for Paolo Bonzini
Commit-ID:  be8a18e2e98e04a5def5887d913b267865562448
Gitweb: http://git.kernel.org/tip/be8a18e2e98e04a5def5887d913b267865562448
Author: Paolo Bonzini 
AuthorDate: Mon, 20 Jun 2016 16:58:30 +0200
Committer:  Ingo Molnar 
CommitDate: Sun, 10 Jul 2016 13:33:02 +0200

x86/entry: Inline enter_from_user_mode()

This matches what is already done for prepare_exit_to_usermode(),
and saves about 60 clock cycles (4% speedup) with the benchmark
in the previous commit message.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Rik van Riel 
Reviewed-by: Andy Lutomirski 
Acked-by: Paolo Bonzini 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: k...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonz...@redhat.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 618bc61..9e1e27d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct 
pt_regs *regs)
 
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
-__visible void enter_from_user_mode(void)
+__visible inline void enter_from_user_mode(void)
 {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();


[tip:locking/urgent] locking/static_key: Fix concurrent static_key_slow_inc()

2016-06-24 Thread tip-bot for Paolo Bonzini
Commit-ID:  4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff
Gitweb: http://git.kernel.org/tip/4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff
Author: Paolo Bonzini 
AuthorDate: Tue, 21 Jun 2016 18:52:17 +0200
Committer:  Ingo Molnar 
CommitDate: Fri, 24 Jun 2016 08:23:16 +0200

locking/static_key: Fix concurrent static_key_slow_inc()

The following scenario is possible:

CPU 1   CPU 2
static_key_slow_inc()
 atomic_inc_not_zero()
  -> key.enabled == 0, no increment
 jump_label_lock()
 atomic_inc_return()
  -> key.enabled == 1 now
static_key_slow_inc()
 atomic_inc_not_zero()
  -> key.enabled == 1, inc to 2
 return
** static key is wrong!
 jump_label_update()
 jump_label_unlock()

Testing the static key at the point marked by (**) will follow the
wrong path for jumps that have not been patched yet.  This can
actually happen when creating many KVM virtual machines with userspace
LAPIC emulation; just run several copies of the following program:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
for (;;) {
int kvmfd = open("/dev/kvm", O_RDONLY);
int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
close(ioctl(vmfd, KVM_CREATE_VCPU, 1));
close(vmfd);
close(kvmfd);
}
return 0;
}

Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call.
The static key's purpose is to skip NULL pointer checks and indeed one
of the processes eventually dereferences NULL.

As explained in the commit that introduced the bug:

  706249c222f6 ("locking/static_keys: Rework update logic")

jump_label_update() needs key.enabled to be true.  The solution adopted
here is to temporarily make key.enabled == -1, and go down the
slow path when key.enabled <= 0.

Reported-by: Dmitry Vyukov 
Signed-off-by: Paolo Bonzini 
Signed-off-by: Peter Zijlstra (Intel) 
Cc:  # v4.3+
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Fixes: 706249c222f6 ("locking/static_keys: Rework update logic")
Link: 
http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonz...@redhat.com
[ Small stylistic edits to the changelog and the code. ]
Signed-off-by: Ingo Molnar 
---
 include/linux/jump_label.h | 16 +---
 kernel/jump_label.c| 36 +---
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0536524..6890446 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -117,13 +117,18 @@ struct module;
 
 #include 
 
+#ifdef HAVE_JUMP_LABEL
+
 static inline int static_key_count(struct static_key *key)
 {
-   return atomic_read(&key->enabled);
+   /*
+* -1 means the first static_key_slow_inc() is in progress.
+*  static_key_enabled() must return true, so return 1 here.
+*/
+   int n = atomic_read(&key->enabled);
+   return n >= 0 ? n : 1;
 }
 
-#ifdef HAVE_JUMP_LABEL
-
 #define JUMP_TYPE_FALSE0UL
 #define JUMP_TYPE_TRUE 1UL
 #define JUMP_TYPE_MASK 1UL
@@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
+static inline int static_key_count(struct static_key *key)
+{
+   return atomic_read(&key->enabled);
+}
+
 static __always_inline void jump_label_init(void)
 {
static_key_initialized = true;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254ee..4b353e0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
+   int v, v1;
+
STATIC_KEY_CHECK_USE();
-   if (atomic_inc_not_zero(&key->enabled))
-   return;
+
+   /*
+* Careful if we get concurrent static_key_slow_inc() calls;
+* later calls must wait for the first one to _finish_ the
+* jump_label_update() process.  At the same time, however,
+* the jump_label_update() call below wants to see
+* static_key_enabled() for jumps to be updated properly.
+*
+* So give a special meaning to negative key->enabled: it sends
+* static_key_slow_inc() down the slow path, and it is non-zero
+* so it counts as "enabled" in jump_label_update().  Note that
+* atomic_inc_unless_negative() checks >= 0, so roll our own.
+*/
+   for (v = atomic_read(&key->enabled); v > 0; v = v1) {
+   v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
+   if (likely(v1 == v))
+   return;
+   }
 
jump_label_lock();
-   if (atomic_inc_return(&key->enabled) == 1)
+   if (atomic_read(&key->enabled) == 0) {

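The fast path can also be sketched in userspace with C11 atomics:
increment only while the count is already positive, and send 0 or the -1
"update in progress" sentinel to the locked slow path (names invented):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int enabled;

static bool fast_path_inc(void)
{
    int v = atomic_load(&enabled);

    while (v > 0) {
        /* On failure, v is reloaded with the current value. */
        if (atomic_compare_exchange_weak(&enabled, &v, v + 1))
            return true;            /* key already patched, done */
    }
    return false;                   /* take jump_label_lock() instead */
}

int main(void)
{
    atomic_store(&enabled, 1);
    return fast_path_inc() ? 0 : 1; /* fast path: 1 -> 2 */
}
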
[tip:x86/apic] x86/x2apic: Make stub functions available even if !CONFIG_X86_LOCAL_APIC

2015-09-30 Thread tip-bot for Paolo Bonzini
Commit-ID:  e02ae3871355194a61b03a07d96fd71e81d7eff9
Gitweb: http://git.kernel.org/tip/e02ae3871355194a61b03a07d96fd71e81d7eff9
Author: Paolo Bonzini 
AuthorDate: Mon, 28 Sep 2015 12:26:31 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 30 Sep 2015 21:17:36 +0200

x86/x2apic: Make stub functions available even if !CONFIG_X86_LOCAL_APIC

Some CONFIG_X86_X2APIC functions, especially x2apic_enabled(), are not
declared if !CONFIG_X86_LOCAL_APIC.  However, the same stubs that work
for !CONFIG_X86_X2APIC are okay even if there is no local APIC support
at all.

Avoid the introduction of #ifdefs by moving the x2apic declarations
completely outside the CONFIG_X86_LOCAL_APIC block.  (Unfortunately,
diff generation messes up the actual change that this patch makes).
There is no semantic change because CONFIG_X86_X2APIC depends on
CONFIG_X86_LOCAL_APIC.

Reported-by: Fengguang Wu 
Signed-off-by: Paolo Bonzini 
Cc: Feng Wu 
Link: 
http://lkml.kernel.org/r/1443435991-35750-1-git-send-email-pbonz...@redhat.com
Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/apic.h | 110 ++--
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ebf6d5e..a30316b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void)
return msr & X2APIC_ENABLE;
 }
 
+extern void enable_IR_x2apic(void);
+
+extern int get_physical_broadcast(void);
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+   return -1;
+}
+#else
+extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+   return 0;
+}
+#endif
+
+extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok 1
+static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
 #ifdef CONFIG_X86_X2APIC
 /*
  * Make previous memory operations globally visible before
@@ -186,67 +239,14 @@ static inline int x2apic_enabled(void)
 }
 
 #define x2apic_supported() (cpu_has_x2apic)
-#else
+#else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
 static inline int x2apic_enabled(void) { return 0; }
 
 #define x2apic_mode            (0)
 #define x2apic_supported()     (0)
-#endif
-
-extern void enable_IR_x2apic(void);
-
-extern int get_physical_broadcast(void);
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-void register_lapic_address(unsigned long address);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-
-#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
-{
-   return -1;
-}
-#else
-extern int apic_force_enable(unsigned long addr);
-#endif
-
-extern int apic_bsp_setup(bool upmode);
-extern void apic_ap_setup(void);
-
-/*
- * On 32bit this is mach-xxx local
- */
-#ifdef CONFIG_X86_64
-extern int apic_is_clustered_box(void);
-#else
-static inline int apic_is_clustered_box(void)
-{
-   return 0;
-}
-#endif
-
-extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok 1
-static inline void init_apic_mappings(void) { }
-static inline void disable_local_APIC(void) { }
-# define setup_boot_APIC_clock x86_init_noop
-# define setup_secondary_APIC_clock x86_init_noop
-#endif /* !CONFIG_X86_LOCAL_APIC */
+#endif /* !CONFIG_X86_X2APIC */
 
 #ifdef CONFIG_X86_64
 #define